aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--meta-arm-gcs/.gitignore3
-rw-r--r--meta-arm-gcs/COPYING.MIT17
-rw-r--r--meta-arm-gcs/README33
-rw-r--r--meta-arm-gcs/conf/layer.conf13
-rw-r--r--meta-arm-gcs/gcs.yml52
-rw-r--r--meta-arm-gcs/recipes-bsp/trusted-firmware-a/trusted-firmware-a_%.bbappend1
-rw-r--r--meta-arm-gcs/recipes-core/glibc/files/gcs.patch1668
-rw-r--r--meta-arm-gcs/recipes-core/glibc/glibc_2.39.bbappend3
-rw-r--r--meta-arm-gcs/recipes-devtools/binutils/binutils_2.42.bbappend3
-rw-r--r--meta-arm-gcs/recipes-devtools/binutils/files/gcs.patch973
-rw-r--r--meta-arm-gcs/recipes-devtools/gcc/files/gcs.patch1872
-rw-r--r--meta-arm-gcs/recipes-devtools/gcc/gcc-source_14.0.bbappend3
-rw-r--r--meta-arm-gcs/recipes-kernel/linux/files/gcs.patch7723
-rw-r--r--meta-arm-gcs/recipes-kernel/linux/linux-yocto-dev.bbappend13
14 files changed, 12377 insertions, 0 deletions
diff --git a/meta-arm-gcs/.gitignore b/meta-arm-gcs/.gitignore
new file mode 100644
index 00000000..340e1745
--- /dev/null
+++ b/meta-arm-gcs/.gitignore
@@ -0,0 +1,3 @@
+build
+meta-arm
+poky
diff --git a/meta-arm-gcs/COPYING.MIT b/meta-arm-gcs/COPYING.MIT
new file mode 100644
index 00000000..fb950dc6
--- /dev/null
+++ b/meta-arm-gcs/COPYING.MIT
@@ -0,0 +1,17 @@
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/meta-arm-gcs/README b/meta-arm-gcs/README
new file mode 100644
index 00000000..74286a86
--- /dev/null
+++ b/meta-arm-gcs/README
@@ -0,0 +1,33 @@
+Guarded Control Stack prototype
+===============================
+
+To build:
+
+First, install Kas if you don't already have it:
+
+$ pip3 install --user kas
+
+Then use Kas to build an image:
+
+$ kas build --update gcs.yml
+
+To run the image inside an FVP:
+
+$ kas shell gcs.yml -c "../../scripts/runfvp --console"
+
+
+Verification:
+
+On boot, the kernel should detect that the cores have GCS:
+
+ CPU features: detected: Guarded Control Stack (GCS)
+
+Binaries should be tagged with PAC/BTI/GCS:
+
+ $ readelf -n /bin/bash | grep AArch
+ Properties: AArch64 feature: BTI, PAC, GCS
+
+Enabling GCS tunables shouldn't cause crashes:
+
+ $ GLIBC_TUNABLES=glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2 ls /
+ bin boot dev etc home lib lost+found media mnt proc run sbin sys tmp usr var
diff --git a/meta-arm-gcs/conf/layer.conf b/meta-arm-gcs/conf/layer.conf
new file mode 100644
index 00000000..c0649cc5
--- /dev/null
+++ b/meta-arm-gcs/conf/layer.conf
@@ -0,0 +1,13 @@
+# We have a conf and classes directory, add to BBPATH
+BBPATH .= ":${LAYERDIR}"
+
+# We have recipes-* directories, add to BBFILES
+BBFILES += "${LAYERDIR}/recipes-*/*/*.bb \
+ ${LAYERDIR}/recipes-*/*/*.bbappend"
+
+BBFILE_COLLECTIONS += "meta-arm-gcs"
+BBFILE_PATTERN_meta-arm-gcs = "^${LAYERDIR}/"
+BBFILE_PRIORITY_meta-arm-gcs = "5"
+
+LAYERDEPENDS_meta-arm-gcs = "core meta-arm-bsp"
+LAYERSERIES_COMPAT_meta-arm-gcs = "scarthgap"
diff --git a/meta-arm-gcs/gcs.yml b/meta-arm-gcs/gcs.yml
new file mode 100644
index 00000000..e3d3d1c8
--- /dev/null
+++ b/meta-arm-gcs/gcs.yml
@@ -0,0 +1,52 @@
+header:
+ version: 14
+
+distro: poky
+
+machine: fvp-base
+
+repos:
+ meta-arm:
+ layers:
+ meta-arm:
+ meta-arm-bsp:
+ meta-arm-toolchain:
+ meta-arm-gcs:
+
+ poky:
+ url: https://git.yoctoproject.org/git/poky-contrib
+ branch: ross/gcc14
+ layers:
+ meta:
+ meta-poky:
+
+local_conf_header:
+ setup: |
+ # We accept the FVP EULA
+ LICENSE_FLAGS_ACCEPTED += "Arm-FVP-EULA"
+ # Enable running an image inside a FVP
+ IMAGE_CLASSES += "fvpboot"
+ # Use the 6.8 kernel
+ PREFERRED_PROVIDER_virtual/kernel = "linux-yocto-dev"
+ # No root password for ease
+ EXTRA_IMAGE_FEATURES += "empty-root-password"
+ # Install a toolchain
+ EXTRA_IMAGE_FEATURES += "tools-sdk"
+ # Install a pre-generated SSH key because key generation in a FVP is tiresome
+ CORE_IMAGE_EXTRA_INSTALL += "ssh-pregen-hostkeys"
+
+ fvp_options: |
+ # Set the cores to v9.4 and turn on more features
+ FVP_CONFIG[cluster0.has_arm_v9-4] = "1"
+ FVP_CONFIG[cluster1.has_arm_v9-4] = "1"
+ FVP_CONFIG[cluster0.has_branch_target_exception] = "1"
+ FVP_CONFIG[cluster1.has_branch_target_exception] = "1"
+ FVP_CONFIG[cluster0.has_gcs] = "1"
+ FVP_CONFIG[cluster1.has_gcs] = "1"
+ FVP_CONFIG[cluster0.has_chkfeat] = "1"
+ FVP_CONFIG[cluster1.has_chkfeat] = "1"
+ FVP_CONFIG[cluster0.has_permission_indirection_s1] = "1"
+ FVP_CONFIG[cluster1.has_permission_indirection_s1] = "1"
+
+target:
+ - core-image-full-cmdline
diff --git a/meta-arm-gcs/recipes-bsp/trusted-firmware-a/trusted-firmware-a_%.bbappend b/meta-arm-gcs/recipes-bsp/trusted-firmware-a/trusted-firmware-a_%.bbappend
new file mode 100644
index 00000000..e8304667
--- /dev/null
+++ b/meta-arm-gcs/recipes-bsp/trusted-firmware-a/trusted-firmware-a_%.bbappend
@@ -0,0 +1 @@
+EXTRA_OEMAKE:append:fvp-base = " CTX_INCLUDE_AARCH32_REGS=0"
diff --git a/meta-arm-gcs/recipes-core/glibc/files/gcs.patch b/meta-arm-gcs/recipes-core/glibc/files/gcs.patch
new file mode 100644
index 00000000..289246d2
--- /dev/null
+++ b/meta-arm-gcs/recipes-core/glibc/files/gcs.patch
@@ -0,0 +1,1668 @@
+Upstream-Status: Pending [https://sourceware.org/git/?p=glibc.git;a=shortlog;h=refs/heads/arm/gcs]
+Signed-off-by: Ross Burton <ross.burton@arm.com>
+
+From ccb4e98a6cbdf5b8939a6ae416248bb1436a1338 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Thu, 13 Jul 2023 06:43:33 +0100
+Subject: [PATCH 01/21] aarch64: Add HWCAP2_GCS
+
+---
+ sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h b/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h
+index 91f0e312b9..476cf5a299 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h
++++ b/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h
+@@ -100,3 +100,4 @@
+ #define HWCAP2_SME_F16F16 (1UL << 42)
+ #define HWCAP2_MOPS (1UL << 43)
+ #define HWCAP2_HBC (1UL << 44)
++#define HWCAP2_GCS (1UL << 48)
+--
+2.34.1
+
+
+From 15d1674cae644619d521af249b3a4f1c17cdcee9 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Wed, 22 Feb 2023 14:35:00 +0000
+Subject: [PATCH 02/21] aarch64: Add asm helpers for GCS
+
+The Guarded Control Stack instructions can be present even if the
+hardware does not support the extension (runtime checked feature),
+so the asm code should be backward compatible with old assemblers.
+---
+ sysdeps/aarch64/sysdep.h | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h
+index 464674e89c..661d9fe8c1 100644
+--- a/sysdeps/aarch64/sysdep.h
++++ b/sysdeps/aarch64/sysdep.h
+@@ -74,6 +74,13 @@ strip_pac (void *p)
+ #define PACIASP hint 25
+ #define AUTIASP hint 29
+
++/* Guarded Control Stack support. */
++#define CHKFEAT_X16 hint 40
++#define MRS_GCSPR(x) mrs x, s3_3_c2_c5_1
++#define GCSPOPM(x) sysl x, #3, c7, c7, #1
++#define GCSSS1(x) sys #3, c7, c7, #2, x
++#define GCSSS2(x) sysl x, #3, c7, c7, #3
++
+ /* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. */
+ #define FEATURE_1_AND 0xc0000000
+ #define FEATURE_1_BTI 1
+--
+2.34.1
+
+
+From 0f6417aafcb5332cea53f81daa2dca9588c8b733 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 22 Dec 2023 09:47:33 +0000
+Subject: [PATCH 03/21] elf.h: define GNU_PROPERTY_AARCH64_FEATURE_1_GCS
+
+---
+ elf/elf.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/elf/elf.h b/elf/elf.h
+index 455731663c..f927dad3c3 100644
+--- a/elf/elf.h
++++ b/elf/elf.h
+@@ -1377,6 +1377,7 @@ typedef struct
+
+ #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+ #define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
++#define GNU_PROPERTY_AARCH64_FEATURE_1_GCS (1U << 2)
+
+ /* The x86 instruction sets indicated by the corresponding bits are
+ used in program. Their support in the hardware is optional. */
+--
+2.34.1
+
+
+From 6c7378b54f6f8c3db222894ed27342782bf526b7 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 22 Dec 2023 12:29:48 +0000
+Subject: [PATCH 04/21] aarch64: mark objects with GCS property note
+
+TODO: binutils config check
+TODO: build attributes instead of gnu property
+---
+ sysdeps/aarch64/sysdep.h | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h
+index 661d9fe8c1..55e0230b5e 100644
+--- a/sysdeps/aarch64/sysdep.h
++++ b/sysdeps/aarch64/sysdep.h
+@@ -85,6 +85,7 @@ strip_pac (void *p)
+ #define FEATURE_1_AND 0xc0000000
+ #define FEATURE_1_BTI 1
+ #define FEATURE_1_PAC 2
++#define FEATURE_1_GCS 4
+
+ /* Add a NT_GNU_PROPERTY_TYPE_0 note. */
+ #define GNU_PROPERTY(type, value) \
+@@ -103,9 +104,9 @@ strip_pac (void *p)
+ /* Add GNU property note with the supported features to all asm code
+ where sysdep.h is included. */
+ #if HAVE_AARCH64_BTI && HAVE_AARCH64_PAC_RET
+-GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC)
++GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC|FEATURE_1_GCS)
+ #elif HAVE_AARCH64_BTI
+-GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI)
++GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS)
+ #endif
+
+ /* Define an entry point visible from C. */
+--
+2.34.1
+
+
+From e6bc31c117194bfadcf10a6c90b6586800a33a11 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 25 Nov 2022 18:16:07 +0000
+Subject: [PATCH 05/21] aarch64: Define jmp_buf offset for GCS
+
+The target specific internal __longjmp is called with a __jmp_buf
+argument which has its size exposed in the ABI. On aarch64 this has
+no space left, so GCSPR cannot be restored in longjmp in the usual
+way, which is needed for the Guarded Control Stack (GCS) extension.
+
+setjmp is implemented via __sigsetjmp which has a jmp_buf argument
+however it is also called with __pthread_unwind_buf_t argument cast
+to jmp_buf (in cancellation cleanup code built with -fno-exception).
+The two types, jmp_buf and __pthread_unwind_buf_t, have common bits
+beyond the __jmp_buf field and there is unused space there which we
+can use for saving GCSPR.
+
+For this to work some bits of those two generic types have to be
+reserved for target specific use and the generic code in glibc has
+to ensure that __longjmp is always called with a __jmp_buf that is
+embedded into one of those two types. Morally __longjmp should be
+changed to take jmp_buf as argument, but that is an intrusive change
+across targets.
+
+Note: longjmp is never called with __pthread_unwind_buf_t from user
+code, only the internal __libc_longjmp is called with that type and
+thus the two types could have separate longjmp implementations on a
+target. We don't rely on this now (but migh in the future given that
+cancellation unwind does not need to restore GCSPR).
+
+Given the above this patch finds an unused slot for GCSPR. This
+placement is not exposed in the ABI so it may change in the future.
+This is also very target ABI specific so the generic types cannot
+be easily changed to clearly mark the reserved fields.
+---
+ sysdeps/aarch64/jmpbuf-offsets.h | 63 ++++++++++++++++++++++++++++++++
+ 1 file changed, 63 insertions(+)
+
+diff --git a/sysdeps/aarch64/jmpbuf-offsets.h b/sysdeps/aarch64/jmpbuf-offsets.h
+index 632328c7e2..ec047cf6b1 100644
+--- a/sysdeps/aarch64/jmpbuf-offsets.h
++++ b/sysdeps/aarch64/jmpbuf-offsets.h
+@@ -39,6 +39,69 @@
+ #define JB_D14 20
+ #define JB_D15 21
+
++/* The target specific part of jmp_buf has no space for expansion but
++ the public jmp_buf ABI type has. Unfortunately there is another type
++ that is used with setjmp APIs and exposed by thread cancellation (in
++ binaries built with -fno-exceptions) which complicates the situation.
++
++ // Internal layout of the public jmp_buf type on AArch64.
++ // This is passed to setjmp, longjmp, sigsetjmp, siglongjmp.
++ struct
++ {
++ uint64_t jmpbuf[22]; // Target specific part.
++ uint32_t mask_was_saved; // savemask bool used by sigsetjmp/siglongjmp.
++ uint32_t pad;
++ uint64_t saved_mask; // sigset_t bits used on linux.
++ uint64_t unused[15]; // sigset_t bits not used on linux.
++ };
++
++ // Internal layout of the public __pthread_unwind_buf_t type.
++ // This is passed to sigsetjmp with !savemask and to the internal
++ // __libc_longjmp (currently alias of longjmp on AArch64).
++ struct
++ {
++ uint64_t jmpbuf[22]; // Must match jmp_buf.
++ uint32_t mask_was_saved; // Must match jmp_buf, always 0.
++ uint32_t pad;
++ void *prev; // List for unwinding.
++ void *cleanup; // Cleanup handlers.
++ uint32_t canceltype; // 1 bit cancellation type.
++ uint32_t pad2;
++ void *pad3;
++ };
++
++ Ideally only the target specific part of jmp_buf (A) is accessed by
++ __setjmp and __longjmp. But that is always embedded into one of the
++ two types above so the bits that are unused in those types (B) may be
++ reused for target specific purposes. Setjmp can't distinguish between
++ jmp_buf and __pthread_unwind_buf_t, but longjmp can: only an internal
++ longjmp call uses the latter, so state that is not needed for cancel
++ cleanups can go to fields (C). If generic code is refactored then the
++ usage of additional fields can be optimized (D). And some fields are
++ only accessible in the savedmask case (E). Reusability of jmp_buf
++ fields on AArch64 for target purposes:
++
++ struct
++ {
++ uint64_t A[22]; // 0 .. 176
++ uint32_t D; // 176 .. 180
++ uint32_t B; // 180 .. 184
++ uint64_t D; // 184 .. 192
++ uint64_t C; // 192 .. 200
++ uint32_t C; // 200 .. 204
++ uint32_t B; // 204 .. 208
++ uint64_t B; // 208 .. 216
++ uint64_t E[12]; // 216 .. 312
++ }
++
++ The B fields can be used with minimal glibc code changes. We need a
++ 64 bit field for the Guarded Control Stack pointer (GCSPR_EL0) which
++ can use a C field too as cancellation cleanup does not execute RET
++ for a previous BL of the cancelled thread, but that would require a
++ custom __libc_longjmp. This layout can change in the future.
++*/
++#define JB_GCSPR 208
++
+ #ifndef __ASSEMBLER__
+ #include <setjmp.h>
+ #include <stdint.h>
+--
+2.34.1
+
+
+From c3274a8582b4915efea5a16558e730d362bea177 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Thu, 23 Feb 2023 08:54:04 +0000
+Subject: [PATCH 06/21] aarch64: Add GCS support to longjmp
+
+This implementation ensures that longjmp across different stacks
+works: it scans for GCS cap token and switches GCS if necessary
+then the target GCSPR is restored with a GCSPOPM loop once the
+current GCSPR is on the same GCS.
+
+This makes longjmp linear time in the number of jumped over stack
+frames when GCS is enabled.
+---
+ sysdeps/aarch64/__longjmp.S | 31 +++++++++++++++++++++++++++++++
+ sysdeps/aarch64/setjmp.S | 10 ++++++++++
+ 2 files changed, 41 insertions(+)
+
+diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
+index 7b6add751e..ecd272262d 100644
+--- a/sysdeps/aarch64/__longjmp.S
++++ b/sysdeps/aarch64/__longjmp.S
+@@ -91,6 +91,37 @@ ENTRY (__longjmp)
+ ldp d12, d13, [x0, #JB_D12<<3]
+ ldp d14, d15, [x0, #JB_D14<<3]
+
++ /* GCS support. */
++ mov x16, 1
++ CHKFEAT_X16
++ tbnz x16, 0, L(gcs_done)
++ MRS_GCSPR (x2)
++ ldr x3, [x0, #JB_GCSPR]
++ mov x4, x3
++ /* x2: GCSPR now. x3, x4: target GCSPR. x5, x6: tmp regs. */
++L(gcs_scan):
++ cmp x2, x4
++ b.eq L(gcs_pop)
++ sub x4, x4, 8
++ /* Check for a cap token. */
++ ldr x5, [x4]
++ and x6, x4, 0xfffffffffffff000
++ orr x6, x6, 1
++ cmp x5, x6
++ b.eq L(gcs_switch)
++ b L(gcs_scan)
++L(gcs_switch):
++ add x2, x4, 8
++ GCSSS1 (x4)
++ GCSSS2 (xzr)
++L(gcs_pop):
++ cmp x2, x3
++ b.eq L(gcs_done)
++ GCSPOPM (xzr)
++ add x2, x2, 8
++ b L(gcs_pop)
++L(gcs_done):
++
+ /* Originally this was implemented with a series of
+ .cfi_restore() directives.
+
+diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
+index 43fdb1b2fb..f7ffccfaba 100644
+--- a/sysdeps/aarch64/setjmp.S
++++ b/sysdeps/aarch64/setjmp.S
+@@ -57,6 +57,16 @@ ENTRY (__sigsetjmp)
+ stp d10, d11, [x0, #JB_D10<<3]
+ stp d12, d13, [x0, #JB_D12<<3]
+ stp d14, d15, [x0, #JB_D14<<3]
++
++ /* GCS support. */
++ mov x16, 1
++ CHKFEAT_X16
++ tbnz x16, 0, L(gcs_done)
++ MRS_GCSPR (x2)
++ add x2, x2, 8 /* GCS state right after setjmp returns. */
++ str x2, [x0, #JB_GCSPR]
++L(gcs_done):
++
+ #ifdef PTR_MANGLE
+ mov x4, sp
+ PTR_MANGLE (5, 4, 3, 2)
+--
+2.34.1
+
+
+From 868c129b90a52f7c30b8a560dc580f851db4b6fc Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Wed, 27 Dec 2023 18:36:10 +0000
+Subject: [PATCH 07/21] aarch64: Add GCS support to vfork
+
+---
+ sysdeps/unix/sysv/linux/aarch64/vfork.S | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/sysdeps/unix/sysv/linux/aarch64/vfork.S b/sysdeps/unix/sysv/linux/aarch64/vfork.S
+index e71e492da3..cfaf4a1ffb 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/vfork.S
++++ b/sysdeps/unix/sysv/linux/aarch64/vfork.S
+@@ -33,8 +33,14 @@ ENTRY (__vfork)
+
+ cmn x0, #4095
+ b.cs .Lsyscall_error
++ cbz x0, L(child)
+ RET
+-
++L(child):
++ /* Return with indirect branch in the child to support GCS.
++ Clear x30 to crash early if the child tries to ret. */
++ mov x1, x30
++ mov x30, 0
++ br x1
+ PSEUDO_END (__vfork)
+ libc_hidden_def (__vfork)
+
+--
+2.34.1
+
+
+From 996a31b2a3313039b5c66d7de15577e5bc145278 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 24 Feb 2023 16:29:32 +0000
+Subject: [PATCH 08/21] aarch64: Add GCS support for setcontext
+
+Userspace ucontext needs to store GCSPR, it does not have to be
+compatible with the kernel ucontext. For now we use the linux
+struct gcs_context layout but only use the gcspr field from it.
+
+Similar implementation to the longjmp code, supports switching GCS
+if the target GCS is capped, and unwinding a continous GCS to a
+previous state.
+---
+ sysdeps/unix/sysv/linux/aarch64/getcontext.S | 17 +++++++-
+ sysdeps/unix/sysv/linux/aarch64/setcontext.S | 39 +++++++++++++++++++
+ sysdeps/unix/sysv/linux/aarch64/swapcontext.S | 32 +++++++++++----
+ .../sysv/linux/aarch64/ucontext-internal.h | 5 +++
+ 4 files changed, 84 insertions(+), 9 deletions(-)
+
+diff --git a/sysdeps/unix/sysv/linux/aarch64/getcontext.S b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
+index e5b69c9a82..30e2b39399 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/getcontext.S
++++ b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
+@@ -83,9 +83,24 @@ ENTRY(__getcontext)
+ mrs x4, fpcr
+ str w4, [x3, oFPCR - oFPSR]
+
+- /* Write the termination context extension header. */
+ add x2, x2, #FPSIMD_CONTEXT_SIZE
+
++ /* Save the GCSPR. */
++ mov x16, 1
++ CHKFEAT_X16
++ tbnz x16, 0, L(gcs_done)
++ mov w3, #(GCS_MAGIC & 0xffff)
++ movk w3, #(GCS_MAGIC >> 16), lsl #16
++ str w3, [x2, #oHEAD + oMAGIC]
++ mov w3, #GCS_CONTEXT_SIZE
++ str w3, [x2, #oHEAD + oSIZE]
++ MRS_GCSPR (x4)
++ add x4, x4, 8 /* GCS state right after getcontext returns. */
++ str x4, [x2, #oGCSPR]
++ add x2, x2, #GCS_CONTEXT_SIZE
++L(gcs_done):
++
++ /* Write the termination context extension header. */
+ str wzr, [x2, #oHEAD + oMAGIC]
+ str wzr, [x2, #oHEAD + oSIZE]
+
+diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+index ba659438c5..c08e83ee60 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
++++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+@@ -130,6 +130,45 @@ ENTRY (__setcontext)
+ ldr w4, [x3, oFPCR - oFPSR]
+ msr fpcr, x4
+
++ /* Restore the GCS. */
++ mov x16, 1
++ CHKFEAT_X16
++ tbnz x16, 0, L(gcs_done)
++ /* Get target GCS from GCS context. */
++ ldr w1, [x2, #oHEAD + oSIZE]
++ add x2, x2, x1
++ mov w3, #(GCS_MAGIC & 0xffff)
++ movk w3, #(GCS_MAGIC >> 16), lsl #16
++ ldr w1, [x2, #oHEAD + oMAGIC]
++ cmp w1, w3
++ b.ne L(gcs_done)
++ ldr x3, [x2, #oGCSPR]
++ MRS_GCSPR (x2)
++ mov x4, x3
++ /* x2: GCSPR now. x3, x4: target GCSPR. x5, x6: tmp regs. */
++L(gcs_scan):
++ cmp x2, x4
++ b.eq L(gcs_pop)
++ sub x4, x4, 8
++ /* Check for a cap token. */
++ ldr x5, [x4]
++ and x6, x4, 0xfffffffffffff000
++ orr x6, x6, 1
++ cmp x5, x6
++ b.eq L(gcs_switch)
++ b L(gcs_scan)
++L(gcs_switch):
++ add x2, x4, 8
++ GCSSS1 (x4)
++ GCSSS2 (xzr)
++L(gcs_pop):
++ cmp x2, x3
++ b.eq L(gcs_done)
++ GCSPOPM (xzr)
++ add x2, x2, 8
++ b L(gcs_pop)
++L(gcs_done):
++
+ 2:
+ ldr x16, [x0, oPC]
+ /* Restore arg registers. */
+diff --git a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
+index f049140d35..45b1277c74 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
++++ b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
+@@ -32,8 +32,15 @@ ENTRY(__swapcontext)
+ And set up x1 to become the return address of the caller, so we
+ can return there with a normal RET instead of an indirect jump. */
+ stp xzr, x30, [x0, oX0 + 0 * SZREG]
++
++ /* With GCS, swapcontext calls are followed by BTI J, otherwise
++ we have to be compatible with old BTI enabled binaries. */
++ mov x16, 1
++ CHKFEAT_X16
++ tbz x16, 0, L(skip_x30_redirect)
+ /* Arrange the oucp context to return to 2f. */
+ adr x30, 2f
++L(skip_x30_redirect):
+
+ stp x18, x19, [x0, oX0 + 18 * SZREG]
+ stp x20, x21, [x0, oX0 + 20 * SZREG]
+@@ -72,14 +79,27 @@ ENTRY(__swapcontext)
+ mrs x4, fpcr
+ str w4, [x3, #oFPCR - oFPSR]
+
+- /* Write the termination context extension header. */
+ add x2, x2, #FPSIMD_CONTEXT_SIZE
+
++ /* Save the GCSPR. */
++ tbnz x16, 0, L(gcs_done)
++ mov w3, #(GCS_MAGIC & 0xffff)
++ movk w3, #(GCS_MAGIC >> 16), lsl #16
++ str w3, [x2, #oHEAD + oMAGIC]
++ mov w3, #GCS_CONTEXT_SIZE
++ str w3, [x2, #oHEAD + oSIZE]
++ MRS_GCSPR (x4)
++ add x4, x4, 8 /* GCSPR of the caller. */
++ str x4, [x2, #oGCSPR]
++ add x2, x2, #GCS_CONTEXT_SIZE
++L(gcs_done):
++
++ /* Write the termination context extension header. */
+ str wzr, [x2, #oHEAD + oMAGIC]
+ str wzr, [x2, #oHEAD + oSIZE]
+
+ /* Preserve ucp. */
+- mov x21, x1
++ mov x9, x1
+
+ /* rt_sigprocmask (SIG_SETMASK, &ucp->uc_sigmask, &oucp->uc_sigmask,
+ _NSIG8) */
+@@ -93,12 +113,8 @@ ENTRY(__swapcontext)
+ svc 0
+ cbnz x0, 1f
+
+- mov x22, x30
+- mov x0, x21
+- bl JUMPTARGET (__setcontext)
+- mov x30, x22
+- RET
+-
++ mov x0, x9
++ b JUMPTARGET (__setcontext)
+ 1:
+ b C_SYMBOL_NAME(__syscall_error)
+ 2:
+diff --git a/sysdeps/unix/sysv/linux/aarch64/ucontext-internal.h b/sysdeps/unix/sysv/linux/aarch64/ucontext-internal.h
+index 096d5fb7c7..84f5365c0e 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/ucontext-internal.h
++++ b/sysdeps/unix/sysv/linux/aarch64/ucontext-internal.h
+@@ -43,3 +43,8 @@
+ #define oX21 (oX0 + 21*8)
+ #define oFP (oX0 + 29*8)
+ #define oLR (oX0 + 30*8)
++
++/* Use kernel layout for saving GCSPR in ucontext. */
++#define GCS_MAGIC 0x47435300
++#define GCS_CONTEXT_SIZE 32
++#define oGCSPR 8
+--
+2.34.1
+
+
+From 99f884d57f17afea877006284f469747711e1d51 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Thu, 28 Dec 2023 15:53:38 +0000
+Subject: [PATCH 09/21] aarch64: mark swapcontext with indirect_return
+
+---
+ sysdeps/aarch64/bits/indirect-return.h | 36 ++++++++++++++++++++++++++
+ 1 file changed, 36 insertions(+)
+ create mode 100644 sysdeps/aarch64/bits/indirect-return.h
+
+diff --git a/sysdeps/aarch64/bits/indirect-return.h b/sysdeps/aarch64/bits/indirect-return.h
+new file mode 100644
+index 0000000000..99905f21fa
+--- /dev/null
++++ b/sysdeps/aarch64/bits/indirect-return.h
+@@ -0,0 +1,36 @@
++/* Definition of __INDIRECT_RETURN. AArch64 version.
++ Copyright (C) 2024 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#ifndef _UCONTEXT_H
++# error "Never include <bits/indirect-return.h> directly; use <ucontext.h> instead."
++#endif
++
++/* __INDIRECT_RETURN indicates that swapcontext may return via
++ an indirect branch. This happens when GCS is enabled, so
++ add the attribute if available, otherwise returns_twice has
++ a similar effect, but it prevents some code transformations
++ that can cause build failures in some rare cases so it is
++ only used when GCS is enabled. */
++#if __glibc_has_attribute (__indirect_return__)
++# define __INDIRECT_RETURN __attribute__ ((__indirect_return__))
++#elif __glibc_has_attribute (__returns_twice__) \
++ && defined __ARM_FEATURE_GCS_DEFAULT
++# define __INDIRECT_RETURN __attribute__ ((__returns_twice__))
++#else
++# define __INDIRECT_RETURN
++#endif
+--
+2.34.1
+
+
+From 54e90582aabdb44f010ad5dfd64ce9c3e6d33914 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Tue, 4 Apr 2023 10:42:21 +0100
+Subject: [PATCH 10/21] aarch64: Add GCS support for makecontext
+
+Changed the makecontext logic: previously the first setcontext jumped
+straight to the user callback function and the return address is set
+to __startcontext. This does not work when GCS is enabled as the
+integrity of the return address is protected, so instead the context
+is setup such that setcontext jumps to __startcontext which calls the
+user callback (passed in x20).
+
+The map_shadow_stack syscall is used to allocate a suitably sized GCS
+(which includes some reserved area to account for altstack signal
+handlers and otherwise supports maximum number of 16 byte aligned
+stack frames on the given stack) however the GCS is never freed as
+the lifetime of ucontext and related stack is user managed.
+---
+ sysdeps/unix/sysv/linux/aarch64/makecontext.c | 61 ++++++++++++++++++-
+ sysdeps/unix/sysv/linux/aarch64/setcontext.S | 4 ++
+ 2 files changed, 63 insertions(+), 2 deletions(-)
+
+diff --git a/sysdeps/unix/sysv/linux/aarch64/makecontext.c b/sysdeps/unix/sysv/linux/aarch64/makecontext.c
+index 5aec182013..9e66b6761c 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/makecontext.c
++++ b/sysdeps/unix/sysv/linux/aarch64/makecontext.c
+@@ -22,6 +22,52 @@
+ #include <stdint.h>
+ #include <ucontext.h>
+
++#define GCS_MAGIC 0x47435300
++
++static struct _aarch64_ctx *extension (void *p)
++{
++ return p;
++}
++
++#ifndef __NR_map_shadow_stack
++# define __NR_map_shadow_stack 453
++#endif
++#ifndef SHADOW_STACK_SET_TOKEN
++# define SHADOW_STACK_SET_TOKEN (1UL << 0)
++# define SHADOW_STACK_SET_MARKER (1UL << 1)
++#endif
++
++static void *
++map_shadow_stack (void *addr, size_t size, unsigned long flags)
++{
++ return (void *) INLINE_SYSCALL_CALL (map_shadow_stack, addr, size, flags);
++}
++
++#define GCS_MAX_SIZE (1UL << 31)
++#define GCS_ALTSTACK_RESERVE 160
++
++static void *
++alloc_makecontext_gcs (size_t stack_size)
++{
++ size_t size = (stack_size / 2 + GCS_ALTSTACK_RESERVE) & -8UL;
++ if (size > GCS_MAX_SIZE)
++ size = GCS_MAX_SIZE;
++
++ unsigned long flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN;
++ void *base = map_shadow_stack (NULL, size, flags);
++ if (base == (void *) -1)
++ /* ENOSYS, bad size or OOM. */
++ abort ();
++ uint64_t *gcsp = (uint64_t *) ((char *) base + size);
++ /* Skip end of GCS token. */
++ gcsp--;
++ /* Verify GCS cap token. */
++ gcsp--;
++ if (((uint64_t)gcsp & 0xfffffffffffff000) + 1 != *gcsp)
++ abort ();
++ /* Return the target GCS pointer for context switch. */
++ return gcsp + 1;
++}
+
+ /* makecontext sets up a stack and the registers for the
+ user context. The stack looks like this:
+@@ -56,10 +102,21 @@ __makecontext (ucontext_t *ucp, void (*func) (void), int argc, ...)
+ sp = (uint64_t *) (((uintptr_t) sp) & -16L);
+
+ ucp->uc_mcontext.regs[19] = (uintptr_t) ucp->uc_link;
++ ucp->uc_mcontext.regs[20] = (uintptr_t) func;
+ ucp->uc_mcontext.sp = (uintptr_t) sp;
+- ucp->uc_mcontext.pc = (uintptr_t) func;
++ ucp->uc_mcontext.pc = (uintptr_t) __startcontext;
+ ucp->uc_mcontext.regs[29] = (uintptr_t) 0;
+- ucp->uc_mcontext.regs[30] = (uintptr_t) &__startcontext;
++ ucp->uc_mcontext.regs[30] = (uintptr_t) 0;
++
++ void *p = ucp->uc_mcontext.__reserved;
++ if (extension (p)->magic == FPSIMD_MAGIC)
++ p = (char *)p + extension (p)->size;
++ if (extension (p)->magic == GCS_MAGIC)
++ {
++ /* Using the kernel struct gcs_context layout. */
++ struct { uint64_t x, gcspr, y, z; } *q = p;
++ q->gcspr = (uint64_t) alloc_makecontext_gcs (ucp->uc_stack.ss_size);
++ }
+
+ va_start (ap, argc);
+ for (i = 0; i < argc; ++i)
+diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+index c08e83ee60..6aa7236693 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
++++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+@@ -181,7 +181,11 @@ L(gcs_done):
+ PSEUDO_END (__setcontext)
+ weak_alias (__setcontext, setcontext)
+
++/* makecontext start function: receives uc_link in x19 and func in x20.
++ Arguments of func, x29, x30 and sp are set up by the caller. */
+ ENTRY (__startcontext)
++ cfi_undefined (x30)
++ blr x20
+ mov x0, x19
+ cbnz x0, __setcontext
+ 1: b HIDDEN_JUMPTARGET (exit)
+--
+2.34.1
+
+
+From 56253d5f47330f502dd6bc8f3e12eeabf6c20a8b Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Mon, 17 Jul 2023 16:54:15 +0100
+Subject: [PATCH 11/21] aarch64: Try to free the GCS of makecontext
+
+Free GCS after a makecontext start func returns and at thread exit, so
+assume makecontext cannot outlive the thread where it was created.
+
+This is an attempt to bound the lifetime of the GCS allocated for
+makecontext, but it is still possible to have significant GCS leaks,
+new GCS aware APIs could solve that, but that would not allow using
+GCS with existing code transparently.
+---
+ include/set-freeres.h | 4 ++
+ malloc/thread-freeres.c | 3 +
+ sysdeps/unix/sysv/linux/aarch64/makecontext.c | 65 +++++++++++++++++++
+ sysdeps/unix/sysv/linux/aarch64/setcontext.S | 19 +++++-
+ sysdeps/unix/sysv/linux/aarch64/sysdep.h | 6 +-
+ 5 files changed, 93 insertions(+), 4 deletions(-)
+
+diff --git a/include/set-freeres.h b/include/set-freeres.h
+index 4177b453fa..c3d64b4f41 100644
+--- a/include/set-freeres.h
++++ b/include/set-freeres.h
+@@ -78,6 +78,10 @@ extern void __nss_database_freeres (void) attribute_hidden;
+ extern int _IO_cleanup (void) attribute_hidden;;
+ /* From dlfcn/dlerror.c */
+ extern void __libc_dlerror_result_free (void) attribute_hidden;
++/* From libc.so, arch specific. */
++#ifdef ARCH_THREAD_FREERES
++extern void ARCH_THREAD_FREERES (void) attribute_hidden;
++#endif
+
+ /* From either libc.so or libpthread.so */
+ extern void __libpthread_freeres (void) attribute_hidden;
+diff --git a/malloc/thread-freeres.c b/malloc/thread-freeres.c
+index 55ba4e7b83..69867f3a3b 100644
+--- a/malloc/thread-freeres.c
++++ b/malloc/thread-freeres.c
+@@ -29,6 +29,9 @@
+ void
+ __libc_thread_freeres (void)
+ {
++#ifdef ARCH_THREAD_FREERES
++ call_function_static_weak (ARCH_THREAD_FREERES);
++#endif
+ #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_32)
+ __rpc_thread_destroy ();
+ #endif
+diff --git a/sysdeps/unix/sysv/linux/aarch64/makecontext.c b/sysdeps/unix/sysv/linux/aarch64/makecontext.c
+index 9e66b6761c..779f7e55aa 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/makecontext.c
++++ b/sysdeps/unix/sysv/linux/aarch64/makecontext.c
+@@ -20,7 +20,9 @@
+ #include <sysdep.h>
+ #include <stdarg.h>
+ #include <stdint.h>
++#include <stdlib.h>
+ #include <ucontext.h>
++#include <sys/mman.h>
+
+ #define GCS_MAGIC 0x47435300
+
+@@ -29,6 +31,47 @@ static struct _aarch64_ctx *extension (void *p)
+ return p;
+ }
+
++struct gcs_list {
++ struct gcs_list *next;
++ void *base;
++ size_t size;
++};
++
++static __thread struct gcs_list *gcs_list_head = NULL;
++
++static void
++record_gcs (void *base, size_t size)
++{
++ struct gcs_list *p = malloc (sizeof *p);
++ if (p == NULL)
++ abort ();
++ p->base = base;
++ p->size = size;
++ p->next = gcs_list_head;
++ gcs_list_head = p;
++}
++
++static void
++free_gcs_list (void)
++{
++ for (;;)
++ {
++ struct gcs_list *p = gcs_list_head;
++ if (p == NULL)
++ break;
++ gcs_list_head = p->next;
++ __munmap (p->base, p->size);
++ free (p);
++ }
++}
++
++/* Called during thread shutdown to free resources. */
++void
++__libc_aarch64_thread_freeres (void)
++{
++ free_gcs_list ();
++}
++
+ #ifndef __NR_map_shadow_stack
+ # define __NR_map_shadow_stack 453
+ #endif
+@@ -58,6 +101,9 @@ alloc_makecontext_gcs (size_t stack_size)
+ if (base == (void *) -1)
+ /* ENOSYS, bad size or OOM. */
+ abort ();
++
++ record_gcs (base, size);
++
+ uint64_t *gcsp = (uint64_t *) ((char *) base + size);
+ /* Skip end of GCS token. */
+ gcsp--;
+@@ -69,6 +115,25 @@ alloc_makecontext_gcs (size_t stack_size)
+ return gcsp + 1;
+ }
+
++void
++__free_makecontext_gcs (void *gcs)
++{
++ struct gcs_list *p = gcs_list_head;
++ struct gcs_list **q = &gcs_list_head;
++ for (;;)
++ {
++ if (p == NULL)
++ abort ();
++ if (gcs == p->base + p->size - 8)
++ break;
++ q = &p->next;
++ p = p->next;
++ }
++ *q = p->next;
++ __munmap (p->base, p->size);
++ free (p);
++}
++
+ /* makecontext sets up a stack and the registers for the
+ user context. The stack looks like this:
+
+diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+index 6aa7236693..723be73213 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
++++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+@@ -34,6 +34,9 @@
+ .text
+
+ ENTRY (__setcontext)
++ /* If x10 is set then old GCS is freed. */
++ mov x10, 0
++__setcontext_internal:
+ PTR_ARG (0)
+ /* Save a copy of UCP. */
+ mov x9, x0
+@@ -145,7 +148,8 @@ ENTRY (__setcontext)
+ ldr x3, [x2, #oGCSPR]
+ MRS_GCSPR (x2)
+ mov x4, x3
+- /* x2: GCSPR now. x3, x4: target GCSPR. x5, x6: tmp regs. */
++ mov x1, x2
++ /* x1, x2: GCSPR now. x3, x4: target GCSPR. x5, x6: tmp regs. */
+ L(gcs_scan):
+ cmp x2, x4
+ b.eq L(gcs_pop)
+@@ -163,10 +167,18 @@ L(gcs_switch):
+ GCSSS2 (xzr)
+ L(gcs_pop):
+ cmp x2, x3
+- b.eq L(gcs_done)
++ b.eq L(gcs_free_old)
+ GCSPOPM (xzr)
+ add x2, x2, 8
+ b L(gcs_pop)
++L(gcs_free_old):
++ cbz x10, L(gcs_done)
++ mov x28, x0
++ mov x0, x1
++ bl __free_makecontext_gcs
++ mov x0, x28
++ ldp x28, x29, [x0, oX0 + 28 * SZREG]
++ ldr x30, [x0, oX0 + 30 * SZREG]
+ L(gcs_done):
+
+ 2:
+@@ -187,6 +199,7 @@ ENTRY (__startcontext)
+ cfi_undefined (x30)
+ blr x20
+ mov x0, x19
+- cbnz x0, __setcontext
++ mov x10, 1
++ cbnz x0, __setcontext_internal
+ 1: b HIDDEN_JUMPTARGET (exit)
+ END (__startcontext)
+diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
+index bbbe35723c..590318dee8 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
++++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
+@@ -29,8 +29,12 @@
+
+ #include <tls.h>
+
+-/* In order to get __set_errno() definition in INLINE_SYSCALL. */
+ #ifndef __ASSEMBLER__
++/* Thread cleanup function. */
++#define ARCH_THREAD_FREERES __libc_aarch64_thread_freeres
++void __libc_aarch64_thread_freeres (void) attribute_hidden;
++
++/* In order to get __set_errno() definition in INLINE_SYSCALL. */
+ #include <errno.h>
+ #endif
+
+--
+2.34.1
+
+
+From 200010339f4fa0449a7bd76555931881eaea916c Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Mon, 17 Jul 2023 08:31:05 +0100
+Subject: [PATCH 12/21] aarch64: Add glibc.cpu.aarch64_gcs tunable
+
+This tunable is for controlling the GCS status. It is the argument to
+the PR_SET_SHADOW_STACK_STATUS prctl, by default 0, so GCS is disabled.
+
+The status is stored into GL(dl_aarch64_gcs) early and only applied
+later, since enabling GCS is tricky: it must happen on a top level
+stack frame. (Using GL instead of GLRO because it may need updates
+depending on loaded libraries that happen after readonly protection
+is applied, however library marking based GCS setting is not yet
+implemented.)
+---
+ sysdeps/aarch64/dl-tunables.list | 5 +++
+ .../unix/sysv/linux/aarch64/cpu-features.c | 4 ++
+ .../unix/sysv/linux/aarch64/dl-procruntime.c | 37 +++++++++++++++++++
+ 3 files changed, 46 insertions(+)
+ create mode 100644 sysdeps/unix/sysv/linux/aarch64/dl-procruntime.c
+
+diff --git a/sysdeps/aarch64/dl-tunables.list b/sysdeps/aarch64/dl-tunables.list
+index 92c6cbfa92..c08be86932 100644
+--- a/sysdeps/aarch64/dl-tunables.list
++++ b/sysdeps/aarch64/dl-tunables.list
+@@ -21,5 +21,10 @@ glibc {
+ name {
+ type: STRING
+ }
++ aarch64_gcs {
++ type: UINT_64
++ minval: 0
++ default: 0
++ }
+ }
+ }
+diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+index b1a3f673f0..a8b6a4654a 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+@@ -128,4 +128,8 @@ init_cpu_features (struct cpu_features *cpu_features)
+
+ /* Check if MOPS is supported. */
+ cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS;
++
++ if (GLRO (dl_hwcap2) & HWCAP2_GCS)
++ /* GCS status may be updated later by binary compatibility checks. */
++ GL (dl_aarch64_gcs) = TUNABLE_GET (glibc, cpu, aarch64_gcs, uint64_t, 0);
+ }
+diff --git a/sysdeps/unix/sysv/linux/aarch64/dl-procruntime.c b/sysdeps/unix/sysv/linux/aarch64/dl-procruntime.c
+new file mode 100644
+index 0000000000..23c61da6c4
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/aarch64/dl-procruntime.c
+@@ -0,0 +1,37 @@
++/* Data for processor runtime information. AArch64 version.
++ Copyright (C) 2024 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#ifndef PROCINFO_CLASS
++# define PROCINFO_CLASS
++#endif
++
++#if !IS_IN (ldconfig)
++# if !defined PROCINFO_DECL && defined SHARED
++ ._dl_aarch64_gcs
++# else
++PROCINFO_CLASS unsigned long _dl_aarch64_gcs
++# endif
++# ifndef PROCINFO_DECL
++= 0
++# endif
++# if !defined SHARED || defined PROCINFO_DECL
++;
++# else
++,
++# endif
++#endif
+--
+2.34.1
+
+
+From cf7e262541c78c65a33ed78d39c6dc0a50b36d96 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Thu, 13 Jul 2023 07:22:44 +0100
+Subject: [PATCH 13/21] aarch64: Enable GCS in static linked exe
+
+Use the ARCH_SETUP_TLS hook to enable GCS in the static linked case.
+The system call must be inlined and then GCS is enabled on a top
+level stack frame that does not return and has no exception handlers
+above it.
+---
+ sysdeps/unix/sysv/linux/aarch64/libc-start.h | 49 ++++++++++++++++++++
+ 1 file changed, 49 insertions(+)
+ create mode 100644 sysdeps/unix/sysv/linux/aarch64/libc-start.h
+
+diff --git a/sysdeps/unix/sysv/linux/aarch64/libc-start.h b/sysdeps/unix/sysv/linux/aarch64/libc-start.h
+new file mode 100644
+index 0000000000..ccf0f8af5c
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/aarch64/libc-start.h
+@@ -0,0 +1,49 @@
++/* AArch64 definitions for libc main startup.
++ Copyright (C) 2024 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#ifndef _LIBC_START_H
++#define _LIBC_START_H
++
++#ifndef SHARED
++
++# ifndef PR_SET_SHADOW_STACK_STATUS
++# define PR_GET_SHADOW_STACK_STATUS 71
++# define PR_SET_SHADOW_STACK_STATUS 72
++# define PR_LOCK_SHADOW_STACK_STATUS 73
++# define PR_SHADOW_STACK_ENABLE (1UL << 0)
++# define PR_SHADOW_STACK_WRITE (1UL << 1)
++# define PR_SHADOW_STACK_PUSH (1UL << 2)
++# endif
++
++/* Must be on a top-level stack frame that does not return. */
++static inline void __attribute__((always_inline))
++aarch64_libc_setup_tls (void)
++{
++ __libc_setup_tls ();
++
++ uint64_t s = GL(dl_aarch64_gcs);
++ if (s != 0)
++ INLINE_SYSCALL_CALL (prctl, PR_SET_SHADOW_STACK_STATUS, s, 0, 0, 0);
++}
++
++# define ARCH_SETUP_IREL() apply_irel ()
++# define ARCH_SETUP_TLS() aarch64_libc_setup_tls ()
++# define ARCH_APPLY_IREL()
++#endif /* ! SHARED */
++
++#endif /* _LIBC_START_H */
+--
+2.34.1
+
+
+From 7ea8526a50e6867b154d2bb4fbe9de4ff2fc9468 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 14 Jul 2023 15:49:11 +0100
+Subject: [PATCH 14/21] aarch64: Enable GCS in dynamic linked exe
+
+Use the dynamic linker start code to enable GCS in the dynamic linked
+case after _dl_start returns and before _dl_start_user which marks
+the point after which user code may run.
+
+Like in the static linked case this ensures that GCS is enabled on a
+top level stack frame.
+---
+ sysdeps/aarch64/Makefile | 4 +++-
+ sysdeps/aarch64/dl-start.S | 23 +++++++++++++++++++++--
+ sysdeps/aarch64/rtld-global-offsets.sym | 5 +++++
+ 3 files changed, 29 insertions(+), 3 deletions(-)
+
+diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile
+index 141d7d9cc2..ca8b96f550 100644
+--- a/sysdeps/aarch64/Makefile
++++ b/sysdeps/aarch64/Makefile
+@@ -35,7 +35,9 @@ endif
+ ifeq ($(subdir),elf)
+ sysdep-rtld-routines += dl-start
+ sysdep-dl-routines += tlsdesc dl-tlsdesc
+-gen-as-const-headers += dl-link.sym
++gen-as-const-headers += \
++ dl-link.sym \
++ rtld-global-offsets.sym
+
+ tests-internal += tst-ifunc-arg-1 tst-ifunc-arg-2
+
+diff --git a/sysdeps/aarch64/dl-start.S b/sysdeps/aarch64/dl-start.S
+index d645484e79..271bd5bf00 100644
+--- a/sysdeps/aarch64/dl-start.S
++++ b/sysdeps/aarch64/dl-start.S
+@@ -18,6 +18,7 @@
+ <https://www.gnu.org/licenses/>. */
+
+ #include <sysdep.h>
++#include <rtld-global-offsets.h>
+
+ ENTRY (_start)
+ /* Create an initial frame with 0 LR and FP */
+@@ -25,11 +26,30 @@ ENTRY (_start)
+ mov x29, #0
+ mov x30, #0
+
++ /* Load and relocate all library dependencies. */
+ mov x0, sp
+ PTR_ARG (0)
+ bl _dl_start
+ /* Returns user entry point in x0. */
+ mov PTR_REG (21), PTR_REG (0)
++
++ /* Use GL(dl_aarch64_gcs) to set the shadow stack status. */
++ adrp x16, _rtld_local
++ add PTR_REG (16), PTR_REG (16), :lo12:_rtld_local
++ ldr x1, [x16, GL_DL_AARCH64_GCS_OFFSET]
++ cbz x1, L(skip_gcs_enable)
++
++ /* Enable GCS before user code runs. Note that IFUNC resolvers and
++ LD_AUDIT hooks may run before, but should not create threads. */
++#define PR_SET_SHADOW_STACK_STATUS 72
++ mov x0, PR_SET_SHADOW_STACK_STATUS
++ mov x2, 0
++ mov x3, 0
++ mov x4, 0
++ mov x8, #SYS_ify(prctl)
++ svc 0x0
++L(skip_gcs_enable):
++
+ .globl _dl_start_user
+ .type _dl_start_user, %function
+ _dl_start_user:
+@@ -40,8 +60,7 @@ _dl_start_user:
+ /* Compute envp. */
+ add PTR_REG (3), PTR_REG (2), PTR_REG (1), lsl PTR_LOG_SIZE
+ add PTR_REG (3), PTR_REG (3), PTR_SIZE
+- adrp x16, _rtld_local
+- add PTR_REG (16), PTR_REG (16), :lo12:_rtld_local
++ /* Run the init functions of the loaded modules. */
+ ldr PTR_REG (0), [x16]
+ bl _dl_init
+ /* Load the finalizer function. */
+diff --git a/sysdeps/aarch64/rtld-global-offsets.sym b/sysdeps/aarch64/rtld-global-offsets.sym
+index 23cdaf7d9e..6c0690bb95 100644
+--- a/sysdeps/aarch64/rtld-global-offsets.sym
++++ b/sysdeps/aarch64/rtld-global-offsets.sym
+@@ -3,8 +3,13 @@
+ #include <ldsodefs.h>
+
+ #define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name)
++#define GL_offsetof(name) offsetof (struct rtld_global, _##name)
+
+ -- Offsets of _rtld_global_ro in libc.so
+
+ GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap)
+ GLRO_DL_HWCAP2_OFFSET GLRO_offsetof (dl_hwcap2)
++
++-- Offsets of _rtld_global in libc.so
++
++GL_DL_AARCH64_GCS_OFFSET GL_offsetof (dl_aarch64_gcs)
+--
+2.34.1
+
+
+From 1e348038b0f013ef9c30e3c4072b9555344391cb Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Thu, 21 Dec 2023 15:14:08 +0000
+Subject: [PATCH 15/21] aarch64: add glibc.cpu.aarch64_gcs_policy
+
+policy sets how gcs tunable and gcs marking turns into gcs state:
+
+0: state = tunable
+1: state = marking ? tunable : (tunable && dlopen ? err : 0)
+2: state = marking ? tunable : (tunable ? err : 0)
+
+TODO: state lock
+---
+ sysdeps/aarch64/dl-tunables.list | 5 +++++
+ sysdeps/unix/sysv/linux/aarch64/cpu-features.c | 9 +++++++--
+ sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c | 13 +++++++++++++
+ 3 files changed, 25 insertions(+), 2 deletions(-)
+
+diff --git a/sysdeps/aarch64/dl-tunables.list b/sysdeps/aarch64/dl-tunables.list
+index c08be86932..2a07a6216b 100644
+--- a/sysdeps/aarch64/dl-tunables.list
++++ b/sysdeps/aarch64/dl-tunables.list
+@@ -26,5 +26,10 @@ glibc {
+ minval: 0
+ default: 0
+ }
++ aarch64_gcs_policy {
++ type: UINT_64
++ minval: 0
++ default: 0
++ }
+ }
+ }
+diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+index a8b6a4654a..bab5c32892 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+@@ -130,6 +130,11 @@ init_cpu_features (struct cpu_features *cpu_features)
+ cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS;
+
+ if (GLRO (dl_hwcap2) & HWCAP2_GCS)
+- /* GCS status may be updated later by binary compatibility checks. */
+- GL (dl_aarch64_gcs) = TUNABLE_GET (glibc, cpu, aarch64_gcs, uint64_t, 0);
++ {
++ /* GCS status may be updated later by binary compatibility checks. */
++ GL (dl_aarch64_gcs) = TUNABLE_GET (glibc, cpu, aarch64_gcs, uint64_t, 0);
++ /* Fixed GCS policy. */
++ GLRO (dl_aarch64_gcs_policy) =
++ TUNABLE_GET (glibc, cpu, aarch64_gcs_policy, uint64_t, 0);
++ }
+ }
+diff --git a/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c b/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c
+index 7af232de52..a9d5ee9df5 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c
++++ b/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c
+@@ -54,6 +54,19 @@ PROCINFO_CLASS struct cpu_features _dl_aarch64_cpu_features
+ # else
+ ,
+ # endif
++# if !defined PROCINFO_DECL && defined SHARED
++ ._dl_aarch64_gcs_policy
++# else
++PROCINFO_CLASS uint64_t _dl_aarch64_gcs_policy
++# endif
++# ifndef PROCINFO_DECL
++= 0
++# endif
++# if !defined SHARED || defined PROCINFO_DECL
++;
++# else
++,
++# endif
+ #endif
+
+ /* Number of HWCAP bits set. */
+--
+2.34.1
+
+
+From 92cd77cd36137d81ac9500f595f0c4d0c3c6539d Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Thu, 28 Dec 2023 18:31:32 +0000
+Subject: [PATCH 16/21] aarch64: use l_searchlist.r_list for bti
+
+Allows using the same function for static exe.
+---
+ sysdeps/aarch64/dl-bti.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/sysdeps/aarch64/dl-bti.c b/sysdeps/aarch64/dl-bti.c
+index fd0d308a39..e03bfc2acb 100644
+--- a/sysdeps/aarch64/dl-bti.c
++++ b/sysdeps/aarch64/dl-bti.c
+@@ -84,10 +84,9 @@ _dl_bti_check (struct link_map *l, const char *program)
+ if (l->l_mach.bti_fail)
+ bti_failed (l, program);
+
+- unsigned int i = l->l_searchlist.r_nlist;
+- while (i-- > 0)
++ for (unsigned int i = 0; i < l->l_searchlist.r_nlist; i++)
+ {
+- struct link_map *dep = l->l_initfini[i];
++ struct link_map *dep = l->l_searchlist.r_list[i];
+ if (dep->l_mach.bti_fail)
+ bti_failed (dep, program);
+ }
+--
+2.34.1
+
+
+From deded666b363b18c93ee6baed7dcf32551158eca Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Thu, 21 Dec 2023 23:05:10 +0000
+Subject: [PATCH 17/21] aarch64: handling gcs marking
+
+---
+ sysdeps/aarch64/Makefile | 4 ++-
+ sysdeps/aarch64/dl-gcs.c | 59 +++++++++++++++++++++++++++++++++++++++
+ sysdeps/aarch64/dl-prop.h | 15 ++++++----
+ sysdeps/aarch64/linkmap.h | 1 +
+ 4 files changed, 73 insertions(+), 6 deletions(-)
+ create mode 100644 sysdeps/aarch64/dl-gcs.c
+
+diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile
+index ca8b96f550..74479604f2 100644
+--- a/sysdeps/aarch64/Makefile
++++ b/sysdeps/aarch64/Makefile
+@@ -9,7 +9,9 @@ LDFLAGS-rtld += -Wl,-z,force-bti,--fatal-warnings
+ endif
+
+ ifeq ($(subdir),elf)
+-sysdep-dl-routines += dl-bti
++sysdep-dl-routines += \
++ dl-bti \
++ dl-gcs
+
+ tests += tst-audit26 \
+ tst-audit27
+diff --git a/sysdeps/aarch64/dl-gcs.c b/sysdeps/aarch64/dl-gcs.c
+new file mode 100644
+index 0000000000..a92deb54b5
+--- /dev/null
++++ b/sysdeps/aarch64/dl-gcs.c
+@@ -0,0 +1,59 @@
++/* AArch64 GCS functions.
++ Copyright (C) 2024 Free Software Foundation, Inc.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <unistd.h>
++#include <errno.h>
++#include <libintl.h>
++#include <ldsodefs.h>
++
++static void
++fail (struct link_map *l, const char *program)
++{
++ if (program)
++ _dl_fatal_printf ("%s: %s: not GCS compatible\n", program, l->l_name);
++ else
++ _dl_signal_error (0, l->l_name, "dlopen", N_("not GCS compatible"));
++}
++
++static void
++check_gcs (struct link_map *l, const char *program)
++{
++ if (!l->l_mach.gcs)
++ {
++ if (GLRO(dl_aarch64_gcs_policy) == 2 || !program)
++ fail (l, program);
++ if (GLRO(dl_aarch64_gcs_policy) == 1 && program)
++ GL(dl_aarch64_gcs) = 0;
++ }
++}
++
++/* Apply GCS policy for L and its dependencies. */
++
++void
++_dl_gcs_check (struct link_map *l, const char *program)
++{
++ /* GCS is disabled. */
++ if (GL(dl_aarch64_gcs) == 0)
++ return;
++ /* GCS marking is ignored. */
++ if (GLRO(dl_aarch64_gcs_policy) == 0)
++ return;
++
++ check_gcs (l, program);
++ for (unsigned int i = 0; i < l->l_searchlist.r_nlist; i++)
++ check_gcs (l->l_initfini[i], program);
++}
+diff --git a/sysdeps/aarch64/dl-prop.h b/sysdeps/aarch64/dl-prop.h
+index df05c0211d..72ac11aec0 100644
+--- a/sysdeps/aarch64/dl-prop.h
++++ b/sysdeps/aarch64/dl-prop.h
+@@ -24,16 +24,21 @@ extern void _dl_bti_protect (struct link_map *, int) attribute_hidden;
+ extern void _dl_bti_check (struct link_map *, const char *)
+ attribute_hidden;
+
++extern void _dl_gcs_check (struct link_map *, const char *)
++ attribute_hidden;
++
+ static inline void __attribute__ ((always_inline))
+ _rtld_main_check (struct link_map *m, const char *program)
+ {
+ _dl_bti_check (m, program);
++ _dl_gcs_check (m, program);
+ }
+
+ static inline void __attribute__ ((always_inline))
+ _dl_open_check (struct link_map *m)
+ {
+ _dl_bti_check (m, NULL);
++ _dl_gcs_check (m, NULL);
+ }
+
+ static inline void __attribute__ ((always_inline))
+@@ -45,10 +50,6 @@ static inline int
+ _dl_process_gnu_property (struct link_map *l, int fd, uint32_t type,
+ uint32_t datasz, void *data)
+ {
+- if (!GLRO(dl_aarch64_cpu_features).bti)
+- /* Skip note processing. */
+- return 0;
+-
+ if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND)
+ {
+ /* Stop if the property note is ill-formed. */
+@@ -57,7 +58,11 @@ _dl_process_gnu_property (struct link_map *l, int fd, uint32_t type,
+
+ unsigned int feature_1 = *(unsigned int *) data;
+ if (feature_1 & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
+- _dl_bti_protect (l, fd);
++ if (GLRO(dl_aarch64_cpu_features).bti)
++ _dl_bti_protect (l, fd);
++
++ if (feature_1 & GNU_PROPERTY_AARCH64_FEATURE_1_GCS)
++ l->l_mach.gcs = 1;
+
+ /* Stop if we processed the property note. */
+ return 0;
+diff --git a/sysdeps/aarch64/linkmap.h b/sysdeps/aarch64/linkmap.h
+index 56a63fc3dd..423fc0bd8e 100644
+--- a/sysdeps/aarch64/linkmap.h
++++ b/sysdeps/aarch64/linkmap.h
+@@ -23,4 +23,5 @@ struct link_map_machine
+ ElfW(Addr) plt; /* Address of .plt */
+ void *tlsdesc_table; /* Address of TLS descriptor hash table. */
+ bool bti_fail; /* Failed to enable Branch Target Identification. */
++ bool gcs; /* Guarded Control Stack marking. */
+ };
+--
+2.34.1
+
+
+From 5017a71252bd923b764b58cd61021b028c84d361 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Thu, 28 Dec 2023 18:32:02 +0000
+Subject: [PATCH 18/21] aarch64: use l_searchlist.r_list for gcs
+
+Allows using the same function for static exe.
+---
+ sysdeps/aarch64/dl-gcs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sysdeps/aarch64/dl-gcs.c b/sysdeps/aarch64/dl-gcs.c
+index a92deb54b5..764b8a56e9 100644
+--- a/sysdeps/aarch64/dl-gcs.c
++++ b/sysdeps/aarch64/dl-gcs.c
+@@ -55,5 +55,5 @@ _dl_gcs_check (struct link_map *l, const char *program)
+
+ check_gcs (l, program);
+ for (unsigned int i = 0; i < l->l_searchlist.r_nlist; i++)
+- check_gcs (l->l_initfini[i], program);
++ check_gcs (l->l_searchlist.r_list[i], program);
+ }
+--
+2.34.1
+
+
+From 84bfdb91bded2358b2642eed3bdae3c049576eb4 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Wed, 10 Jan 2024 16:20:24 +0000
+Subject: [PATCH 19/21] aarch64: ignore GCS property of ld.so
+
+ldso->l_mach.gcs may not be set up, just assume ldso is GCS compatible.
+---
+ sysdeps/aarch64/dl-gcs.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/sysdeps/aarch64/dl-gcs.c b/sysdeps/aarch64/dl-gcs.c
+index 764b8a56e9..b81aa30787 100644
+--- a/sysdeps/aarch64/dl-gcs.c
++++ b/sysdeps/aarch64/dl-gcs.c
+@@ -32,6 +32,11 @@ fail (struct link_map *l, const char *program)
+ static void
+ check_gcs (struct link_map *l, const char *program)
+ {
++#ifdef SHARED
++ /* Ignore GCS marking on ld.so: its properties are not processed. */
++ if (l->l_real == &GL(dl_rtld_map))
++ return;
++#endif
+ if (!l->l_mach.gcs)
+ {
+ if (GLRO(dl_aarch64_gcs_policy) == 2 || !program)
+--
+2.34.1
+
+
+From fcdce58087260a68d1a74b28e5b0146e69511f16 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Wed, 27 Dec 2023 17:17:11 +0000
+Subject: [PATCH 20/21] aarch64: process gnu properties in static exe
+
+---
+ sysdeps/unix/sysv/linux/aarch64/libc-start.h | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/sysdeps/unix/sysv/linux/aarch64/libc-start.h b/sysdeps/unix/sysv/linux/aarch64/libc-start.h
+index ccf0f8af5c..6e2e9762ee 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/libc-start.h
++++ b/sysdeps/unix/sysv/linux/aarch64/libc-start.h
+@@ -21,6 +21,8 @@
+
+ #ifndef SHARED
+
++# include <dl-prop.h>
++
+ # ifndef PR_SET_SHADOW_STACK_STATUS
+ # define PR_GET_SHADOW_STACK_STATUS 71
+ # define PR_SET_SHADOW_STACK_STATUS 72
+@@ -36,6 +38,16 @@ aarch64_libc_setup_tls (void)
+ {
+ __libc_setup_tls ();
+
++ struct link_map *main_map = _dl_get_dl_main_map ();
++ const ElfW(Phdr) *phdr = GL(dl_phdr);
++ const ElfW(Phdr) *ph;
++ for (ph = phdr; ph < phdr + GL(dl_phnum); ph++)
++ if (ph->p_type == PT_GNU_PROPERTY)
++ {
++ _dl_process_pt_gnu_property (main_map, -1, ph);
++ _rtld_main_check (main_map, _dl_argv[0]);
++ break;
++ }
+ uint64_t s = GL(dl_aarch64_gcs);
+ if (s != 0)
+ INLINE_SYSCALL_CALL (prctl, PR_SET_SHADOW_STACK_STATUS, s, 0, 0, 0);
+--
+2.34.1
+
+
+From bea263f87c18cc7949b556db73883a209edd27dc Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Wed, 14 Feb 2024 15:06:40 +0000
+Subject: [PATCH 21/21] doc: add plain text readme for using GCS
+
+---
+ README | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 69 insertions(+)
+
+diff --git a/README b/README
+index 2e360eb70a..061818d51b 100644
+--- a/README
++++ b/README
+@@ -1,3 +1,72 @@
++this branch contains experimental GCS support (not ABI stable)
++
++source and branches
++-------------------
++
++binutils-gdb: upstream-git users/ARM/gcs-binutils-gdb-master
++gcc (trunk): upstream-git vendors/ARM/gcs
++gcc (gcc-13): upstream-git vendors/ARM/gcs-13
++ note: gcc vendor branches need setup https://gcc.gnu.org/gitwrite.html#vendor
++glibc: upstream-git arm/gcs
++linux: https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git arm64-gcs
++fvp fast model can be used for testing.
++
++toolchain build
++---------------
++
++two options:
++
++(1) branch-protect by default
++ configure gcc with --enable-standard-branch-protection
++ and build glibc normally
++
++(2) do not branch-protect by default, require explicit cflags
++ configure gcc with
++ CFLAGS_FOR_TARGET='-O2 -mbranch-protection=standard'
++ CXXFLAGS_FOR_TARGET='-O2 -mbranch-protection=standard'
++ and configure glibc with
++ CFLAGS='-g -O2 -mbranch-protection=standard'
++ build user code with
++ CFLAGS+=-mbranch-protection=standard
++ (equivalent to -mbranch-protection=bti+pac+gcs)
++
++linking
++-------
++
++use ldflags:
++
++-z experimental-gcs={always,never,implicit}
++ always: force GCS marking on
++ never: force GCS marking off
++ implicit: mark output if all inputs are marked (default)
++
++-z experimental-gcs-report={none,warning,error}
++ none: silent (default)
++ warning: when output is marked, unmarked input is a warning
++ error: when output is marked, unmarked input is an error
++
++runtime
++-------
++
++run with environment var
++
++ GLIBC_TUNABLES=glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2
++
++by default both tunables are 0, the meaning is
++
++glibc.cpu.aarch64_gcs_policy=0:
++ GCS is enabled if glibc.cpu.aarch64_gcs is set
++glibc.cpu.aarch64_gcs_policy=1:
++ GCS is enabled if glibc.cpu.aarch64_gcs is set and binary is marked
++ if GCS is enabled an incompatible dlopen is an error
++glibc.cpu.aarch64_gcs_policy=2:
++ GCS is enabled if glibc.cpu.aarch64_gcs is set
++ if GCS is enabled any incompatible binary is an error
++
++
++original readme
++---------------
++
+ This directory contains the sources of the GNU C Library.
+ See the file "version.h" for what release version you have.
+
+--
+2.34.1
+
diff --git a/meta-arm-gcs/recipes-core/glibc/glibc_2.39.bbappend b/meta-arm-gcs/recipes-core/glibc/glibc_2.39.bbappend
new file mode 100644
index 00000000..b3aba4f9
--- /dev/null
+++ b/meta-arm-gcs/recipes-core/glibc/glibc_2.39.bbappend
@@ -0,0 +1,3 @@
+FILESEXTRAPATHS:prepend := "${THISDIR}/files:"
+
+SRC_URI += "file://gcs.patch"
diff --git a/meta-arm-gcs/recipes-devtools/binutils/binutils_2.42.bbappend b/meta-arm-gcs/recipes-devtools/binutils/binutils_2.42.bbappend
new file mode 100644
index 00000000..b3aba4f9
--- /dev/null
+++ b/meta-arm-gcs/recipes-devtools/binutils/binutils_2.42.bbappend
@@ -0,0 +1,3 @@
+FILESEXTRAPATHS:prepend := "${THISDIR}/files:"
+
+SRC_URI += "file://gcs.patch"
diff --git a/meta-arm-gcs/recipes-devtools/binutils/files/gcs.patch b/meta-arm-gcs/recipes-devtools/binutils/files/gcs.patch
new file mode 100644
index 00000000..9bbfaf4e
--- /dev/null
+++ b/meta-arm-gcs/recipes-devtools/binutils/files/gcs.patch
@@ -0,0 +1,973 @@
+From afe69c2e274db719e1835ee112150012271b62b7 Mon Sep 17 00:00:00 2001
+From: Srinath Parvathaneni <srinath.parvathaneni@arm.com>
+Date: Tue, 30 Jan 2024 08:59:53 +0000
+Subject: [PATCH] aarch64: Add support for GCS in AArch64 linker.
+
+This patch adds support for GCS in AArch64 linker.
+
+This patch implements the following:
+1) Defines GNU_PROPERTY_AARCH64_FEATURE_1_GCS bit for GCS in
+GNU_PROPERTY_AARCH64_FEATURE_1_AND macro.
+
+2) Adds readelf support to read and print the GNU properties
+in AArch64.
+
+Displaying notes found in: .note.gnu.property
+[ ]+Owner[ ]+Data size[ ]+Description
+ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
+ Properties: AArch64 feature: GCS
+
+3) Adds support for -z experimental-gcs linker option and document
+all the values allowed with option (-z experimental-gcs[=always|never|implicit]).
+-z experimental-gcs is equivalent to -z experimental-gcs=always and
+when option is not passed in the command line, it defaults to implicit.
+
+4) Adds support for -z experimental-gcs-report linker option and document
+all the values allowed with this option (-z experimental-gcs-report[=none|warning|error]).
+-z experimental-gcs-report is equivalent to -z experimental-gcs-report=none
+and when option is not passed in the command line, it defaults to none.
+
+The ABI changes adding GNU_PROPERTY_AARCH64_FEATURE_1_GCS to the
+GNU property GNU_PROPERTY_AARCH64_FEATURE_1_AND is merged into main and
+can be found below.
+https://github.com/ARM-software/abi-aa/blob/main/sysvabi64/sysvabi64.rst
+
+Upstream-Status: Pending [https://sourceware.org/git/?p=binutils-gdb.git;a=shortlog;h=refs/heads/users/ARM/gcs-binutils-gdb-master]
+Signed-off-by: Ross Burton <ross.burton@arm.com>
+---
+ bfd/elfnn-aarch64.c | 87 +++++++++++++++++----
+ bfd/elfxx-aarch64.c | 37 ++++++++-
+ bfd/elfxx-aarch64.h | 36 +++++++--
+ binutils/readelf.c | 4 +
+ include/elf/common.h | 1 +
+ ld/emultempl/aarch64elf.em | 45 ++++++++++-
+ ld/testsuite/ld-aarch64/aarch64-elf.exp | 23 ++++++
+ ld/testsuite/ld-aarch64/property-bti-pac1.d | 2 +-
+ ld/testsuite/ld-aarch64/property-bti-pac1.s | 14 ++++
+ ld/testsuite/ld-aarch64/property-gcs.s | 25 ++++++
+ ld/testsuite/ld-aarch64/property-gcs1.d | 6 ++
+ ld/testsuite/ld-aarch64/property-gcs10.d | 6 ++
+ ld/testsuite/ld-aarch64/property-gcs11.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs12.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs13.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs14.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs15.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs16.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs17.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs18.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs19.d | 6 ++
+ ld/testsuite/ld-aarch64/property-gcs2.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs2.s | 33 ++++++++
+ ld/testsuite/ld-aarch64/property-gcs20.d | 6 ++
+ ld/testsuite/ld-aarch64/property-gcs21.d | 6 ++
+ ld/testsuite/ld-aarch64/property-gcs22.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs3.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs4.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs5.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs6.d | 12 +++
+ ld/testsuite/ld-aarch64/property-gcs7.d | 6 ++
+ ld/testsuite/ld-aarch64/property-gcs8.d | 11 +++
+ ld/testsuite/ld-aarch64/property-gcs9.d | 12 +++
+ 33 files changed, 495 insertions(+), 26 deletions(-)
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs.s
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs1.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs10.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs11.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs12.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs13.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs14.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs15.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs16.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs17.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs18.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs19.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs2.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs2.s
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs20.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs21.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs22.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs3.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs4.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs5.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs6.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs7.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs8.d
+ create mode 100644 ld/testsuite/ld-aarch64/property-gcs9.d
+
+diff --git a/bfd/elfnn-aarch64.c b/bfd/elfnn-aarch64.c
+index 109517db4aa..428f2c3507d 100644
+--- a/bfd/elfnn-aarch64.c
++++ b/bfd/elfnn-aarch64.c
+@@ -2546,6 +2546,12 @@ struct elf_aarch64_obj_tdata
+ GNU_PROPERTY_AARCH64_FEATURE_1_BTI. */
+ int no_bti_warn;
+
++ /* Mark output with GCS based on -z experimental-gcs. */
++ aarch64_gcs_type gcs_type;
++ /* Report linker warning/error for -z experimental-gcs-report based on
++ -z experimental-gcs. */
++ aarch64_gcs_report gcs_report;
++
+ /* PLT type based on security. */
+ aarch64_plt_type plt_type;
+ };
+@@ -5011,7 +5017,7 @@ bfd_elfNN_aarch64_set_options (struct bfd *output_bfd,
+ int fix_erratum_835769,
+ erratum_84319_opts fix_erratum_843419,
+ int no_apply_dynamic_relocs,
+- aarch64_bti_pac_info bp_info)
++ aarch64_gnu_prop_info bp_info)
+ {
+ struct elf_aarch64_link_hash_table *globals;
+
+@@ -5039,6 +5045,24 @@ bfd_elfNN_aarch64_set_options (struct bfd *output_bfd,
+ default:
+ break;
+ }
++
++ switch (bp_info.gcs_type)
++ {
++ case GCS_ALWAYS:
++ elf_aarch64_tdata (output_bfd)->gnu_and_prop
++ |= GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
++ break;
++ case GCS_NEVER:
++ elf_aarch64_tdata (output_bfd)->gnu_and_prop
++ &= ~GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
++ break;
++
++ default:
++ break;
++ }
++
++ elf_aarch64_tdata (output_bfd)->gcs_type = bp_info.gcs_type;
++ elf_aarch64_tdata (output_bfd)->gcs_report = bp_info.gcs_report;
+ elf_aarch64_tdata (output_bfd)->plt_type = bp_info.plt_type;
+ setup_plt_values (link_info, bp_info.plt_type);
+ }
+@@ -10196,7 +10220,12 @@ static bfd *
+ elfNN_aarch64_link_setup_gnu_properties (struct bfd_link_info *info)
+ {
+ uint32_t prop = elf_aarch64_tdata (info->output_bfd)->gnu_and_prop;
+- bfd *pbfd = _bfd_aarch64_elf_link_setup_gnu_properties (info, &prop);
++ aarch64_gcs_report gcs_report
++ = elf_aarch64_tdata (info->output_bfd)->gcs_report;
++ aarch64_gcs_type gcs_type
++ = elf_aarch64_tdata (info->output_bfd)->gcs_type;
++ bfd *pbfd = _bfd_aarch64_elf_link_setup_gnu_properties (info, &prop,
++ gcs_report, gcs_type);
+ elf_aarch64_tdata (info->output_bfd)->gnu_and_prop = prop;
+ elf_aarch64_tdata (info->output_bfd)->plt_type
+ |= (prop & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) ? PLT_BTI : 0;
+@@ -10215,30 +10244,54 @@ elfNN_aarch64_merge_gnu_properties (struct bfd_link_info *info,
+ {
+ uint32_t prop
+ = elf_aarch64_tdata (info->output_bfd)->gnu_and_prop;
++ aarch64_gcs_report gcs_report
++ = elf_aarch64_tdata (info->output_bfd)->gcs_report;
++ aarch64_gcs_type gcs_type
++ = elf_aarch64_tdata (info->output_bfd)->gcs_type;
+
+- /* If output has been marked with BTI using command line argument, give out
+- warning if necessary. */
+ /* Properties are merged per type, hence only check for warnings when merging
+ GNU_PROPERTY_AARCH64_FEATURE_1_AND. */
+- if (((aprop && aprop->pr_type == GNU_PROPERTY_AARCH64_FEATURE_1_AND)
++ if ((aprop && aprop->pr_type == GNU_PROPERTY_AARCH64_FEATURE_1_AND)
+ || (bprop && bprop->pr_type == GNU_PROPERTY_AARCH64_FEATURE_1_AND))
+- && (prop & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
+- && (!elf_aarch64_tdata (info->output_bfd)->no_bti_warn))
+ {
+- if ((aprop && !(aprop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_BTI))
+- || !aprop)
++ /* If output has been marked with BTI using command line argument, give
++ out warning if necessary. */
++ if ((prop & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
++ && (!elf_aarch64_tdata (info->output_bfd)->no_bti_warn))
+ {
+- _bfd_error_handler (_("%pB: warning: BTI turned on by -z force-bti when "
+- "all inputs do not have BTI in NOTE section."),
+- abfd);
++ if ((aprop && !(aprop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_BTI))
++ || !aprop)
++ {
++ _bfd_error_handler (_("%pB: warning: BTI turned on by -z "
++ "force-bti when all inputs do not have BTI "
++ "in NOTE section."), abfd);
++ }
++ if ((bprop && !(bprop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_BTI))
++ || !bprop)
++ {
++ _bfd_error_handler (_("%pB: warning: BTI turned on by -z "
++ "force-bti when all inputs do not have BTI "
++ "in NOTE section."), bbfd);
++ }
+ }
+- if ((bprop && !(bprop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_BTI))
+- || !bprop)
++
++ /* If output has been marked with GCS using -z experimental-gcs and input
++ is missing GCS marking throw warning/error on
++ -z experimental-gcs-report=warning/error. */
++ if ((prop & GNU_PROPERTY_AARCH64_FEATURE_1_GCS) && gcs_report != GCS_NONE)
+ {
+- _bfd_error_handler (_("%pB: warning: BTI turned on by -z force-bti when "
+- "all inputs do not have BTI in NOTE section."),
+- bbfd);
++ if ((aprop && !(aprop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_GCS))
++ || !aprop)
++ _bfd_aarch64_elf_check_gcs_report (gcs_report, abfd);
++ if ((bprop && !(bprop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_GCS))
++ || !bprop)
++ _bfd_aarch64_elf_check_gcs_report (gcs_report, bbfd);
+ }
++
++ if (gcs_type == GCS_NEVER && aprop != NULL)
++ aprop->u.number &= ~GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
++ if (gcs_type == GCS_NEVER && bprop != NULL)
++ bprop->u.number &= ~GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
+ }
+
+ return _bfd_aarch64_elf_merge_gnu_properties (info, abfd, aprop,
+diff --git a/bfd/elfxx-aarch64.c b/bfd/elfxx-aarch64.c
+index d1279adc2e4..dd64f2067ac 100644
+--- a/bfd/elfxx-aarch64.c
++++ b/bfd/elfxx-aarch64.c
+@@ -702,7 +702,9 @@ _bfd_aarch64_elf_write_core_note (bfd *abfd, char *buf, int *bufsiz, int note_ty
+ GPROP accordingly. */
+ bfd *
+ _bfd_aarch64_elf_link_setup_gnu_properties (struct bfd_link_info *info,
+- uint32_t *gprop)
++ uint32_t *gprop,
++ aarch64_gcs_report gcs_report,
++ aarch64_gcs_type gcs_type)
+ {
+ asection *sec;
+ bfd *pbfd;
+@@ -738,6 +740,11 @@ _bfd_aarch64_elf_link_setup_gnu_properties (struct bfd_link_info *info,
+ _bfd_error_handler (_("%pB: warning: BTI turned on by -z force-bti "
+ "when all inputs do not have BTI in NOTE "
+ "section."), ebfd);
++
++ if ((gnu_prop & GNU_PROPERTY_AARCH64_FEATURE_1_GCS)
++ && !(prop->u.number & GNU_PROPERTY_AARCH64_FEATURE_1_GCS))
++ _bfd_aarch64_elf_check_gcs_report (gcs_report, ebfd);
++
+ prop->u.number |= gnu_prop;
+ prop->pr_kind = property_number;
+
+@@ -765,6 +772,14 @@ _bfd_aarch64_elf_link_setup_gnu_properties (struct bfd_link_info *info,
+ elf_section_type (sec) = SHT_NOTE;
+ }
+ }
++ else if (ebfd != NULL && gcs_type == GCS_NEVER)
++ {
++ prop = _bfd_elf_get_property (ebfd, GNU_PROPERTY_AARCH64_FEATURE_1_AND,
++ 4);
++ prop->u.number &= ~GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
++ if (prop->u.number == 0)
++ prop->pr_kind = property_remove;
++ }
+
+ pbfd = _bfd_elf_link_setup_gnu_properties (info);
+
+@@ -785,7 +800,8 @@ _bfd_aarch64_elf_link_setup_gnu_properties (struct bfd_link_info *info,
+ {
+ gnu_prop = (p->property.u.number
+ & (GNU_PROPERTY_AARCH64_FEATURE_1_PAC
+- | GNU_PROPERTY_AARCH64_FEATURE_1_BTI));
++ | GNU_PROPERTY_AARCH64_FEATURE_1_BTI
++ | GNU_PROPERTY_AARCH64_FEATURE_1_GCS));
+ break;
+ }
+ else if (GNU_PROPERTY_AARCH64_FEATURE_1_AND < p->property.pr_type)
+@@ -922,3 +938,20 @@ _bfd_aarch64_elf_link_fixup_gnu_properties
+ }
+ }
+ }
++
++/* Check AArch64 GCS report. */
++void
++_bfd_aarch64_elf_check_gcs_report (aarch64_gcs_report gcs_report, bfd *ebfd)
++{
++ if (gcs_report == GCS_WARN)
++ _bfd_error_handler (_("%pB: warning: GCS turned on by -z experimental-gcs "
++ "on the output when all inputs do not have GCS in NOTE "
++ "section."), ebfd);
++ else if (gcs_report == GCS_ERROR)
++ {
++ _bfd_error_handler (_("%pB: error: GCS turned on by -z experimental-gcs "
++ "on the output when all inputs do not have GCS in "
++ "NOTE section."), ebfd);
++ _exit (EXIT_FAILURE);
++ }
++}
+diff --git a/bfd/elfxx-aarch64.h b/bfd/elfxx-aarch64.h
+index 6c084f75796..ca523d81df1 100644
+--- a/bfd/elfxx-aarch64.h
++++ b/bfd/elfxx-aarch64.h
+@@ -46,6 +46,27 @@ typedef enum
+ BTI_WARN = 1, /* BTI is enabled with -z force-bti. */
+ } aarch64_enable_bti_type;
+
++/* To indicate whether GNU_PROPERTY_AARCH64_FEATURE_1_GCS bit is
++ enabled/disabled on the output when -z experimental-gcs linker
++ command line option is passed. */
++typedef enum
++{
++ GCS_NEVER = 0, /* gcs is disabled on output. */
++ GCS_IMPLICIT = 1, /* gcs is deduced from input object. */
++ GCS_ALWAYS = 2, /* gcs is enabled on output. */
++} aarch64_gcs_type;
++
++/* To indicate whether to generate linker warning/errors for
++ -z experimental-gcs-report when -z experimental-gcs=always is passed. */
++typedef enum
++{
++ GCS_NONE = 0, /* Does not emit any warning/error messages. */
++ GCS_WARN = 1, /* Emit warning when the input objects are missing gcs
++ markings and output have gcs marking. */
++ GCS_ERROR = 2, /* Emit error when the input objects are missing gcs
++ markings and output have gcs marking. */
++} aarch64_gcs_report;
++
+ /* A structure to encompass all information coming from BTI or PAC
+ related command line options. This involves the "PLT_TYPE" to determine
+ which version of PLTs to pick and "BTI_TYPE" to determine if
+@@ -54,7 +75,9 @@ typedef struct
+ {
+ aarch64_plt_type plt_type;
+ aarch64_enable_bti_type bti_type;
+-} aarch64_bti_pac_info;
++ aarch64_gcs_type gcs_type;
++ aarch64_gcs_report gcs_report;
++} aarch64_gnu_prop_info;
+
+ /* An enum to define what kind of erratum fixes we should apply. This gives the
+ user a bit more control over the sequences we generate. */
+@@ -67,11 +90,11 @@ typedef enum
+
+ extern void bfd_elf64_aarch64_set_options
+ (bfd *, struct bfd_link_info *, int, int, int, int, erratum_84319_opts, int,
+- aarch64_bti_pac_info);
++ aarch64_gnu_prop_info);
+
+ extern void bfd_elf32_aarch64_set_options
+ (bfd *, struct bfd_link_info *, int, int, int, int, erratum_84319_opts, int,
+- aarch64_bti_pac_info);
++ aarch64_gnu_prop_info);
+
+ /* AArch64 stub generation support for ELF64. Called from the linker. */
+ extern int elf64_aarch64_setup_section_lists
+@@ -135,8 +158,9 @@ _bfd_aarch64_elf_write_core_note (bfd *, char *, int *, int, ...);
+ #define elf_backend_write_core_note _bfd_aarch64_elf_write_core_note
+
+ extern bfd *
+-_bfd_aarch64_elf_link_setup_gnu_properties (struct bfd_link_info *,
+- uint32_t *);
++_bfd_aarch64_elf_link_setup_gnu_properties (struct bfd_link_info *, uint32_t *,
++ aarch64_gcs_report,
++ aarch64_gcs_type);
+
+ extern enum elf_property_kind
+ _bfd_aarch64_elf_parse_gnu_properties (bfd *, unsigned int,
+@@ -146,6 +170,8 @@ extern bool
+ _bfd_aarch64_elf_merge_gnu_properties (struct bfd_link_info *, bfd *,
+ elf_property *, elf_property *,
+ uint32_t);
++extern void
++_bfd_aarch64_elf_check_gcs_report (aarch64_gcs_report, bfd *);
+
+ extern void
+ _bfd_aarch64_elf_link_fixup_gnu_properties (struct bfd_link_info *,
+diff --git a/binutils/readelf.c b/binutils/readelf.c
+index 5e4ad6ea6ad..794cbb77a9c 100644
+--- a/binutils/readelf.c
++++ b/binutils/readelf.c
+@@ -20636,6 +20636,10 @@ decode_aarch64_feature_1_and (unsigned int bitmask)
+ printf ("PAC");
+ break;
+
++ case GNU_PROPERTY_AARCH64_FEATURE_1_GCS:
++ printf ("GCS");
++ break;
++
+ default:
+ printf (_("<unknown: %x>"), bit);
+ break;
+diff --git a/include/elf/common.h b/include/elf/common.h
+index 6a66456cd22..289b8821b7d 100644
+--- a/include/elf/common.h
++++ b/include/elf/common.h
+@@ -1001,6 +1001,7 @@
+
+ #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+ #define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
++#define GNU_PROPERTY_AARCH64_FEATURE_1_GCS (1U << 2)
+
+ /* Values used in GNU .note.ABI-tag notes (NT_GNU_ABI_TAG). */
+ #define GNU_ABI_TAG_LINUX 0
+diff --git a/ld/emultempl/aarch64elf.em b/ld/emultempl/aarch64elf.em
+index b647909ae63..fb331e06553 100644
+--- a/ld/emultempl/aarch64elf.em
++++ b/ld/emultempl/aarch64elf.em
+@@ -36,6 +36,12 @@ static erratum_84319_opts fix_erratum_843419 = ERRAT_NONE;
+ static int no_apply_dynamic_relocs = 0;
+ static aarch64_plt_type plt_type = PLT_NORMAL;
+ static aarch64_enable_bti_type bti_type = BTI_NONE;
++static aarch64_gcs_type gcs_type = GCS_IMPLICIT;
++static aarch64_gcs_report gcs_report = GCS_NONE;
++static const char * egr = "experimental-gcs-report";
++static const char * eg = "experimental-gcs";
++#define EGR_LEN strlen (egr)
++#define EG_LEN strlen (eg)
+
+ static void
+ gld${EMULATION_NAME}_before_parse (void)
+@@ -321,9 +327,11 @@ aarch64_elf_create_output_section_statements (void)
+ return;
+ }
+
+- aarch64_bti_pac_info bp_info;
++ aarch64_gnu_prop_info bp_info;
+ bp_info.plt_type = plt_type;
+ bp_info.bti_type = bti_type;
++ bp_info.gcs_type = gcs_type;
++ bp_info.gcs_report = gcs_report;
+
+ bfd_elf${ELFSIZE}_aarch64_set_options (link_info.output_bfd, &link_info,
+ no_enum_size_warning,
+@@ -408,6 +416,19 @@ PARSE_AND_LIST_OPTIONS='
+ fprintf (file, _(" --no-apply-dynamic-relocs Do not apply link-time values for dynamic relocations\n"));
+ fprintf (file, _(" -z force-bti Turn on Branch Target Identification mechanism and generate PLTs with BTI. Generate warnings for missing BTI on inputs\n"));
+ fprintf (file, _(" -z pac-plt Protect PLTs with Pointer Authentication.\n"));
++ fprintf (file, _("\
++ -z experimental-gcs[=always|never|implicit] Turn on Guarded Control Stack(gcs) mechanism on the output.\n\
++ implicit(default): deduce gcs from input objects.\n\
++ always: always marks the output with gcs.\n\
++ never: never marks the output with gcs.\n"));
++ fprintf (file, _("\
++ -z experimental-gcs-report[=none|warning|error] Emit warning/error on mismatch of gcs marking between input objects and output.\n\
++ none (default): Does not emit any warning/error messages.\n\
++ warning: Emit warning when the input objects are missing gcs markings\n\
++ and output have gcs marking.\n\
++ error: Emit error when the input objects are missing gcs markings\n\
++ and output have gcs marking.\n"));
++
+ '
+
+ PARSE_AND_LIST_ARGS_CASE_Z_AARCH64='
+@@ -418,6 +439,28 @@ PARSE_AND_LIST_ARGS_CASE_Z_AARCH64='
+ }
+ else if (strcmp (optarg, "pac-plt") == 0)
+ plt_type |= PLT_PAC;
++ else if (strncmp (optarg, egr, EGR_LEN) == 0)
++ {
++ if (strlen (optarg) == EGR_LEN || strcmp (optarg + EGR_LEN, "=none") == 0)
++ gcs_report = GCS_NONE;
++ else if (strcmp (optarg + EGR_LEN, "=warning") == 0)
++ gcs_report = GCS_WARN;
++ else if (strcmp (optarg + EGR_LEN, "=error") == 0)
++ gcs_report = GCS_ERROR;
++ else
++ einfo (_("%P: error: unrecognized: `%s'\''\n"), optarg);
++ }
++ else if (strncmp (optarg, eg, EG_LEN) == 0)
++ {
++ if (strlen (optarg) == EG_LEN || strcmp (optarg + EG_LEN, "=always") == 0)
++ gcs_type = GCS_ALWAYS;
++ else if (strcmp (optarg + EG_LEN, "=never") == 0)
++ gcs_type = GCS_NEVER;
++ else if (strcmp (optarg + EG_LEN, "=implicit") == 0)
++ gcs_type = GCS_IMPLICIT;
++ else
++ einfo (_("%P: error: unrecognized: `%s'\''\n"), optarg);
++ }
+ '
+ PARSE_AND_LIST_ARGS_CASE_Z="$PARSE_AND_LIST_ARGS_CASE_Z $PARSE_AND_LIST_ARGS_CASE_Z_AARCH64"
+
+diff --git a/ld/testsuite/ld-aarch64/aarch64-elf.exp b/ld/testsuite/ld-aarch64/aarch64-elf.exp
+index 9ce61579e6c..31abc5a07d8 100644
+--- a/ld/testsuite/ld-aarch64/aarch64-elf.exp
++++ b/ld/testsuite/ld-aarch64/aarch64-elf.exp
+@@ -471,3 +471,26 @@ run_dump_test_lp64 "bti-far-3"
+ if { ![skip_sframe_tests] } {
+ run_dump_test "sframe-simple-1"
+ }
++
++run_dump_test "property-gcs1"
++run_dump_test "property-gcs2"
++run_dump_test "property-gcs3"
++run_dump_test "property-gcs4"
++run_dump_test "property-gcs5"
++run_dump_test "property-gcs6"
++run_dump_test "property-gcs7"
++run_dump_test "property-gcs8"
++run_dump_test "property-gcs9"
++run_dump_test "property-gcs10"
++run_dump_test "property-gcs11"
++run_dump_test "property-gcs12"
++run_dump_test "property-gcs13"
++run_dump_test "property-gcs14"
++run_dump_test "property-gcs15"
++run_dump_test "property-gcs16"
++run_dump_test "property-gcs17"
++run_dump_test "property-gcs18"
++run_dump_test "property-gcs19"
++run_dump_test "property-gcs20"
++run_dump_test "property-gcs21"
++run_dump_test "property-gcs22"
+diff --git a/ld/testsuite/ld-aarch64/property-bti-pac1.d b/ld/testsuite/ld-aarch64/property-bti-pac1.d
+index 59fa695165a..c28a0cbf850 100644
+--- a/ld/testsuite/ld-aarch64/property-bti-pac1.d
++++ b/ld/testsuite/ld-aarch64/property-bti-pac1.d
+@@ -8,4 +8,4 @@
+ Displaying notes found in: .note.gnu.property
+ [ ]+Owner[ ]+Data size[ ]+Description
+ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
+- Properties: AArch64 feature: BTI, PAC
++ Properties: AArch64 feature: BTI, PAC, GCS
+diff --git a/ld/testsuite/ld-aarch64/property-bti-pac1.s b/ld/testsuite/ld-aarch64/property-bti-pac1.s
+index 414c9277f1d..42156917d58 100644
+--- a/ld/testsuite/ld-aarch64/property-bti-pac1.s
++++ b/ld/testsuite/ld-aarch64/property-bti-pac1.s
+@@ -12,6 +12,20 @@ _start:
+ .long 5f - 2f /* data length */
+ .long 5 /* note type */
+ 0: .asciz "GNU" /* vendor name */
++1:
++ .p2align 3
++2: .long 0xc0000000 /* pr_type. */
++ .long 4f - 3f /* pr_datasz. */
++3:
++ .long 0x4 /* GCS. */
++4:
++ .p2align 3
++5:
++ .p2align 3
++ .long 1f - 0f /* name length */
++ .long 5f - 2f /* data length */
++ .long 5 /* note type */
++0: .asciz "GNU" /* vendor name */
+ 1:
+ .p2align 3
+ 2: .long 0xc0000000 /* pr_type. */
+diff --git a/ld/testsuite/ld-aarch64/property-gcs.s b/ld/testsuite/ld-aarch64/property-gcs.s
+new file mode 100644
+index 00000000000..bc7e66e8933
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs.s
+@@ -0,0 +1,25 @@
++ .text
++ .globl _start
++ .type _start,@function
++_start:
++ mov x1, #2
++.ifndef __mult__
++ bl foo
++.endif
++.ifdef __property_gcs__
++ .section ".note.gnu.property", "a"
++ .p2align 3
++ .long 1f - 0f /* name length */
++ .long 5f - 2f /* data length */
++ .long 5 /* note type */
++0: .asciz "GNU" /* vendor name */
++1:
++ .p2align 3
++2: .long 0xc0000000 /* pr_type. */
++ .long 4f - 3f /* pr_datasz. */
++3:
++ .long 0x4 /* GCS. */
++4:
++ .p2align 3
++5:
++.endif
+diff --git a/ld/testsuite/ld-aarch64/property-gcs1.d b/ld/testsuite/ld-aarch64/property-gcs1.d
+new file mode 100644
+index 00000000000..c724ac56ca3
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs1.d
+@@ -0,0 +1,6 @@
++#name: GNU Property (input without gcs)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0
++#ld: -shared
++#readelf: -n
+diff --git a/ld/testsuite/ld-aarch64/property-gcs10.d b/ld/testsuite/ld-aarch64/property-gcs10.d
+new file mode 100644
+index 00000000000..4b6deedc0c2
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs10.d
+@@ -0,0 +1,6 @@
++#name: GNU Property (input without gcs output forced with experimental-gcs=always experimental-gcs-report=error)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0
++#ld: -z experimental-gcs=always -z experimental-gcs-report=error
++#error: .*property-gcs.*: error: GCS turned on by -z experimental-gcs on the output when all inputs do not have GCS in NOTE section.
+diff --git a/ld/testsuite/ld-aarch64/property-gcs11.d b/ld/testsuite/ld-aarch64/property-gcs11.d
+new file mode 100644
+index 00000000000..8abacf28eb1
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs11.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input with gcs output forced with experimental-gcs)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1
++#ld: -z experimental-gcs
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs12.d b/ld/testsuite/ld-aarch64/property-gcs12.d
+new file mode 100644
+index 00000000000..0fe246dfa3a
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs12.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input with gcs output forced with experimental-gcs=always)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1
++#ld: -z experimental-gcs=always
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs13.d b/ld/testsuite/ld-aarch64/property-gcs13.d
+new file mode 100644
+index 00000000000..c6077aeaa5a
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs13.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input with gcs output forced with experimental-gcs experimental-gcs-report=none)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1
++#ld: -z experimental-gcs -z experimental-gcs-report=none
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs14.d b/ld/testsuite/ld-aarch64/property-gcs14.d
+new file mode 100644
+index 00000000000..0f7490ef4a5
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs14.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input with gcs output forced with experimental-gcs experimental-gcs-report=warning)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1
++#ld: -z experimental-gcs -z experimental-gcs-report=warning
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs15.d b/ld/testsuite/ld-aarch64/property-gcs15.d
+new file mode 100644
+index 00000000000..d1e723e0ea6
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs15.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input with gcs output forced with experimental-gcs experimental-gcs-report=error)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1
++#ld: -z experimental-gcs -z experimental-gcs-report=error
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs16.d b/ld/testsuite/ld-aarch64/property-gcs16.d
+new file mode 100644
+index 00000000000..340577f1758
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs16.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input with gcs output forced with experimental-gcs=always experimental-gcs-report=none)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1
++#ld: -z experimental-gcs=always -z experimental-gcs-report=none
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs17.d b/ld/testsuite/ld-aarch64/property-gcs17.d
+new file mode 100644
+index 00000000000..4ba9583ee92
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs17.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input with gcs output forced with experimental-gcs=always experimental-gcs-report=warning)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1
++#ld: -z experimental-gcs=always -z experimental-gcs-report=warning
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs18.d b/ld/testsuite/ld-aarch64/property-gcs18.d
+new file mode 100644
+index 00000000000..f71c10e2523
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs18.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input with gcs output forced with experimental-gcs=always experimental-gcs-report=error)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1
++#ld: -z experimental-gcs=always -z experimental-gcs-report=error
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs19.d b/ld/testsuite/ld-aarch64/property-gcs19.d
+new file mode 100644
+index 00000000000..468f96edcf1
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs19.d
+@@ -0,0 +1,6 @@
++#name: GNU Property (input without gcs output forced with experimental-gcs=never)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0
++#ld: -z experimental-gcs=never
++#readelf: -n
+diff --git a/ld/testsuite/ld-aarch64/property-gcs2.d b/ld/testsuite/ld-aarch64/property-gcs2.d
+new file mode 100644
+index 00000000000..ed545a180b3
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs2.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input with gcs)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1
++#ld: -shared
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs2.s b/ld/testsuite/ld-aarch64/property-gcs2.s
+new file mode 100644
+index 00000000000..6db7d8396c8
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs2.s
+@@ -0,0 +1,33 @@
++ .text
++ .global foo
++ .type foo, %function
++foo:
++ sub sp, sp, #16
++ mov w0, 9
++ str w0, [sp, 12]
++ ldr w0, [sp, 12]
++ add w0, w0, 4
++ str w0, [sp, 12]
++ nop
++ add sp, sp, 16
++ ret
++ .size foo, .-foo
++ .global bar
++ .type bar, %function
++.ifdef __property_gcs__
++ .section ".note.gnu.property", "a"
++ .p2align 3
++ .long 1f - 0f /* name length */
++ .long 5f - 2f /* data length */
++ .long 5 /* note type */
++0: .asciz "GNU" /* vendor name */
++1:
++ .p2align 3
++2: .long 0xc0000000 /* pr_type. */
++ .long 4f - 3f /* pr_datasz. */
++3:
++ .long 0x4 /* GCS. */
++4:
++ .p2align 3
++5:
++.endif
+diff --git a/ld/testsuite/ld-aarch64/property-gcs20.d b/ld/testsuite/ld-aarch64/property-gcs20.d
+new file mode 100644
+index 00000000000..2bdff88a27a
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs20.d
+@@ -0,0 +1,6 @@
++#name: GNU Property (input without gcs output forced with experimental-gcs=implicit)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0
++#ld: -z experimental-gcs=implicit
++#readelf: -n
+diff --git a/ld/testsuite/ld-aarch64/property-gcs21.d b/ld/testsuite/ld-aarch64/property-gcs21.d
+new file mode 100644
+index 00000000000..b42b11d14ea
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs21.d
+@@ -0,0 +1,6 @@
++#name: GNU Property (input with gcs output forced with experimental-gcs=never)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1
++#ld: -z experimental-gcs=never
++#readelf: -n
+diff --git a/ld/testsuite/ld-aarch64/property-gcs22.d b/ld/testsuite/ld-aarch64/property-gcs22.d
+new file mode 100644
+index 00000000000..431fc1ed35b
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs22.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input with gcs output forced with experimental-gcs=implicit)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0 -defsym __property_gcs__=1
++#ld: -z experimental-gcs=implicit
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs3.d b/ld/testsuite/ld-aarch64/property-gcs3.d
+new file mode 100644
+index 00000000000..68d50be0823
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs3.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input without gcs output forced with experimental-gcs)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0
++#ld: -z experimental-gcs
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs4.d b/ld/testsuite/ld-aarch64/property-gcs4.d
+new file mode 100644
+index 00000000000..cd5711e3da3
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs4.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input without gcs output forced with experimental-gcs=always)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0
++#ld: -z experimental-gcs=always
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs5.d b/ld/testsuite/ld-aarch64/property-gcs5.d
+new file mode 100644
+index 00000000000..b7a751c0276
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs5.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input without gcs output forced with experimental-gcs experimental-gcs-report=none)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0
++#ld: -z experimental-gcs -z experimental-gcs-report=none
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs6.d b/ld/testsuite/ld-aarch64/property-gcs6.d
+new file mode 100644
+index 00000000000..5abf8126d89
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs6.d
+@@ -0,0 +1,12 @@
++#name: GNU Property (input without gcs output forced with experimental-gcs experimental-gcs-report=warning)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0
++#ld: -z experimental-gcs -z experimental-gcs-report=warning
++#readelf: -n
++#warning: .*property-gcs.*: warning: GCS turned on by -z experimental-gcs on the output when all inputs do not have GCS in NOTE section.
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs7.d b/ld/testsuite/ld-aarch64/property-gcs7.d
+new file mode 100644
+index 00000000000..4df5693a27b
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs7.d
+@@ -0,0 +1,6 @@
++#name: GNU Property (input without gcs output forced with experimental-gcs experimental-gcs-report=error)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0
++#ld: -z experimental-gcs -z experimental-gcs-report=error
++#error: .*property-gcs.*: error: GCS turned on by -z experimental-gcs on the output when all inputs do not have GCS in NOTE section.
+diff --git a/ld/testsuite/ld-aarch64/property-gcs8.d b/ld/testsuite/ld-aarch64/property-gcs8.d
+new file mode 100644
+index 00000000000..463c3ad4197
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs8.d
+@@ -0,0 +1,11 @@
++#name: GNU Property (input without gcs output forced with experimental-gcs=always experimental-gcs-report=none)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0
++#ld: -z experimental-gcs=always -z experimental-gcs-report=none
++#readelf: -n
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+diff --git a/ld/testsuite/ld-aarch64/property-gcs9.d b/ld/testsuite/ld-aarch64/property-gcs9.d
+new file mode 100644
+index 00000000000..c3083675c8f
+--- /dev/null
++++ b/ld/testsuite/ld-aarch64/property-gcs9.d
+@@ -0,0 +1,12 @@
++#name: GNU Property (input without gcs output forced with experimental-gcs=always experimental-gcs-report=warning)
++#source: property-gcs.s
++#alltargets: [check_shared_lib_support] *linux*
++#as: -march=armv9.4-a+gcs -defsym __mult__=0
++#ld: -z experimental-gcs=always -z experimental-gcs-report=warning
++#readelf: -n
++#warning: .*property-gcs.*: warning: GCS turned on by -z experimental-gcs on the output when all inputs do not have GCS in NOTE section.
++
++Displaying notes found in: .note.gnu.property
++[ ]+Owner[ ]+Data size[ ]+Description
++ GNU 0x00000010 NT_GNU_PROPERTY_TYPE_0
++ Properties: AArch64 feature: GCS
+--
+2.34.1
+
diff --git a/meta-arm-gcs/recipes-devtools/gcc/files/gcs.patch b/meta-arm-gcs/recipes-devtools/gcc/files/gcs.patch
new file mode 100644
index 00000000..b3f012d0
--- /dev/null
+++ b/meta-arm-gcs/recipes-devtools/gcc/files/gcs.patch
@@ -0,0 +1,1872 @@
+Upstream-Status: Pending [vendors/ARM/gcs in gcc git]
+Signed-off-by: Ross Burton <ross.burton@arm.com>
+
+From e66be9a852ed255469f34dcd5ecf4c30e8721b66 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Mon, 19 Jun 2023 12:57:56 +0100
+Subject: [PATCH 01/19] aarch64: Add -mbranch-protection=gcs option
+
+This enables Guarded Control Stack (GCS) compatible code generation.
+
+The "standard" branch-protection type enables it, and the default
+depends on the compiler default.
+
+TODO: gcs compatibility marking is missing.
+
+gcc/ChangeLog:
+
+ * config/aarch64/aarch64-protos.h (aarch_gcs_enabled): Declare.
+ * config/aarch64/aarch64.cc (aarch_gcs_enabled): Define.
+ (aarch_handle_no_branch_protection): Handle gcs.
+ (aarch_handle_standard_branch_protection): Handle gcs.
+ (aarch_handle_gcs_protection): New.
+ * config/aarch64/aarch64.opt: Add aarch_enable_gcs.
+ * configure: Regenerate.
+ * configure.ac: Handle gcs in --enable-standard-branch-protection.
+ * doc/invoke.texi: Document -mbranch-protection=gcs.
+---
+ gcc/config/aarch64/aarch64-protos.h | 2 ++
+ gcc/config/aarch64/aarch64.cc | 24 ++++++++++++++++++++++++
+ gcc/config/aarch64/aarch64.opt | 3 +++
+ gcc/configure | 2 +-
+ gcc/configure.ac | 2 +-
+ gcc/doc/invoke.texi | 5 +++--
+ 6 files changed, 34 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
+index bd719b992a5..2802bc935c9 100644
+--- a/gcc/config/aarch64/aarch64-protos.h
++++ b/gcc/config/aarch64/aarch64-protos.h
+@@ -1113,4 +1113,6 @@ extern void aarch64_adjust_reg_alloc_order ();
+ bool aarch64_optimize_mode_switching (aarch64_mode_entity);
+ void aarch64_restore_za (rtx);
+
++extern bool aarch64_gcs_enabled ();
++
+ #endif /* GCC_AARCH64_PROTOS_H */
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 1ea84c8bd73..73969721906 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -8375,6 +8375,13 @@ aarch_bti_j_insn_p (rtx_insn *insn)
+ return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J;
+ }
+
++/* Return TRUE if Guarded Control Stack is enabled. */
++bool
++aarch64_gcs_enabled (void)
++{
++ return (aarch64_enable_gcs == 1);
++}
++
+ /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
+ bool
+ aarch_pac_insn_p (rtx x)
+@@ -18694,6 +18701,7 @@ aarch64_handle_no_branch_protection (void)
+ {
+ aarch_ra_sign_scope = AARCH_FUNCTION_NONE;
+ aarch_enable_bti = 0;
++ aarch64_enable_gcs = 0;
+ }
+
+ static void
+@@ -18702,6 +18710,7 @@ aarch64_handle_standard_branch_protection (void)
+ aarch_ra_sign_scope = AARCH_FUNCTION_NON_LEAF;
+ aarch64_ra_sign_key = AARCH64_KEY_A;
+ aarch_enable_bti = 1;
++ aarch64_enable_gcs = 1;
+ }
+
+ static void
+@@ -18728,6 +18737,11 @@ aarch64_handle_bti_protection (void)
+ {
+ aarch_enable_bti = 1;
+ }
++static void
++aarch64_handle_gcs_protection (void)
++{
++ aarch64_enable_gcs = 1;
++}
+
+ static const struct aarch_branch_protect_type aarch64_pac_ret_subtypes[] = {
+ { "leaf", false, aarch64_handle_pac_ret_leaf, NULL, 0 },
+@@ -18742,6 +18756,7 @@ static const struct aarch_branch_protect_type aarch64_branch_protect_types[] =
+ { "pac-ret", false, aarch64_handle_pac_ret_protection,
+ aarch64_pac_ret_subtypes, ARRAY_SIZE (aarch64_pac_ret_subtypes) },
+ { "bti", false, aarch64_handle_bti_protection, NULL, 0 },
++ { "gcs", false, aarch64_handle_gcs_protection, NULL, 0 },
+ { NULL, false, NULL, NULL, 0 }
+ };
+
+@@ -18842,6 +18857,15 @@ aarch64_override_options (void)
+ #endif
+ }
+
++ if (aarch64_enable_gcs == 2)
++ {
++#ifdef TARGET_ENABLE_GCS
++ aarch64_enable_gcs = 1;
++#else
++ aarch64_enable_gcs = 0;
++#endif
++ }
++
+ /* Return address signing is currently not supported for ILP32 targets. For
+ LP64 targets use the configured option in the absence of a command-line
+ option for -mbranch-protection. */
+diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
+index 6356c419399..aeb710449fb 100644
+--- a/gcc/config/aarch64/aarch64.opt
++++ b/gcc/config/aarch64/aarch64.opt
+@@ -39,6 +39,9 @@ aarch64_feature_flags aarch64_isa_flags = 0
+ TargetVariable
+ unsigned aarch_enable_bti = 2
+
++TargetVariable
++unsigned aarch64_enable_gcs = 2
++
+ TargetVariable
+ enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A
+
+diff --git a/gcc/configure b/gcc/configure
+index 266ab8f84b2..45725639fd2 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -28221,7 +28221,7 @@ if test "${enable_standard_branch_protection+set}" = set; then :
+ enableval=$enable_standard_branch_protection;
+ case $enableval in
+ yes)
+- tm_defines="${tm_defines} TARGET_ENABLE_BTI=1 TARGET_ENABLE_PAC_RET=1"
++ tm_defines="${tm_defines} TARGET_ENABLE_BTI=1 TARGET_ENABLE_PAC_RET=1 TARGET_ENABLE_GCS=1"
+ ;;
+ no)
+ ;;
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index a5aec1bc967..30d59ce7949 100644
+--- a/gcc/configure.ac
++++ b/gcc/configure.ac
+@@ -4440,7 +4440,7 @@ AS_HELP_STRING([--disable-standard-branch-protection],
+ [
+ case $enableval in
+ yes)
+- tm_defines="${tm_defines} TARGET_ENABLE_BTI=1 TARGET_ENABLE_PAC_RET=1"
++ tm_defines="${tm_defines} TARGET_ENABLE_BTI=1 TARGET_ENABLE_PAC_RET=1 TARGET_ENABLE_GCS=1"
+ ;;
+ no)
+ ;;
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 1006510fc6a..b5e2697193f 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -801,7 +801,7 @@ Objective-C and Objective-C++ Dialects}.
+ -mpc-relative-literal-loads
+ -msign-return-address=@var{scope}
+ -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}
+-+@var{b-key}]|@var{bti}
+++@var{b-key}]|@var{bti}|@var{gcs}
+ -mharden-sls=@var{opts}
+ -march=@var{name} -mcpu=@var{name} -mtune=@var{name}
+ -moverride=@var{string} -mverbose-cost-dump
+@@ -21408,7 +21408,7 @@ default value is @samp{none}. This option has been deprecated by
+ -mbranch-protection.
+
+ @opindex mbranch-protection
+-@item -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}+@var{b-key}]|@var{bti}
++@item -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}+@var{b-key}]|@var{bti}|@var{gcs}
+ Select the branch protection features to use.
+ @samp{none} is the default and turns off all types of branch protection.
+ @samp{standard} turns on all types of branch protection features. If a feature
+@@ -21421,6 +21421,7 @@ argument @samp{leaf} can be used to extend the signing to include leaf
+ functions. The optional argument @samp{b-key} can be used to sign the functions
+ with the B-key instead of the A-key.
+ @samp{bti} turns on branch target identification mechanism.
++@samp{gcs} turns on guarded control stack compatible code generation.
+
+ @opindex mharden-sls
+ @item -mharden-sls=@var{opts}
+--
+2.34.1
+
+
+From c947c0551c793aeff90139718eb7d731d543ee08 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 30 Jun 2023 16:31:23 +0100
+Subject: [PATCH 02/19] aarch64: Add branch-protection target pragma tests
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/aarch64/pragma_cpp_predefs_4.c: Add branch-protection
+ tests.
+---
+ .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 50 +++++++++++++++++++
+ 1 file changed, 50 insertions(+)
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+index 23ebe5e4f50..8e707630774 100644
+--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+@@ -83,3 +83,53 @@
+ #ifndef __ARM_FEATURE_SME_F64F64
+ #error Foo
+ #endif
++
++#pragma GCC target "branch-protection=standard"
++#ifndef __ARM_FEATURE_BTI_DEFAULT
++#error Foo
++#endif
++#if __ARM_FEATURE_PAC_DEFAULT != 1
++#error Foo
++#endif
++
++#pragma GCC target ("branch-protection=none")
++#ifdef __ARM_FEATURE_BTI_DEFAULT
++#error Foo
++#endif
++#ifdef __ARM_FEATURE_PAC_DEFAULT
++#error Foo
++#endif
++
++#pragma GCC push_options
++#pragma GCC target "branch-protection=bti+pac-ret"
++#ifndef __ARM_FEATURE_BTI_DEFAULT
++#error Foo
++#endif
++#pragma GCC pop_options
++#ifdef __ARM_FEATURE_BTI_DEFAULT
++#error Foo
++#endif
++
++#pragma GCC target "branch-protection=bti"
++#ifndef __ARM_FEATURE_BTI_DEFAULT
++#error Foo
++#endif
++#ifdef __ARM_FEATURE_PAC_DEFAULT
++#error Foo
++#endif
++
++#pragma GCC target "branch-protection=pac-ret"
++#ifdef __ARM_FEATURE_BTI_DEFAULT
++#error Foo
++#endif
++#if __ARM_FEATURE_PAC_DEFAULT != 1
++#error Foo
++#endif
++
++#pragma GCC target "branch-protection=pac-ret+leaf+b-key"
++#ifdef __ARM_FEATURE_BTI_DEFAULT
++#error Foo
++#endif
++#if __ARM_FEATURE_PAC_DEFAULT != 6
++#error Foo
++#endif
+--
+2.34.1
+
+
+From 99367f7410b3c328d67051734145f3970b84ee6f Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 30 Jun 2023 16:50:23 +0100
+Subject: [PATCH 03/19] aarch64: Add target pragma tests for gcs
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/aarch64/pragma_cpp_predefs_4.c: Add gcs specific
+ tests.
+---
+ .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 35 +++++++++++++++++++
+ 1 file changed, 35 insertions(+)
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+index 8e707630774..417293d4d5a 100644
+--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+@@ -91,6 +91,9 @@
+ #if __ARM_FEATURE_PAC_DEFAULT != 1
+ #error Foo
+ #endif
++#ifndef __ARM_FEATURE_GCS_DEFAULT
++#error Foo
++#endif
+
+ #pragma GCC target ("branch-protection=none")
+ #ifdef __ARM_FEATURE_BTI_DEFAULT
+@@ -99,6 +102,9 @@
+ #ifdef __ARM_FEATURE_PAC_DEFAULT
+ #error Foo
+ #endif
++#ifdef __ARM_FEATURE_GCS_DEFAULT
++#error Foo
++#endif
+
+ #pragma GCC push_options
+ #pragma GCC target "branch-protection=bti+pac-ret"
+@@ -117,6 +123,9 @@
+ #ifdef __ARM_FEATURE_PAC_DEFAULT
+ #error Foo
+ #endif
++#ifdef __ARM_FEATURE_GCS_DEFAULT
++#error Foo
++#endif
+
+ #pragma GCC target "branch-protection=pac-ret"
+ #ifdef __ARM_FEATURE_BTI_DEFAULT
+@@ -133,3 +142,29 @@
+ #if __ARM_FEATURE_PAC_DEFAULT != 6
+ #error Foo
+ #endif
++
++#pragma GCC target "branch-protection=gcs"
++#ifdef __ARM_FEATURE_BTI_DEFAULT
++#error Foo
++#endif
++#ifdef __ARM_FEATURE_PAC_DEFAULT
++#error Foo
++#endif
++#ifndef __ARM_FEATURE_GCS_DEFAULT
++#error Foo
++#endif
++
++#pragma GCC target "arch=armv8.8-a+gcs"
++#ifndef __ARM_FEATURE_GCS
++#error Foo
++#endif
++
++#pragma GCC target "arch=armv8.8-a+nogcs"
++#ifdef __ARM_FEATURE_GCS
++#error Foo
++#endif
++
++#pragma GCC target "arch=armv8.8-a"
++#ifdef __ARM_FEATURE_GCS
++#error Foo
++#endif
+--
+2.34.1
+
+
+From f2a000e3b40953985a778875d1a908822ca9ffbd Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Tue, 9 May 2023 15:37:49 +0100
+Subject: [PATCH 04/19] aarch64: Add support for chkfeat insn
+
+This is a hint space instruction to check for enabled HW features and
+update the x16 register accordingly.
+
+Use unspec_volatile to prevent reordering it around calls since calls
+can enable or disable HW features.
+
+gcc/ChangeLog:
+
+ * config/aarch64/aarch64.md (aarch64_chkfeat): New.
+---
+ gcc/config/aarch64/aarch64.md | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 385a669b9b3..a20462303b5 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -378,6 +378,7 @@
+ UNSPECV_BTI_C ; Represent BTI c.
+ UNSPECV_BTI_J ; Represent BTI j.
+ UNSPECV_BTI_JC ; Represent BTI jc.
++ UNSPECV_CHKFEAT ; Represent CHKFEAT X16.
+ UNSPECV_TSTART ; Represent transaction start.
+ UNSPECV_TCOMMIT ; Represent transaction commit.
+ UNSPECV_TCANCEL ; Represent transaction cancel.
+@@ -8258,6 +8259,14 @@
+ "msr\tnzcv, %0"
+ )
+
++;; CHKFEAT instruction
++(define_insn "aarch64_chkfeat"
++ [(set (reg:DI R16_REGNUM)
++ (unspec_volatile:DI [(reg:DI R16_REGNUM)] UNSPECV_CHKFEAT))]
++ ""
++ "hint\\t40 // chkfeat x16"
++)
++
+ ;; AdvSIMD Stuff
+ (include "aarch64-simd.md")
+
+--
+2.34.1
+
+
+From 6a5358558591549fb2fb5b210c9681b0d694d1af Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Tue, 9 May 2023 15:24:18 +0100
+Subject: [PATCH 05/19] aarch64: Add __builtin_aarch64_chkfeat
+
+Builtin for chkfeat: the input argument is used to initialize x16 then
+execute chkfeat and return the updated x16.
+
+Note: ACLE __chkfeat(x) plans to flip the bits to be more intuitive
+(xor the input to output), but for the builtin that seems unnecessary
+complication.
+
+gcc/ChangeLog:
+
+ * config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
+ Define AARCH64_BUILTIN_CHKFEAT.
+ (aarch64_general_init_builtins): Handle chkfeat.
+ (aarch64_general_expand_builtin): Handle chkfeat.
+---
+ gcc/config/aarch64/aarch64-builtins.cc | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
+index 75d21de1401..1c08f56ab6b 100644
+--- a/gcc/config/aarch64/aarch64-builtins.cc
++++ b/gcc/config/aarch64/aarch64-builtins.cc
+@@ -788,6 +788,8 @@ enum aarch64_builtins
+ AARCH64_PLDX,
+ AARCH64_PLI,
+ AARCH64_PLIX,
++ /* Armv8.9-A / Armv9.4-A builtins. */
++ AARCH64_BUILTIN_CHKFEAT,
+ AARCH64_BUILTIN_MAX
+ };
+
+@@ -2084,6 +2086,12 @@ aarch64_general_init_builtins (void)
+ if (TARGET_MEMTAG)
+ aarch64_init_memtag_builtins ();
+
++ tree ftype_chkfeat
++ = build_function_type_list (uint64_type_node, uint64_type_node, NULL);
++ aarch64_builtin_decls[AARCH64_BUILTIN_CHKFEAT]
++ = aarch64_general_add_builtin ("__builtin_aarch64_chkfeat", ftype_chkfeat,
++ AARCH64_BUILTIN_CHKFEAT);
++
+ if (in_lto_p)
+ handle_arm_acle_h ();
+ }
+@@ -3137,6 +3145,16 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
+ case AARCH64_PLIX:
+ aarch64_expand_prefetch_builtin (exp, fcode);
+ return target;
++
++ case AARCH64_BUILTIN_CHKFEAT:
++ {
++ rtx x16_reg = gen_rtx_REG (DImode, R16_REGNUM);
++ op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
++ emit_move_insn (x16_reg, op0);
++ expand_insn (CODE_FOR_aarch64_chkfeat, 0, 0);
++ emit_move_insn (target, x16_reg);
++ return target;
++ }
+ }
+
+ if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
+--
+2.34.1
+
+
+From fd2868c17ad14374147adc83d62ef24b60718509 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 2 Jun 2023 16:15:25 +0100
+Subject: [PATCH 06/19] aarch64: Add __builtin_aarch64_chkfeat tests
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/aarch64/chkfeat-1.c: New test.
+ * gcc.target/aarch64/chkfeat-2.c: New test.
+---
+ gcc/testsuite/gcc.target/aarch64/chkfeat-1.c | 75 ++++++++++++++++++++
+ gcc/testsuite/gcc.target/aarch64/chkfeat-2.c | 15 ++++
+ 2 files changed, 90 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/chkfeat-1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/chkfeat-2.c
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/chkfeat-1.c b/gcc/testsuite/gcc.target/aarch64/chkfeat-1.c
+new file mode 100644
+index 00000000000..2fae81e740f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/chkfeat-1.c
+@@ -0,0 +1,75 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mbranch-protection=none" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++/*
++**foo1:
++** mov x16, 1
++** hint 40 // chkfeat x16
++** mov x0, x16
++** ret
++*/
++unsigned long long
++foo1 (void)
++{
++ return __builtin_aarch64_chkfeat (1);
++}
++
++/*
++**foo2:
++** mov x16, 1
++** movk x16, 0x5678, lsl 32
++** movk x16, 0x1234, lsl 48
++** hint 40 // chkfeat x16
++** mov x0, x16
++** ret
++*/
++unsigned long long
++foo2 (void)
++{
++ return __builtin_aarch64_chkfeat (0x1234567800000001);
++}
++
++/*
++**foo3:
++** mov x16, x0
++** hint 40 // chkfeat x16
++** mov x0, x16
++** ret
++*/
++unsigned long long
++foo3 (unsigned long long x)
++{
++ return __builtin_aarch64_chkfeat (x);
++}
++
++/*
++**foo4:
++** ldr x16, \[x0\]
++** hint 40 // chkfeat x16
++** str x16, \[x0\]
++** ret
++*/
++void
++foo4 (unsigned long long *p)
++{
++ *p = __builtin_aarch64_chkfeat (*p);
++}
++
++/*
++**foo5:
++** mov x16, 1
++** hint 40 // chkfeat x16
++** cmp x16, 0
++**(
++** csel w0, w1, w0, eq
++**|
++** csel w0, w0, w1, ne
++**)
++** ret
++*/
++int
++foo5 (int x, int y)
++{
++ return __builtin_aarch64_chkfeat (1) ? x : y;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/chkfeat-2.c b/gcc/testsuite/gcc.target/aarch64/chkfeat-2.c
+new file mode 100644
+index 00000000000..682524e244f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/chkfeat-2.c
+@@ -0,0 +1,15 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++/* { dg-final { scan-assembler-times {hint\t40 // chkfeat x16} 2 } } */
++
++void bar (void);
++
++/* Extern call may change enabled HW features. */
++unsigned long long
++foo (void)
++{
++ unsigned long long a = __builtin_aarch64_chkfeat (1);
++ bar ();
++ unsigned long long b = __builtin_aarch64_chkfeat (1);
++ return a + b;
++}
+--
+2.34.1
+
+
+From 409aac824e3a69848a60daafcaeedc5f18c357dd Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Tue, 9 May 2023 16:00:01 +0100
+Subject: [PATCH 07/19] aarch64: Add GCS instructions
+
+Add instructions for the Guarded Control Stack extension.
+
+GCSSS1 and GCSSS2 are modelled as a single GCSSS unspec, because they
+are always used together in the compiler.
+
+Before GCSPOPM and GCSSS2 an extra "mov xn, 0" is added to clear the
+output register, this is needed to get reasonable result when GCS is
+disabled, when the instructions are NOPs. Since the instructions are
+expecetd to be used behind runtime feature checks, this is mainly
+relevant if GCS can be disabled asynchronously.
+
+The output of GCSPOPM is usually not needed, so a separate gcspopm_xzr
+was added to model that. Did not do the same for GCSSS as it is a less
+common operation.
+
+The used mnemonics do not depend on updated assembler since these
+instructions can be used without new -march setting behind a runtime
+check.
+
+Reading the GCSPR is modelled as unspec_volatile so it does not get
+reordered wrt the other instructions changing the GCSPR.
+
+TODO:
+- Do we care about async disable?
+- Do we need GCSSS_xzr? (to avoid the mov x,0)
+
+gcc/ChangeLog:
+
+ * config/aarch64/aarch64.md (aarch64_load_gcspr): New.
+ (aarch64_gcspopm): New.
+ (aarch64_gcspopm_xzr): New.
+ (aarch64_gcsss): New.
+---
+ gcc/config/aarch64/aarch64.md | 35 +++++++++++++++++++++++++++++++++++
+ 1 file changed, 35 insertions(+)
+
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index a20462303b5..8defd6e0582 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -379,6 +379,9 @@
+ UNSPECV_BTI_J ; Represent BTI j.
+ UNSPECV_BTI_JC ; Represent BTI jc.
+ UNSPECV_CHKFEAT ; Represent CHKFEAT X16.
++ UNSPECV_GCSPR ; Represent MRS Xn, GCSPR_EL0
++ UNSPECV_GCSPOPM ; Represent GCSPOPM.
++ UNSPECV_GCSSS ; Represent GCSSS1 and GCSSS2.
+ UNSPECV_TSTART ; Represent transaction start.
+ UNSPECV_TCOMMIT ; Represent transaction commit.
+ UNSPECV_TCANCEL ; Represent transaction cancel.
+@@ -8267,6 +8270,38 @@
+ "hint\\t40 // chkfeat x16"
+ )
+
++;; Guarded Control Stack (GCS) instructions
++(define_insn "aarch64_load_gcspr"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (unspec_volatile:DI [(const_int 0)] UNSPECV_GCSPR))]
++ ""
++ "mrs\\t%0, s3_3_c2_c5_1 // gcspr_el0"
++ [(set_attr "type" "mrs")]
++)
++
++(define_insn "aarch64_gcspopm"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (unspec_volatile:DI [(const_int 0)] UNSPECV_GCSPOPM))]
++ ""
++ "mov\\t%0, 0\;sysl\\t%0, #3, c7, c7, #1 // gcspopm"
++ [(set_attr "length" "8")]
++)
++
++(define_insn "aarch64_gcspopm_xzr"
++ [(unspec_volatile [(const_int 0)] UNSPECV_GCSPOPM)]
++ ""
++ "sysl\\txzr, #3, c7, c7, #1 // gcspopm"
++)
++
++(define_insn "aarch64_gcsss"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")]
++ UNSPECV_GCSSS))]
++ ""
++ "sys\\t#3, c7, c7, #2, %1 // gcsss1\;mov\\t%0, 0\;sysl\\t%0, #3, c7, c7, #3 // gcsss2"
++ [(set_attr "length" "12")]
++)
++
+ ;; AdvSIMD Stuff
+ (include "aarch64-simd.md")
+
+--
+2.34.1
+
+
+From 9e6a37fd99e22e3cd3d685100763c9ed201019ee Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Tue, 9 May 2023 16:21:28 +0100
+Subject: [PATCH 08/19] aarch64: Add GCS builtins
+
+Add new builtins for GCS:
+
+ void *__builtin_aarch64_gcspr (void)
+ uint64_t __builtin_aarch64_gcspopm (void)
+ void *__builtin_aarch64_gcsss (void *)
+
+The builtins are always enabled, but should be used behind runtime
+checks in case the target does not support GCS. They are thin
+wrappers around the corresponding instructions.
+
+The GCS pointer is modelled with void * type (normal stores do not
+work on GCS memory, but it is writable via the gcsss operation or
+via GCSSTR if enabled so not const) and an entry on the GCS is
+modelled with uint64_t (since it has fixed size and can be a token
+that's not a pointer).
+
+gcc/ChangeLog:
+
+ * config/aarch64/aarch64-builtins.cc (enum aarch64_builtins): Add
+ AARCH64_BUILTIN_GCSPR, AARCH64_BUILTIN_GCSPOPM, AARCH64_BUILTIN_GCSSS.
+ (aarch64_init_gcs_builtins): New.
+ (aarch64_general_init_builtins): Call aarch64_init_gcs_builtins.
+ (aarch64_expand_gcs_builtin): New.
+ (aarch64_general_expand_builtin): Call aarch64_expand_gcs_builtin.
+---
+ gcc/config/aarch64/aarch64-builtins.cc | 70 ++++++++++++++++++++++++++
+ 1 file changed, 70 insertions(+)
+
+diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
+index 1c08f56ab6b..30c977586f9 100644
+--- a/gcc/config/aarch64/aarch64-builtins.cc
++++ b/gcc/config/aarch64/aarch64-builtins.cc
+@@ -790,6 +790,9 @@ enum aarch64_builtins
+ AARCH64_PLIX,
+ /* Armv8.9-A / Armv9.4-A builtins. */
+ AARCH64_BUILTIN_CHKFEAT,
++ AARCH64_BUILTIN_GCSPR,
++ AARCH64_BUILTIN_GCSPOPM,
++ AARCH64_BUILTIN_GCSSS,
+ AARCH64_BUILTIN_MAX
+ };
+
+@@ -2041,6 +2044,29 @@ aarch64_init_fpsr_fpcr_builtins (void)
+ AARCH64_BUILTIN_SET_FPSR64);
+ }
+
++/* Add builtins for Guarded Control Stack instructions. */
++
++static void
++aarch64_init_gcs_builtins (void)
++{
++ tree ftype;
++
++ ftype = build_function_type_list (ptr_type_node, NULL);
++ aarch64_builtin_decls[AARCH64_BUILTIN_GCSPR]
++ = aarch64_general_add_builtin ("__builtin_aarch64_gcspr", ftype,
++ AARCH64_BUILTIN_GCSPR);
++
++ ftype = build_function_type_list (uint64_type_node, NULL);
++ aarch64_builtin_decls[AARCH64_BUILTIN_GCSPOPM]
++ = aarch64_general_add_builtin ("__builtin_aarch64_gcspopm", ftype,
++ AARCH64_BUILTIN_GCSPOPM);
++
++ ftype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
++ aarch64_builtin_decls[AARCH64_BUILTIN_GCSSS]
++ = aarch64_general_add_builtin ("__builtin_aarch64_gcsss", ftype,
++ AARCH64_BUILTIN_GCSSS);
++}
++
+ /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. */
+
+ void
+@@ -2092,6 +2118,8 @@ aarch64_general_init_builtins (void)
+ = aarch64_general_add_builtin ("__builtin_aarch64_chkfeat", ftype_chkfeat,
+ AARCH64_BUILTIN_CHKFEAT);
+
++ aarch64_init_gcs_builtins ();
++
+ if (in_lto_p)
+ handle_arm_acle_h ();
+ }
+@@ -3020,6 +3048,43 @@ aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode,
+ return op.value;
+ }
+
++/* Expand GCS builtin EXP with code FCODE, putting the result
++   in TARGET.  If IGNORE is true the return value is ignored.  */
++
++rtx
++aarch64_expand_gcs_builtin (tree exp, rtx target, int fcode, int ignore)
++{
++ if (fcode == AARCH64_BUILTIN_GCSPR)
++ {
++ expand_operand op;
++ create_output_operand (&op, target, DImode);
++ expand_insn (CODE_FOR_aarch64_load_gcspr, 1, &op);
++ return op.value;
++ }
++ if (fcode == AARCH64_BUILTIN_GCSPOPM && ignore)
++ {
++ expand_insn (CODE_FOR_aarch64_gcspopm_xzr, 0, 0);
++ return target;
++ }
++ if (fcode == AARCH64_BUILTIN_GCSPOPM)
++ {
++ expand_operand op;
++ create_output_operand (&op, target, Pmode);
++ expand_insn (CODE_FOR_aarch64_gcspopm, 1, &op);
++ return op.value;
++ }
++ if (fcode == AARCH64_BUILTIN_GCSSS)
++ {
++ expand_operand ops[2];
++ rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 0));
++ create_output_operand (&ops[0], target, Pmode);
++ create_input_operand (&ops[1], op1, Pmode);
++ expand_insn (CODE_FOR_aarch64_gcsss, 2, ops);
++ return ops[0].value;
++ }
++ gcc_unreachable ();
++}
++
+ /* Expand an expression EXP that calls built-in function FCODE,
+ with result going to TARGET if that's convenient. IGNORE is true
+ if the result of the builtin is ignored. */
+@@ -3155,6 +3220,11 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
+ emit_move_insn (target, x16_reg);
+ return target;
+ }
++
++ case AARCH64_BUILTIN_GCSPR:
++ case AARCH64_BUILTIN_GCSPOPM:
++ case AARCH64_BUILTIN_GCSSS:
++ return aarch64_expand_gcs_builtin (exp, target, fcode, ignore);
+ }
+
+ if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
+--
+2.34.1
+
+
+From d4d950feefc4f55da32be812eb882a2f66aadcaf Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Tue, 6 Jun 2023 17:35:51 +0100
+Subject: [PATCH 09/19] aarch64: Add __builtin_aarch64_gcs* tests
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/aarch64/gcspopm-1.c: New test.
+ * gcc.target/aarch64/gcspr-1.c: New test.
+ * gcc.target/aarch64/gcsss-1.c: New test.
+---
+ gcc/testsuite/gcc.target/aarch64/gcspopm-1.c | 69 ++++++++++++++++++++
+ gcc/testsuite/gcc.target/aarch64/gcspr-1.c | 31 +++++++++
+ gcc/testsuite/gcc.target/aarch64/gcsss-1.c | 49 ++++++++++++++
+ 3 files changed, 149 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/gcspopm-1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/gcspr-1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/gcsss-1.c
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/gcspopm-1.c b/gcc/testsuite/gcc.target/aarch64/gcspopm-1.c
+new file mode 100644
+index 00000000000..6e6add39cf7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/gcspopm-1.c
+@@ -0,0 +1,69 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mbranch-protection=none" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++/*
++**foo1:
++** sysl xzr, #3, c7, c7, #1 // gcspopm
++** ret
++*/
++void
++foo1 (void)
++{
++ __builtin_aarch64_gcspopm ();
++}
++
++/*
++**foo2:
++** mov x0, 0
++** sysl x0, #3, c7, c7, #1 // gcspopm
++** ret
++*/
++unsigned long long
++foo2 (void)
++{
++ return __builtin_aarch64_gcspopm ();
++}
++
++/*
++**foo3:
++** mov x16, 1
++** (
++** mov x0, 0
++** hint 40 // chkfeat x16
++** |
++** hint 40 // chkfeat x16
++** mov x0, 0
++** )
++** cbz x16, .*
++** ret
++** mov x0, 0
++** sysl x0, #3, c7, c7, #1 // gcspopm
++** ret
++*/
++unsigned long long
++foo3 (void)
++{
++ if (__builtin_aarch64_chkfeat (1) == 0)
++ return __builtin_aarch64_gcspopm ();
++ return 0;
++}
++
++/*
++**foo4:
++** sysl xzr, #3, c7, c7, #1 // gcspopm
++** mov x0, 0
++** sysl x0, #3, c7, c7, #1 // gcspopm
++** sysl xzr, #3, c7, c7, #1 // gcspopm
++** ret
++*/
++unsigned long long
++foo4 (void)
++{
++ unsigned long long a = __builtin_aarch64_gcspopm ();
++ unsigned long long b = __builtin_aarch64_gcspopm ();
++ unsigned long long c = __builtin_aarch64_gcspopm ();
++ (void) a;
++ (void) c;
++ return b;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/gcspr-1.c b/gcc/testsuite/gcc.target/aarch64/gcspr-1.c
+new file mode 100644
+index 00000000000..0e651979551
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/gcspr-1.c
+@@ -0,0 +1,31 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mbranch-protection=none" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++/*
++**foo1:
++** mrs x0, s3_3_c2_c5_1 // gcspr_el0
++** ret
++*/
++void *
++foo1 (void)
++{
++ return __builtin_aarch64_gcspr ();
++}
++
++/*
++**foo2:
++** mrs x[0-9]*, s3_3_c2_c5_1 // gcspr_el0
++** sysl xzr, #3, c7, c7, #1 // gcspopm
++** mrs x[0-9]*, s3_3_c2_c5_1 // gcspr_el0
++** sub x0, x[0-9]*, x[0-9]*
++** ret
++*/
++long
++foo2 (void)
++{
++ const char *p = __builtin_aarch64_gcspr ();
++ __builtin_aarch64_gcspopm ();
++ const char *q = __builtin_aarch64_gcspr ();
++ return p - q;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/gcsss-1.c b/gcc/testsuite/gcc.target/aarch64/gcsss-1.c
+new file mode 100644
+index 00000000000..025c7fee647
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/gcsss-1.c
+@@ -0,0 +1,49 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mbranch-protection=none" } */
++/* { dg-final { check-function-bodies "**" "" "" } } */
++
++/*
++**foo1:
++** sys #3, c7, c7, #2, x0 // gcsss1
++** mov x[0-9]*, 0
++** sysl x[0-9]*, #3, c7, c7, #3 // gcsss2
++** ret
++*/
++void
++foo1 (void *p)
++{
++ __builtin_aarch64_gcsss (p);
++}
++
++/*
++**foo2:
++** sys #3, c7, c7, #2, x0 // gcsss1
++** mov x0, 0
++** sysl x0, #3, c7, c7, #3 // gcsss2
++** ret
++*/
++void *
++foo2 (void *p)
++{
++ return __builtin_aarch64_gcsss (p);
++}
++
++/*
++**foo3:
++** mov x16, 1
++** hint 40 // chkfeat x16
++** cbnz x16, .*
++** sys #3, c7, c7, #2, x0 // gcsss1
++** mov x0, 0
++** sysl x0, #3, c7, c7, #3 // gcsss2
++** ret
++** mov x0, 0
++** ret
++*/
++void *
++foo3 (void *p)
++{
++ if (__builtin_aarch64_chkfeat (1) == 0)
++ return __builtin_aarch64_gcsss (p);
++ return 0;
++}
+--
+2.34.1
+
+
+From 57b7b8934997cbd3eceb84b8de30b83f05760ecc Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 14 Apr 2023 18:23:52 +0100
+Subject: [PATCH 10/19] aarch64: Add GCS support for nonlocal stack save
+
+Nonlocal stack save and restore has to also save and restore the GCS
+pointer. This is used in __builtin_setjmp/longjmp and nonlocal goto.
+
+The GCS specific code is only emitted if GCS branch-protection is
+enabled and the code always checks at runtime if GCS is enabled.
+
+The new -mbranch-protection=gcs and old -mbranch-protection=none code
+are ABI compatible: jmpbuf for __builtin_setjmp has space for 5
+pointers, the layout is
+
+ old layout: fp, pc, sp, unused, unused
+ new layout: fp, pc, sp, gcsp, unused
+
+Note: the ILP32 code generation is wrong as it saves the pointers with
+Pmode (i.e. 8 bytes per pointer), but the user supplied buffer size is
+for 5 pointers (4 bytes per pointer), this is not fixed.
+
+The nonlocal goto has no ABI compatibility issues as the goto and its
+destination are in the same translation unit.
+
+TODO:
+- can we simplify the define_expand rtls?
+
+gcc/ChangeLog:
+
+ * config/aarch64/aarch64.h (STACK_SAVEAREA_MODE): Make space for gcs.
+ * config/aarch64/aarch64.md (save_stack_nonlocal): New.
+ (restore_stack_nonlocal): New.
+---
+ gcc/config/aarch64/aarch64.h | 7 +++
+ gcc/config/aarch64/aarch64.md | 82 +++++++++++++++++++++++++++++++++++
+ 2 files changed, 89 insertions(+)
+
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 45e901cda64..3238452f53f 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -1294,6 +1294,13 @@ typedef struct
+ #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+ ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+
++/* Have space for both SP and GCSPR in the NONLOCAL case in
++ emit_stack_save as well as in __builtin_setjmp, __builtin_longjmp
++ and __builtin_nonlocal_goto.
++ Note: On ILP32 the documented buf size is not enough PR84150. */
++#define STACK_SAVEAREA_MODE(LEVEL) \
++ ((LEVEL) == SAVE_NONLOCAL ? TImode : Pmode)
++
+ #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM)
+
+ #define RETURN_ADDR_RTX aarch64_return_addr
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 8defd6e0582..2d36af12cfb 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -1183,6 +1183,88 @@
+ (const_int 1)))]
+ )
+
++(define_expand "save_stack_nonlocal"
++ [(set (match_operand 0 "memory_operand")
++ (match_operand 1 "register_operand"))]
++ ""
++{
++ rtx stack_slot = adjust_address (operands[0], Pmode, 0);
++ emit_move_insn (stack_slot, operands[1]);
++
++ if (aarch64_gcs_enabled ())
++ {
++ /* Save GCS with code like
++ mov x16, 1
++ chkfeat x16
++ tbnz x16, 0, .L_done
++ mrs tmp, gcspr_el0
++ str tmp, [%0, 8]
++ .L_done: */
++
++ rtx done_label = gen_label_rtx ();
++ rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
++ emit_move_insn (r16, const1_rtx);
++ emit_insn (gen_aarch64_chkfeat ());
++ emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
++ rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode));
++ rtx gcs = force_reg (Pmode, const0_rtx);
++ emit_insn (gen_aarch64_load_gcspr (gcs));
++ emit_move_insn (gcs_slot, gcs);
++ emit_label (done_label);
++ }
++ DONE;
++})
++
++(define_expand "restore_stack_nonlocal"
++ [(set (match_operand 0 "register_operand" "")
++ (match_operand 1 "memory_operand" ""))]
++ ""
++{
++ rtx stack_slot = adjust_address (operands[1], Pmode, 0);
++ emit_move_insn (operands[0], stack_slot);
++
++ if (aarch64_gcs_enabled ())
++ {
++ /* Restore GCS with code like
++ mov x16, 1
++ chkfeat x16
++ tbnz x16, 0, .L_done
++ ldr tmp1, [%1, 8]
++ mrs tmp2, gcspr_el0
++ subs tmp2, tmp1, tmp2
++ b.eq .L_done
++ .L_loop:
++ gcspopm
++ subs tmp2, tmp2, 8
++ b.ne .L_loop
++ .L_done: */
++
++ rtx loop_label = gen_label_rtx ();
++ rtx done_label = gen_label_rtx ();
++ rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
++ emit_move_insn (r16, const1_rtx);
++ emit_insn (gen_aarch64_chkfeat ());
++ emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
++ rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode));
++ rtx gcs_old = force_reg (Pmode, const0_rtx);
++ emit_move_insn (gcs_old, gcs_slot);
++ rtx gcs_now = force_reg (Pmode, const0_rtx);
++ emit_insn (gen_aarch64_load_gcspr (gcs_now));
++ emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now));
++ rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
++ rtx cmp_rtx = gen_rtx_fmt_ee (EQ, DImode, cc_reg, const0_rtx);
++ emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, done_label));
++ emit_label (loop_label);
++ emit_insn (gen_aarch64_gcspopm_xzr ());
++ emit_insn (gen_adddi3_compare0 (gcs_now, gcs_now, GEN_INT (-8)));
++ cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
++ cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx);
++ emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, loop_label));
++ emit_label (done_label);
++ }
++ DONE;
++})
++
+ ;; -------------------------------------------------------------------
+ ;; Subroutine calls and sibcalls
+ ;; -------------------------------------------------------------------
+--
+2.34.1
+
+
+From 44b1847be5f2a53727a4a64f2d895b1c1d65e460 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Wed, 7 Jun 2023 10:58:06 +0100
+Subject: [PATCH 11/19] aarch64: Add non-local goto and jump tests for GCS
+
+These are scan asm tests only, relying on existing execution tests
+for runtime coverage.
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/aarch64/gcs-nonlocal-1.c: New test.
+ * gcc.target/aarch64/gcs-nonlocal-2.c: New test.
+---
+ .../gcc.target/aarch64/gcs-nonlocal-1.c | 25 +++++++++++++++++++
+ .../gcc.target/aarch64/gcs-nonlocal-2.c | 21 ++++++++++++++++
+ 2 files changed, 46 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-1.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-2.c
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-1.c b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-1.c
+new file mode 100644
+index 00000000000..821fab816f9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-1.c
+@@ -0,0 +1,25 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mbranch-protection=gcs" } */
++/* { dg-final { scan-assembler-times "hint\\t40 // chkfeat x16" 2 } } */
++/* { dg-final { scan-assembler-times "mrs\\tx\[0-9\]+, s3_3_c2_c5_1 // gcspr_el0" 2 } } */
++/* { dg-final { scan-assembler-times "sysl\\txzr, #3, c7, c7, #1 // gcspopm" 1 } } */
++
++int bar1 (int);
++int bar2 (int);
++
++void foo (int cmd)
++{
++ __label__ start;
++ int x = 0;
++
++ void nonlocal_goto (void)
++ {
++ x++;
++ goto start;
++ }
++
++start:
++ while (bar1 (x))
++ if (bar2 (x))
++ nonlocal_goto ();
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-2.c b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-2.c
+new file mode 100644
+index 00000000000..63dbce36e1e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/gcs-nonlocal-2.c
+@@ -0,0 +1,21 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mbranch-protection=gcs" } */
++/* { dg-final { scan-assembler-times "hint\\t40 // chkfeat x16" 2 } } */
++/* { dg-final { scan-assembler-times "mrs\\tx\[0-9\]+, s3_3_c2_c5_1 // gcspr_el0" 2 } } */
++/* { dg-final { scan-assembler-times "sysl\\txzr, #3, c7, c7, #1 // gcspopm" 1 } } */
++
++void longj (void *buf)
++{
++ __builtin_longjmp (buf, 1);
++}
++
++void foo (void);
++void bar (void);
++
++void setj (void *buf)
++{
++ if (__builtin_setjmp (buf))
++ foo ();
++ else
++ bar ();
++}
+--
+2.34.1
+
+
+From a9ec10c065ac9d932e30de54d68363f30ed864fe Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Tue, 9 May 2023 17:04:34 +0100
+Subject: [PATCH 12/19] aarch64: Add ACLE feature macros for GCS
+
+gcc/ChangeLog:
+
+ * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define
+ macros for GCS.
+---
+ gcc/config/aarch64/aarch64-c.cc | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
+index b5a6917d06d..132d5c86413 100644
+--- a/gcc/config/aarch64/aarch64-c.cc
++++ b/gcc/config/aarch64/aarch64-c.cc
+@@ -246,6 +246,9 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
+
+ aarch64_def_or_undef (TARGET_PAUTH, "__ARM_FEATURE_PAUTH", pfile);
+ aarch64_def_or_undef (TARGET_BTI, "__ARM_FEATURE_BTI", pfile);
++ aarch64_def_or_undef (aarch64_gcs_enabled (),
++ "__ARM_FEATURE_GCS_DEFAULT", pfile);
++ aarch64_def_or_undef (TARGET_GCS, "__ARM_FEATURE_GCS", pfile);
+ aarch64_def_or_undef (TARGET_I8MM, "__ARM_FEATURE_MATMUL_INT8", pfile);
+ aarch64_def_or_undef (TARGET_BF16_SIMD,
+ "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile);
+--
+2.34.1
+
+
+From ff3d447772dcf9b2c7293a6b1ac458898a493cdf Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Wed, 7 Jun 2023 16:17:53 +0100
+Subject: [PATCH 13/19] aarch64: Add test for GCS ACLE defs
+
+gcc/testsuite/ChangeLog:
+
+ * gcc.target/aarch64/pragma_cpp_predefs_1.c: GCS test.
+---
+ .../gcc.target/aarch64/pragma_cpp_predefs_1.c | 30 +++++++++++++++++++
+ 1 file changed, 30 insertions(+)
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
+index 307fa3d67da..6122cd55d66 100644
+--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
+@@ -268,6 +268,36 @@
+ #error "__ARM_FEATURE_RCPC is not defined but should be!"
+ #endif
+
++#pragma GCC target ("arch=armv8.8-a+gcs")
++#ifndef __ARM_FEATURE_GCS
++#error "__ARM_FEATURE_GCS is not defined but should be!"
++#endif
++
++#pragma GCC target ("arch=armv8.8-a+nogcs")
++#ifdef __ARM_FEATURE_GCS
++#error "__ARM_FEATURE_GCS is defined but should not be!"
++#endif
++
++#pragma GCC target ("arch=armv8.8-a")
++#ifdef __ARM_FEATURE_GCS
++#error "__ARM_FEATURE_GCS is defined but should not be!"
++#endif
++
++#pragma GCC target ("branch-protection=gcs")
++#ifndef __ARM_FEATURE_GCS_DEFAULT
++#error "__ARM_FEATURE_GCS_DEFAULT is not defined but should be!"
++#endif
++
++#pragma GCC target ("branch-protection=none")
++#ifdef __ARM_FEATURE_GCS_DEFAULT
++#error "__ARM_FEATURE_GCS_DEFAULT is defined but should not be!"
++#endif
++
++#pragma GCC target ("branch-protection=standard")
++#ifndef __ARM_FEATURE_GCS_DEFAULT
++#error "__ARM_FEATURE_GCS_DEFAULT is not defined but should be!"
++#endif
++
+ int
+ foo (int a)
+ {
+--
+2.34.1
+
+
+From 8187d08bc815c5d45fa5b7cc6a970811ca4c6efe Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Wed, 19 Apr 2023 14:01:36 +0100
+Subject: [PATCH 14/19] aarch64: Add GCS support to the unwinder
+
+TODO:
+- Follows the current linux ABI that uses single signal entry token
+ and shared shadow stack between thread and alt stack.
+- Could be behind __ARM_FEATURE_GCS_DEFAULT ifdef (only do anything
+ special with gcs compat codegen) but there is a runtime check anyway.
+
+libgcc/ChangeLog:
+
+ * config/aarch64/aarch64-unwind.h (_Unwind_Frames_Extra): Update.
+ (_Unwind_Frames_Increment): Define.
+---
+ libgcc/config/aarch64/aarch64-unwind.h | 59 +++++++++++++++++++++++++-
+ 1 file changed, 58 insertions(+), 1 deletion(-)
+
+diff --git a/libgcc/config/aarch64/aarch64-unwind.h b/libgcc/config/aarch64/aarch64-unwind.h
+index daf96624b5e..c22a3fc20d2 100644
+--- a/libgcc/config/aarch64/aarch64-unwind.h
++++ b/libgcc/config/aarch64/aarch64-unwind.h
+@@ -78,6 +78,9 @@ aarch64_demangle_return_addr (struct _Unwind_Context *context,
+ return addr;
+ }
+
++/* GCS enable flag for chkfeat instruction. */
++#define CHKFEAT_GCS 1
++
+ /* SME runtime function local to libgcc, streaming compatible
+ and preserves more registers than the base PCS requires, but
+ we don't rely on that here. */
+@@ -85,12 +88,66 @@ __attribute__ ((visibility ("hidden")))
+ void __libgcc_arm_za_disable (void);
+
+ /* Disable the SME ZA state in case an unwound frame used the ZA
+- lazy saving scheme. */
++ lazy saving scheme. And unwind the GCS for EH. */
+ #undef _Unwind_Frames_Extra
+ #define _Unwind_Frames_Extra(x) \
+ do \
+ { \
+ __libgcc_arm_za_disable (); \
++ if (__builtin_aarch64_chkfeat (CHKFEAT_GCS) == 0) \
++ { \
++ for (_Unwind_Word n = (x); n != 0; n--) \
++ __builtin_aarch64_gcspopm (); \
++ } \
++ } \
++ while (0)
++
++/* On signal entry the OS places a token on the GCS that can be used to
++ verify the integrity of the GCS pointer on signal return. It also
++ places the signal handler return address (the restorer that calls the
++ signal return syscall) on the GCS so the handler can return.
++ Because of this token, each stack frame visited during unwinding has
++ exactly one corresponding entry on the GCS, so the frame count is
++ the number of entries that will have to be popped at EH return time.
++
++ Note: This depends on the GCS signal ABI of the OS.
++
++ When unwinding across a stack frame for each frame the corresponding
++ entry is checked on the GCS against the computed return address from
++ the normal stack. If they don't match then _URC_FATAL_PHASE2_ERROR
++ is returned. This check is omitted if
++
++ 1. GCS is disabled. Note: asynchronous GCS disable is supported here
++ if GCSPR and the GCS remains readable.
++ 2. Non-catchable exception where exception_class == 0. Note: the
++ pthread cancellation implementation in glibc sets exception_class
++ to 0 when the unwinder is used for cancellation cleanup handling,
++ so this allows the GCS to get out of sync during cancellation.
++ This weakens security but avoids an ABI break in glibc.
++ 3. Zero return address which marks the outermost stack frame.
++ 4. Signal stack frame, the GCS entry is an OS specific token then
++ with the top bit set.
++ */
++#undef _Unwind_Frames_Increment
++#define _Unwind_Frames_Increment(exc, context, frames) \
++ do \
++ { \
++ frames++; \
++ if (__builtin_aarch64_chkfeat (CHKFEAT_GCS) != 0 \
++ || exc->exception_class == 0 \
++ || _Unwind_GetIP (context) == 0) \
++ break; \
++ const _Unwind_Word *gcs = __builtin_aarch64_gcspr (); \
++ if (_Unwind_IsSignalFrame (context)) \
++ { \
++ if (gcs[frames] >> 63 == 0) \
++ return _URC_FATAL_PHASE2_ERROR; \
++ } \
++ else \
++ { \
++ if (gcs[frames] != _Unwind_GetIP (context)) \
++ return _URC_FATAL_PHASE2_ERROR; \
++ } \
+ } \
+ while (0)
+
+--
+2.34.1
+
+
+From f183fe2dd597f300b24151034d68d168401ab83e Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Tue, 9 May 2023 14:32:46 +0100
+Subject: [PATCH 15/19] aarch64: Emit GNU property NOTE for GCS
+
+TODO: relies on experimental binutils ABI, should use build attributes.
+
+gcc/ChangeLog:
+
+ * config/aarch64/aarch64.cc (GNU_PROPERTY_AARCH64_FEATURE_1_GCS):
+ Define.
+ (aarch64_file_end_indicate_exec_stack): Set GCS property bit.
+---
+ gcc/config/aarch64/aarch64.cc | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 73969721906..0119cfdd67b 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -28962,6 +28962,7 @@ aarch64_can_tag_addresses ()
+ #define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+ #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+ #define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
++#define GNU_PROPERTY_AARCH64_FEATURE_1_GCS (1U << 2)
+ void
+ aarch64_file_end_indicate_exec_stack ()
+ {
+@@ -28974,6 +28975,9 @@ aarch64_file_end_indicate_exec_stack ()
+ if (aarch_ra_sign_scope != AARCH_FUNCTION_NONE)
+ feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
+
++ if (aarch64_gcs_enabled ())
++ feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
++
+ if (feature_1_and)
+ {
+ /* Generate .note.gnu.property section. */
+@@ -29005,6 +29009,7 @@ aarch64_file_end_indicate_exec_stack ()
+ assemble_align (POINTER_SIZE);
+ }
+ }
++#undef GNU_PROPERTY_AARCH64_FEATURE_1_GCS
+ #undef GNU_PROPERTY_AARCH64_FEATURE_1_PAC
+ #undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+ #undef GNU_PROPERTY_AARCH64_FEATURE_1_AND
+--
+2.34.1
+
+
+From 966dbf5b1816a49eebc7b9a52abe706e34ee67d3 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 22 Dec 2023 13:44:19 +0000
+Subject: [PATCH 16/19] aarch64: libgcc: add GCS marking to asm
+
+libgcc/ChangeLog:
+
+ * config/aarch64/aarch64-asm.h (FEATURE_1_GCS): Define.
+ (GCS_FLAG): Define if GCS is enabled.
+ (GNU_PROPERTY): Add GCS_FLAG.
+---
+ libgcc/config/aarch64/aarch64-asm.h | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/libgcc/config/aarch64/aarch64-asm.h b/libgcc/config/aarch64/aarch64-asm.h
+index 83c2e5944b3..86a9a0e662e 100644
+--- a/libgcc/config/aarch64/aarch64-asm.h
++++ b/libgcc/config/aarch64/aarch64-asm.h
+@@ -38,6 +38,7 @@
+ #define FEATURE_1_AND 0xc0000000
+ #define FEATURE_1_BTI 1
+ #define FEATURE_1_PAC 2
++#define FEATURE_1_GCS 4
+
+ /* Supported features based on the code generation options. */
+ #if defined(__ARM_FEATURE_BTI_DEFAULT)
+@@ -58,6 +59,12 @@
+ # define AUTIASP
+ #endif
+
++#if __ARM_FEATURE_GCS_DEFAULT
++# define GCS_FLAG FEATURE_1_GCS
++#else
++# define GCS_FLAG 0
++#endif
++
+ #ifdef __ELF__
+ #define HIDDEN(name) .hidden name
+ #define SYMBOL_SIZE(name) .size name, .-name
+@@ -88,8 +95,8 @@
+ .previous
+
+ /* Add GNU property note if built with branch protection. */
+-# if (BTI_FLAG|PAC_FLAG) != 0
+-GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
++# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0
++GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG)
+ # endif
+ #endif
+
+--
+2.34.1
+
+
+From bb609f49cbd69259513d9c39a74ad61730e3c87a Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Fri, 22 Dec 2023 15:11:25 +0000
+Subject: [PATCH 17/19] aarch64: libatomic: add GCS marking to asm
+
+libatomic/ChangeLog:
+
+ * config/linux/aarch64/atomic_16.S (FEATURE_1_GCS): Define.
+ (GCS_FLAG): Define if GCS is enabled.
+ (GNU_PROPERTY): Add GCS_FLAG.
+---
+ libatomic/config/linux/aarch64/atomic_16.S | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
+index 4e3fa870b03..d6f34eee146 100644
+--- a/libatomic/config/linux/aarch64/atomic_16.S
++++ b/libatomic/config/linux/aarch64/atomic_16.S
+@@ -790,6 +790,7 @@ ALIAS2 (test_and_set_16)
+ #define FEATURE_1_AND 0xc0000000
+ #define FEATURE_1_BTI 1
+ #define FEATURE_1_PAC 2
++#define FEATURE_1_GCS 4
+
+ /* Supported features based on the code generation options. */
+ #if defined(__ARM_FEATURE_BTI_DEFAULT)
+@@ -804,6 +805,12 @@ ALIAS2 (test_and_set_16)
+ # define PAC_FLAG 0
+ #endif
+
++#if __ARM_FEATURE_GCS_DEFAULT
++# define GCS_FLAG FEATURE_1_GCS
++#else
++# define GCS_FLAG 0
++#endif
++
+ /* Add a NT_GNU_PROPERTY_TYPE_0 note. */
+ #define GNU_PROPERTY(type, value) \
+ .section .note.gnu.property, "a"; \
+@@ -821,7 +828,7 @@ ALIAS2 (test_and_set_16)
+ .section .note.GNU-stack, "", %progbits
+
+ /* Add GNU property note if built with branch protection. */
+-# if (BTI_FLAG|PAC_FLAG) != 0
+-GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
++# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0
++GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG)
+ # endif
+ #endif
+--
+2.34.1
+
+
+From 1d54b68e7c410b83fec17708ee84e7c64a461d3d Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Tue, 2 Apr 2024 15:43:23 +0100
+Subject: [PATCH 18/19] aarch64: libitm: Add GCS support
+
+Transaction begin and abort use setjmp/longjmp like operations that
+need to be updated for GCS compatibility. We use similar logic to
+libc setjmp/longjmp that support switching stack and thus switching
+GCS (e.g. due to longjmp out of a makecontext stack), this is kept
+even though it is likely not required for transaction aborts.
+
+The gtm_jmpbuf is internal to libitm so we can change its layout
+without breaking ABI.
+
+libitm/ChangeLog:
+
+ * config/aarch64/sjlj.S: Add GCS support and mark GCS compatible.
+ * config/aarch64/target.h: Add gcs field to gtm_jmpbuf.
+---
+ libitm/config/aarch64/sjlj.S | 60 ++++++++++++++++++++++++++++++++--
+ libitm/config/aarch64/target.h | 1 +
+ 2 files changed, 58 insertions(+), 3 deletions(-)
+
+diff --git a/libitm/config/aarch64/sjlj.S b/libitm/config/aarch64/sjlj.S
+index 6b248f7c040..e21d751ef21 100644
+--- a/libitm/config/aarch64/sjlj.S
++++ b/libitm/config/aarch64/sjlj.S
+@@ -29,6 +29,13 @@
+ #define AUTIASP hint 29
+ #define PACIBSP hint 27
+ #define AUTIBSP hint 31
++#define CHKFEAT_X16 hint 40
++#define MRS_GCSPR(x) mrs x, s3_3_c2_c5_1
++#define GCSPOPM(x) sysl x, #3, c7, c7, #1
++#define GCSSS1(x) sys #3, c7, c7, #2, x
++#define GCSSS2(x) sysl x, #3, c7, c7, #3
++
++#define L(name) .L##name
+
+ #if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__GCC_HAVE_DWARF2_CFI_ASM)
+ # define cfi_window_save .cfi_window_save
+@@ -80,7 +87,16 @@ _ITM_beginTransaction:
+ stp d10, d11, [sp, 7*16]
+ stp d12, d13, [sp, 8*16]
+ stp d14, d15, [sp, 9*16]
+- str x1, [sp, 10*16]
++
++ /* GCS support. */
++ mov x2, 0
++ mov x16, 1
++ CHKFEAT_X16
++ tbnz x16, 0, L(gcs_done_sj)
++ MRS_GCSPR (x2)
++ add x2, x2, 8 /* GCS after _ITM_beginTransaction returns. */
++L(gcs_done_sj):
++ stp x2, x1, [sp, 10*16]
+
+ /* Invoke GTM_begin_transaction with the struct we just built. */
+ mov x1, sp
+@@ -117,7 +133,38 @@ GTM_longjmp:
+ ldp d10, d11, [x1, 7*16]
+ ldp d12, d13, [x1, 8*16]
+ ldp d14, d15, [x1, 9*16]
++
++ /* GCS support. */
++ mov x16, 1
++ CHKFEAT_X16
++ tbnz x16, 0, L(gcs_done_lj)
++ MRS_GCSPR (x7)
+ ldr x3, [x1, 10*16]
++ mov x4, x3
++ /* x7: GCSPR now. x3, x4: target GCSPR. x5, x6: tmp regs. */
++L(gcs_scan):
++ cmp x7, x4
++ b.eq L(gcs_pop)
++ sub x4, x4, 8
++ /* Check for a cap token. */
++ ldr x5, [x4]
++ and x6, x4, 0xfffffffffffff000
++ orr x6, x6, 1
++ cmp x5, x6
++ b.ne L(gcs_scan)
++L(gcs_switch):
++ add x7, x4, 8
++ GCSSS1 (x4)
++ GCSSS2 (xzr)
++L(gcs_pop):
++ cmp x7, x3
++ b.eq L(gcs_done_lj)
++ GCSPOPM (xzr)
++ add x7, x7, 8
++ b L(gcs_pop)
++L(gcs_done_lj):
++
++ ldr x3, [x1, 10*16 + 8]
+ ldp x29, x30, [x1]
+ cfi_def_cfa(x1, 0)
+ CFI_PAC_TOGGLE
+@@ -132,6 +179,7 @@ GTM_longjmp:
+ #define FEATURE_1_AND 0xc0000000
+ #define FEATURE_1_BTI 1
+ #define FEATURE_1_PAC 2
++#define FEATURE_1_GCS 4
+
+ /* Supported features based on the code generation options. */
+ #if defined(__ARM_FEATURE_BTI_DEFAULT)
+@@ -146,6 +194,12 @@ GTM_longjmp:
+ # define PAC_FLAG 0
+ #endif
+
++#if __ARM_FEATURE_GCS_DEFAULT
++# define GCS_FLAG FEATURE_1_GCS
++#else
++# define GCS_FLAG 0
++#endif
++
+ /* Add a NT_GNU_PROPERTY_TYPE_0 note. */
+ #define GNU_PROPERTY(type, value) \
+ .section .note.gnu.property, "a"; \
+@@ -163,7 +217,7 @@ GTM_longjmp:
+ .section .note.GNU-stack, "", %progbits
+
+ /* Add GNU property note if built with branch protection. */
+-# if (BTI_FLAG|PAC_FLAG) != 0
+-GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
++# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0
++GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG)
+ # endif
+ #endif
+diff --git a/libitm/config/aarch64/target.h b/libitm/config/aarch64/target.h
+index 3d99197bfab..a1f39b4bf7a 100644
+--- a/libitm/config/aarch64/target.h
++++ b/libitm/config/aarch64/target.h
+@@ -30,6 +30,7 @@ typedef struct gtm_jmpbuf
+ unsigned long long pc; /* x30 */
+ unsigned long long gr[10]; /* x19-x28 */
+ unsigned long long vr[8]; /* d8-d15 */
++ void *gcs; /* GCSPR_EL0 */
+ void *cfa;
+ } gtm_jmpbuf;
+
+--
+2.34.1
+
+
+From c3a32ae0e30d2f4c51659751bc784b1c84ca1351 Mon Sep 17 00:00:00 2001
+From: Szabolcs Nagy <szabolcs.nagy@arm.com>
+Date: Thu, 28 Dec 2023 13:37:38 +0000
+Subject: [PATCH 19/19] aarch64: Introduce indirect_return attribute
+
+Tail calls of indirect_return functions from non-indirect_return
+functions are disallowed even if BTI is disabled, since the call
+site may have BTI enabled.
+
+Following x86, mismatching attribute on function pointers is not
+a type error even though this can lead to bugs.
+
+Needed for swapcontext within the same function when GCS is enabled.
+
+TODO: arm? docs, tests. feature detection?
+
+gcc/ChangeLog:
+
+ * config/aarch64/aarch64.cc (aarch64_gnu_attributes): Add
+ indirect_return.
+ (aarch64_function_ok_for_sibcall): Disallow tail calls if caller
+ is non-indirect_return but callee is indirect_return.
+ (aarch64_comp_type_attributes): Check indirect_return attribute.
+ * config/arm/aarch-bti-insert.cc (call_needs_bti_j): New.
+ (rest_of_insert_bti): Use call_needs_bti_j.
+---
+ gcc/config/aarch64/aarch64.cc | 11 +++++++++
+ gcc/config/arm/aarch-bti-insert.cc | 36 ++++++++++++++++++++++++++----
+ 2 files changed, 43 insertions(+), 4 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 0119cfdd67b..593b107c8a5 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -850,6 +850,7 @@ static const attribute_spec aarch64_gnu_attributes[] =
+ affects_type_identity, handler, exclude } */
+ { "aarch64_vector_pcs", 0, 0, false, true, true, true,
+ handle_aarch64_vector_pcs_attribute, NULL },
++ { "indirect_return", 0, 0, false, true, true, false, NULL, NULL },
+ { "arm_sve_vector_bits", 1, 1, false, true, false, true,
+ aarch64_sve::handle_arm_sve_vector_bits_attribute,
+ NULL },
+@@ -6340,6 +6341,14 @@ aarch64_function_ok_for_sibcall (tree, tree exp)
+ if (bool (aarch64_cfun_shared_flags (state))
+ != bool (aarch64_fntype_shared_flags (fntype, state)))
+ return false;
++
++ /* BTI J is needed where indirect_return functions may return
++ if bti is enabled there. */
++ if (lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (fntype))
++ && !lookup_attribute ("indirect_return",
++ TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
++ return false;
++
+ return true;
+ }
+
+@@ -28855,6 +28864,8 @@ aarch64_comp_type_attributes (const_tree type1, const_tree type2)
+
+ if (!check_attr ("gnu", "aarch64_vector_pcs"))
+ return 0;
++ if (!check_attr ("gnu", "indirect_return"))
++ return 0;
+ if (!check_attr ("gnu", "Advanced SIMD type"))
+ return 0;
+ if (!check_attr ("gnu", "SVE type"))
+diff --git a/gcc/config/arm/aarch-bti-insert.cc b/gcc/config/arm/aarch-bti-insert.cc
+index 14d36971cd4..403afff9120 100644
+--- a/gcc/config/arm/aarch-bti-insert.cc
++++ b/gcc/config/arm/aarch-bti-insert.cc
+@@ -92,6 +92,35 @@ const pass_data pass_data_insert_bti =
+ 0, /* todo_flags_finish. */
+ };
+
++/* Decide if BTI J is needed after a call instruction. */
++static bool
++call_needs_bti_j (rtx_insn *insn)
++{
++ /* Call returns twice, one of which may be indirect. */
++ if (find_reg_note (insn, REG_SETJMP, NULL))
++ return true;
++
++ /* Tail call does not return. */
++ if (SIBLING_CALL_P (insn))
++ return false;
++
++ /* Check if the function is marked to return indirectly. */
++ rtx call = get_call_rtx_from (insn);
++ rtx fnaddr = XEXP (call, 0);
++ tree fndecl = NULL_TREE;
++ if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
++ fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
++ if (fndecl == NULL_TREE)
++ fndecl = MEM_EXPR (fnaddr);
++ if (!fndecl)
++ return false;
++ if (TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
++ && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
++ return false;
++ tree fntype = TREE_TYPE (fndecl);
++ return lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (fntype));
++}
++
+ /* Insert the BTI instruction. */
+ /* This is implemented as a late RTL pass that runs before branch
+ shortening and does the following. */
+@@ -147,10 +176,9 @@ rest_of_insert_bti (void)
+ }
+ }
+
+- /* Also look for calls to setjmp () which would be marked with
+- REG_SETJMP note and put a BTI J after. This is where longjump ()
+- will return. */
+- if (CALL_P (insn) && (find_reg_note (insn, REG_SETJMP, NULL)))
++ /* Also look for calls that may return indirectly, such as setjmp,
++ and put a BTI J after them. */
++ if (CALL_P (insn) && call_needs_bti_j (insn))
+ {
+ bti_insn = aarch_gen_bti_j ();
+ emit_insn_after (bti_insn, insn);
+--
+2.34.1
+
diff --git a/meta-arm-gcs/recipes-devtools/gcc/gcc-source_14.0.bbappend b/meta-arm-gcs/recipes-devtools/gcc/gcc-source_14.0.bbappend
new file mode 100644
index 00000000..b3aba4f9
--- /dev/null
+++ b/meta-arm-gcs/recipes-devtools/gcc/gcc-source_14.0.bbappend
@@ -0,0 +1,3 @@
+FILESEXTRAPATHS:prepend := "${THISDIR}/files:"
+
+SRC_URI += "file://gcs.patch"
diff --git a/meta-arm-gcs/recipes-kernel/linux/files/gcs.patch b/meta-arm-gcs/recipes-kernel/linux/files/gcs.patch
new file mode 100644
index 00000000..9000123a
--- /dev/null
+++ b/meta-arm-gcs/recipes-kernel/linux/files/gcs.patch
@@ -0,0 +1,7723 @@
+Upstream-Status: Submitted [https://git.kernel.org/pub/scm/linux/kernel/git/broonie/misc.git/log/?h=arm64-gcs]
+Signed-off-by: Ross Burton <ross.burton@arm.com>
+
+From acd6dd784ab9ef8a30a45d6145b5bc17c4373d65 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Thu, 18 Jan 2024 21:30:07 +0000
+Subject: [PATCH 02/47] Documentation: userspace-api: Add shadow stack API
+ documentation
+
+There are a number of architectures with shadow stack features which we are
+presenting to userspace with as consistent an API as we can (though there
+are some architecture specifics). Especially given that there are some
+important considerations for userspace code interacting directly with the
+feature let's provide some documentation covering the common aspects.
+
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ Documentation/userspace-api/index.rst | 1 +
+ Documentation/userspace-api/shadow_stack.rst | 41 ++++++++++++++++++++
+ 2 files changed, 42 insertions(+)
+ create mode 100644 Documentation/userspace-api/shadow_stack.rst
+
+diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
+index 09f61bd2ac2e..c142183d9c98 100644
+--- a/Documentation/userspace-api/index.rst
++++ b/Documentation/userspace-api/index.rst
+@@ -27,6 +27,7 @@ place where this information is gathered.
+ iommufd
+ media/index
+ netlink/index
++ shadow_stack
+ sysfs-platform_profile
+ vduse
+ futex2
+diff --git a/Documentation/userspace-api/shadow_stack.rst b/Documentation/userspace-api/shadow_stack.rst
+new file mode 100644
+index 000000000000..c576ad3d7ec1
+--- /dev/null
++++ b/Documentation/userspace-api/shadow_stack.rst
+@@ -0,0 +1,41 @@
++=============
++Shadow Stacks
++=============
++
++Introduction
++============
++
++Several architectures have features which provide backward edge
++control flow protection through a hardware maintained stack, only
++writeable by userspace through very limited operations. This feature
++is referred to as shadow stacks on Linux, on x86 it is part of Intel
++Control Enforcement Technology (CET), on arm64 it is Guarded Control
++Stacks feature (FEAT_GCS) and for RISC-V it is the Zicfiss extension.
++It is expected that this feature will normally be managed by the
++system dynamic linker and libc in ways broadly transparent to
++application code, this document covers interfaces and considerations.
++
++
++Enabling
++========
++
++Shadow stacks default to disabled when a userspace process is
++executed, they can be enabled for the current thread with a syscall:
++
++ - For x86 the ARCH_SHSTK_ENABLE arch_prctl()
++
++It is expected that this will normally be done by the dynamic linker.
++Any new threads created by a thread with shadow stacks enabled will
++themselves have shadow stacks enabled.
++
++
++Enablement considerations
++=========================
++
++- Returning from the function that enables shadow stacks without first
++ disabling them will cause a shadow stack exception. This includes
++ any syscall wrapper or other library functions, the syscall will need
++ to be inlined.
++- A lock feature allows userspace to prevent disabling of shadow stacks.
++- Those that change the stack context like longjmp() or use of ucontext
++ changes on signal return will need support from libc.
+--
+2.34.1
+
+
+From 4963da85eea04bd35672dfe2b43306b451c32bcd Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Mon, 29 Jan 2024 22:29:38 +0000
+Subject: [PATCH 03/47] selftests: Provide helper header for shadow stack
+ testing
+
+While almost all users of shadow stacks should be relying on the dynamic
+linker and libc to enable the feature there are several low level test
+programs where it is useful to enable without any libc support, allowing
+testing without full system enablement. This low level testing is helpful
+during bringup of the support itself, and also in enabling coverage by
+automated testing without needing all system components in the target root
+filesystems to have enablement.
+
+Provide a header with helpers for this purpose, intended for use only by
+test programs directly exercising shadow stack interfaces.
+
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/ksft_shstk.h | 63 ++++++++++++++++++++++++++++
+ 1 file changed, 63 insertions(+)
+ create mode 100644 tools/testing/selftests/ksft_shstk.h
+
+diff --git a/tools/testing/selftests/ksft_shstk.h b/tools/testing/selftests/ksft_shstk.h
+new file mode 100644
+index 000000000000..85d0747c1802
+--- /dev/null
++++ b/tools/testing/selftests/ksft_shstk.h
+@@ -0,0 +1,63 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Helpers for shadow stack enablement, this is intended to only be
++ * used by low level test programs directly exercising interfaces for
++ * working with shadow stacks.
++ *
++ * Copyright (C) 2024 ARM Ltd.
++ */
++
++#ifndef __KSFT_SHSTK_H
++#define __KSFT_SHSTK_H
++
++#include <asm/mman.h>
++
++/* This is currently only defined for x86 */
++#ifndef SHADOW_STACK_SET_TOKEN
++#define SHADOW_STACK_SET_TOKEN (1ULL << 0)
++#endif
++
++static bool shadow_stack_enabled;
++
++#ifdef __x86_64__
++#define ARCH_SHSTK_ENABLE 0x5001
++#define ARCH_SHSTK_SHSTK (1ULL << 0)
++
++#define ARCH_PRCTL(arg1, arg2) \
++({ \
++ long _ret; \
++ register long _num asm("eax") = __NR_arch_prctl; \
++ register long _arg1 asm("rdi") = (long)(arg1); \
++ register long _arg2 asm("rsi") = (long)(arg2); \
++ \
++ asm volatile ( \
++ "syscall\n" \
++ : "=a"(_ret) \
++ : "r"(_arg1), "r"(_arg2), \
++ "0"(_num) \
++ : "rcx", "r11", "memory", "cc" \
++ ); \
++ _ret; \
++})
++
++#define ENABLE_SHADOW_STACK
++static inline __attribute__((always_inline)) void enable_shadow_stack(void)
++{
++ int ret = ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK);
++ if (ret == 0)
++ shadow_stack_enabled = true;
++}
++
++#endif
++
++#ifndef __NR_map_shadow_stack
++#define __NR_map_shadow_stack 453
++#endif
++
++#ifndef ENABLE_SHADOW_STACK
++static inline void enable_shadow_stack(void) { }
++#endif
++
++#endif
++
++
+--
+2.34.1
+
+
+From dd5a2bea25b99868e19cf250f87fcefff2851857 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Thu, 19 Oct 2023 17:43:34 +0100
+Subject: [PATCH 04/47] mm: Introduce ARCH_HAS_USER_SHADOW_STACK
+
+Since multiple architectures have support for shadow stacks and we need to
+select support for this feature in several places in the generic code
+provide a generic config option that the architectures can select.
+
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/x86/Kconfig | 1 +
+ fs/proc/task_mmu.c | 2 +-
+ include/linux/mm.h | 2 +-
+ mm/Kconfig | 6 ++++++
+ 4 files changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 5edec175b9bf..34553911d07d 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -1952,6 +1952,7 @@ config X86_USER_SHADOW_STACK
+ depends on AS_WRUSS
+ depends on X86_64
+ select ARCH_USES_HIGH_VMA_FLAGS
++ select ARCH_HAS_USER_SHADOW_STACK
+ select X86_CET
+ help
+ Shadow stack protection is a hardware feature that detects function
+diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
+index 3f78ebbb795f..ff2c601f7d1c 100644
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -700,7 +700,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
+ #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+ [ilog2(VM_UFFD_MINOR)] = "ui",
+ #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+-#ifdef CONFIG_X86_USER_SHADOW_STACK
++#ifdef CONFIG_ARCH_HAS_USER_SHADOW_STACK
+ [ilog2(VM_SHADOW_STACK)] = "ss",
+ #endif
+ };
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index f5a97dec5169..c0a782eda803 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -341,7 +341,7 @@ extern unsigned int kobjsize(const void *objp);
+ #endif
+ #endif /* CONFIG_ARCH_HAS_PKEYS */
+
+-#ifdef CONFIG_X86_USER_SHADOW_STACK
++#ifdef CONFIG_ARCH_HAS_USER_SHADOW_STACK
+ /*
+ * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
+ * support core mm.
+diff --git a/mm/Kconfig b/mm/Kconfig
+index ffc3a2ba3a8c..9119e016777a 100644
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -1261,6 +1261,12 @@ config LOCK_MM_AND_FIND_VMA
+ config IOMMU_MM_DATA
+ bool
+
++config ARCH_HAS_USER_SHADOW_STACK
++ bool
++ help
++ The architecture has hardware support for userspace shadow call
++ stacks (eg, x86 CET, arm64 GCS or RISC-V Zicfiss).
++
+ source "mm/damon/Kconfig"
+
+ endmenu
+--
+2.34.1
+
+
+From 8433014f7ee3beaba9ff2e37d1c517f4625db150 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Mon, 16 Oct 2023 19:40:40 +0100
+Subject: [PATCH 05/47] fork: Add shadow stack support to clone3()
+
+Unlike with the normal stack there is no API for configuring the shadow
+stack for a new thread, instead the kernel will dynamically allocate a new
+shadow stack with the same size as the normal stack. This appears to be due
+to the shadow stack series having been in development since before the more
+extensible clone3() was added rather than anything more deliberate.
+
+Add a parameter to clone3() specifying the size of a shadow stack for
+the newly created process. If no shadow stack is specified then the
+existing implicit allocation behaviour is maintained.
+
+If the architecture does not support shadow stacks the shadow stack size
+parameter must be zero, architectures that do support the feature are
+expected to enforce the same requirement on individual systems that lack
+shadow stack support.
+
+Update the existing x86 implementation to pay attention to the newly added
+arguments, in order to maintain compatibility we use the existing behaviour
+if no shadow stack is specified. Minimal validation is done of the supplied
+parameters, detailed enforcement is left to when the thread is executed.
+Since we are now using more fields from the kernel_clone_args we pass that
+into the shadow stack code rather than individual fields.
+
+At present this implementation does not consume the shadow stack token
+atomically as would be desirable.
+
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/x86/include/asm/shstk.h | 11 +++--
+ arch/x86/kernel/process.c | 2 +-
+ arch/x86/kernel/shstk.c | 94 +++++++++++++++++++++++++++---------
+ include/linux/sched/task.h | 2 +
+ include/uapi/linux/sched.h | 13 +++--
+ kernel/fork.c | 61 ++++++++++++++++++-----
+ 6 files changed, 140 insertions(+), 43 deletions(-)
+
+diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
+index 42fee8959df7..8be7b0a909c3 100644
+--- a/arch/x86/include/asm/shstk.h
++++ b/arch/x86/include/asm/shstk.h
+@@ -6,6 +6,7 @@
+ #include <linux/types.h>
+
+ struct task_struct;
++struct kernel_clone_args;
+ struct ksignal;
+
+ #ifdef CONFIG_X86_USER_SHADOW_STACK
+@@ -16,8 +17,8 @@ struct thread_shstk {
+
+ long shstk_prctl(struct task_struct *task, int option, unsigned long arg2);
+ void reset_thread_features(void);
+-unsigned long shstk_alloc_thread_stack(struct task_struct *p, unsigned long clone_flags,
+- unsigned long stack_size);
++unsigned long shstk_alloc_thread_stack(struct task_struct *p,
++ const struct kernel_clone_args *args);
+ void shstk_free(struct task_struct *p);
+ int setup_signal_shadow_stack(struct ksignal *ksig);
+ int restore_signal_shadow_stack(void);
+@@ -26,8 +27,10 @@ static inline long shstk_prctl(struct task_struct *task, int option,
+ unsigned long arg2) { return -EINVAL; }
+ static inline void reset_thread_features(void) {}
+ static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p,
+- unsigned long clone_flags,
+- unsigned long stack_size) { return 0; }
++ const struct kernel_clone_args *args)
++{
++ return 0;
++}
+ static inline void shstk_free(struct task_struct *p) {}
+ static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; }
+ static inline int restore_signal_shadow_stack(void) { return 0; }
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
+index ab49ade31b0d..d2bfcd44de05 100644
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -207,7 +207,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
+ * is disabled, new_ssp will remain 0, and fpu_clone() will know not to
+ * update it.
+ */
+- new_ssp = shstk_alloc_thread_stack(p, clone_flags, args->stack_size);
++ new_ssp = shstk_alloc_thread_stack(p, args);
+ if (IS_ERR_VALUE(new_ssp))
+ return PTR_ERR((void *)new_ssp);
+
+diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
+index 59e15dd8d0f8..935ced6cf4f5 100644
+--- a/arch/x86/kernel/shstk.c
++++ b/arch/x86/kernel/shstk.c
+@@ -191,44 +191,92 @@ void reset_thread_features(void)
+ current->thread.features_locked = 0;
+ }
+
+-unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
+- unsigned long stack_size)
++static bool shstk_consume_token(struct task_struct *tsk,
++ unsigned long addr)
++{
++ /*
++ * SSP is aligned, so reserved bits and mode bit are a zero, just mark
++ * the token 64-bit.
++ */
++ u64 expected = (addr - SS_FRAME_SIZE) | BIT(0);
++ u64 val;
++
++ /* This should really be an atomic cpmxchg. It is not. */
++ __get_user(val, (__user u64 *)addr);
++ if (val != expected)
++ return false;
++
++ if (write_user_shstk_64((u64 __user *)addr, 0))
++ return false;
++
++ return true;
++}
++
++unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
++ const struct kernel_clone_args *args)
+ {
+ struct thread_shstk *shstk = &tsk->thread.shstk;
++ unsigned long clone_flags = args->flags;
+ unsigned long addr, size;
+
+ /*
+ * If shadow stack is not enabled on the new thread, skip any
+- * switch to a new shadow stack.
++ * implicit switch to a new shadow stack and reject attempts to
++ * explciitly specify one.
+ */
+- if (!features_enabled(ARCH_SHSTK_SHSTK))
+- return 0;
++ if (!features_enabled(ARCH_SHSTK_SHSTK)) {
++ if (args->shadow_stack || args->shadow_stack_size)
++ return (unsigned long)ERR_PTR(-EINVAL);
+
+- /*
+- * For CLONE_VFORK the child will share the parents shadow stack.
+- * Make sure to clear the internal tracking of the thread shadow
+- * stack so the freeing logic run for child knows to leave it alone.
+- */
+- if (clone_flags & CLONE_VFORK) {
+- shstk->base = 0;
+- shstk->size = 0;
+ return 0;
+ }
+
+ /*
+- * For !CLONE_VM the child will use a copy of the parents shadow
+- * stack.
++ * If the user specified a shadow stack then do some basic
++ * validation and use it, otherwise fall back to a default
++ * shadow stack size if the clone_flags don't indicate an
++ * allocation is unneeded.
+ */
+- if (!(clone_flags & CLONE_VM))
+- return 0;
++ if (args->shadow_stack) {
++ addr = args->shadow_stack;
++ size = args->shadow_stack_size;
++
++ /* There should be a valid token at the top of the stack. */
++ if (!shstk_consume_token(tsk, addr + size - sizeof(u64))) {
++ shstk->base = 0;
++ shstk->size = 0;
++ return (unsigned long)ERR_PTR(-EINVAL);
++ }
++ } else {
++ /*
++ * For CLONE_VFORK the child will share the parents
++ * shadow stack. Make sure to clear the internal
++ * tracking of the thread shadow stack so the freeing
++ * logic run for child knows to leave it alone.
++ */
++ if (clone_flags & CLONE_VFORK) {
++ shstk->base = 0;
++ shstk->size = 0;
++ return 0;
++ }
+
+- size = adjust_shstk_size(stack_size);
+- addr = alloc_shstk(0, size, 0, false);
+- if (IS_ERR_VALUE(addr))
+- return addr;
++ /*
++ * For !CLONE_VM the child will use a copy of the
++ * parents shadow stack.
++ */
++ if (!(clone_flags & CLONE_VM))
++ return 0;
+
+- shstk->base = addr;
+- shstk->size = size;
++ size = args->stack_size;
++ size = adjust_shstk_size(size);
++ addr = alloc_shstk(0, size, 0, false);
++ if (IS_ERR_VALUE(addr))
++ return addr;
++
++ /* We allocated the shadow stack, we should deallocate it. */
++ shstk->base = addr;
++ shstk->size = size;
++ }
+
+ return addr + size;
+ }
+diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
+index d362aacf9f89..dd577e8dc881 100644
+--- a/include/linux/sched/task.h
++++ b/include/linux/sched/task.h
+@@ -43,6 +43,8 @@ struct kernel_clone_args {
+ void *fn_arg;
+ struct cgroup *cgrp;
+ struct css_set *cset;
++ unsigned long shadow_stack;
++ unsigned long shadow_stack_size;
+ };
+
+ /*
+diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
+index 3bac0a8ceab2..8b7af52548fd 100644
+--- a/include/uapi/linux/sched.h
++++ b/include/uapi/linux/sched.h
+@@ -84,6 +84,10 @@
+ * kernel's limit of nested PID namespaces.
+ * @cgroup: If CLONE_INTO_CGROUP is specified set this to
+ * a file descriptor for the cgroup.
++ * @shadow_stack: Pointer to the memory allocated for the child
++ * shadow stack.
++ * @shadow_stack_size: Specify the size of the shadow stack for
++ * the child process.
+ *
+ * The structure is versioned by size and thus extensible.
+ * New struct members must go at the end of the struct and
+@@ -101,12 +105,15 @@ struct clone_args {
+ __aligned_u64 set_tid;
+ __aligned_u64 set_tid_size;
+ __aligned_u64 cgroup;
++ __aligned_u64 shadow_stack;
++ __aligned_u64 shadow_stack_size;
+ };
+ #endif
+
+-#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
+-#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
+-#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
++#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
++#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
++#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
++#define CLONE_ARGS_SIZE_VER3 104 /* sizeof fourth published struct */
+
+ /*
+ * Scheduling policies
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 0d944e92a43f..fca041cc2b8a 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -123,6 +123,11 @@
+ */
+ #define MAX_THREADS FUTEX_TID_MASK
+
++/*
++ * Require that shadow stacks can store at least one element
++ */
++#define SHADOW_STACK_SIZE_MIN sizeof(void *)
++
+ /*
+ * Protected counters by write_lock_irq(&tasklist_lock)
+ */
+@@ -3062,7 +3067,9 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
+ CLONE_ARGS_SIZE_VER1);
+ BUILD_BUG_ON(offsetofend(struct clone_args, cgroup) !=
+ CLONE_ARGS_SIZE_VER2);
+- BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2);
++ BUILD_BUG_ON(offsetofend(struct clone_args, shadow_stack_size) !=
++ CLONE_ARGS_SIZE_VER3);
++ BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER3);
+
+ if (unlikely(usize > PAGE_SIZE))
+ return -E2BIG;
+@@ -3095,16 +3102,18 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
+ return -EINVAL;
+
+ *kargs = (struct kernel_clone_args){
+- .flags = args.flags,
+- .pidfd = u64_to_user_ptr(args.pidfd),
+- .child_tid = u64_to_user_ptr(args.child_tid),
+- .parent_tid = u64_to_user_ptr(args.parent_tid),
+- .exit_signal = args.exit_signal,
+- .stack = args.stack,
+- .stack_size = args.stack_size,
+- .tls = args.tls,
+- .set_tid_size = args.set_tid_size,
+- .cgroup = args.cgroup,
++ .flags = args.flags,
++ .pidfd = u64_to_user_ptr(args.pidfd),
++ .child_tid = u64_to_user_ptr(args.child_tid),
++ .parent_tid = u64_to_user_ptr(args.parent_tid),
++ .exit_signal = args.exit_signal,
++ .stack = args.stack,
++ .stack_size = args.stack_size,
++ .tls = args.tls,
++ .set_tid_size = args.set_tid_size,
++ .cgroup = args.cgroup,
++ .shadow_stack = args.shadow_stack,
++ .shadow_stack_size = args.shadow_stack_size,
+ };
+
+ if (args.set_tid &&
+@@ -3145,6 +3154,34 @@ static inline bool clone3_stack_valid(struct kernel_clone_args *kargs)
+ return true;
+ }
+
++/**
++ * clone3_shadow_stack_valid - check and prepare shadow stack
++ * @kargs: kernel clone args
++ *
++ * Verify that shadow stacks are only enabled if supported.
++ */
++static inline bool clone3_shadow_stack_valid(struct kernel_clone_args *kargs)
++{
++ if (kargs->shadow_stack) {
++ if (!kargs->shadow_stack_size)
++ return false;
++
++ if (kargs->shadow_stack_size < SHADOW_STACK_SIZE_MIN)
++ return false;
++
++ if (kargs->shadow_stack_size > rlimit(RLIMIT_STACK))
++ return false;
++
++ /*
++ * The architecture must check support on the specific
++ * machine.
++ */
++ return IS_ENABLED(CONFIG_ARCH_HAS_USER_SHADOW_STACK);
++ } else {
++ return !kargs->shadow_stack_size;
++ }
++}
++
+ static bool clone3_args_valid(struct kernel_clone_args *kargs)
+ {
+ /* Verify that no unknown flags are passed along. */
+@@ -3167,7 +3204,7 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs)
+ kargs->exit_signal)
+ return false;
+
+- if (!clone3_stack_valid(kargs))
++ if (!clone3_stack_valid(kargs) || !clone3_shadow_stack_valid(kargs))
+ return false;
+
+ return true;
+--
+2.34.1
+
+
+From 3f6f2af71e1803c3e2d48f08c3f364efdaec5fcd Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Thu, 19 Oct 2023 15:43:49 +0100
+Subject: [PATCH 06/47] selftests/clone3: Factor more of main loop into
+ test_clone3()
+
+In order to make it easier to add more configuration for the tests and
+more support for runtime detection of when tests can be run pass the
+structure describing the tests into test_clone3() rather than picking
+the arguments out of it and have that function do all the per-test work.
+
+No functional change.
+
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/clone3/clone3.c | 77 ++++++++++++-------------
+ 1 file changed, 37 insertions(+), 40 deletions(-)
+
+diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
+index 3c9bf0cd82a8..1108bd8e36d6 100644
+--- a/tools/testing/selftests/clone3/clone3.c
++++ b/tools/testing/selftests/clone3/clone3.c
+@@ -30,6 +30,19 @@ enum test_mode {
+ CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG,
+ };
+
++typedef bool (*filter_function)(void);
++typedef size_t (*size_function)(void);
++
++struct test {
++ const char *name;
++ uint64_t flags;
++ size_t size;
++ size_function size_function;
++ int expected;
++ enum test_mode test_mode;
++ filter_function filter;
++};
++
+ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
+ {
+ struct __clone_args args = {
+@@ -104,30 +117,40 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
+ return 0;
+ }
+
+-static bool test_clone3(uint64_t flags, size_t size, int expected,
+- enum test_mode test_mode)
++static void test_clone3(const struct test *test)
+ {
++ size_t size;
+ int ret;
+
++ if (test->filter && test->filter()) {
++ ksft_test_result_skip("%s\n", test->name);
++ return;
++ }
++
++ if (test->size_function)
++ size = test->size_function();
++ else
++ size = test->size;
++
++ ksft_print_msg("Running test '%s'\n", test->name);
++
+ ksft_print_msg(
+ "[%d] Trying clone3() with flags %#" PRIx64 " (size %zu)\n",
+- getpid(), flags, size);
+- ret = call_clone3(flags, size, test_mode);
++ getpid(), test->flags, size);
++ ret = call_clone3(test->flags, size, test->test_mode);
+ ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n",
+- getpid(), ret, expected);
+- if (ret != expected) {
++ getpid(), ret, test->expected);
++ if (ret != test->expected) {
+ ksft_print_msg(
+ "[%d] Result (%d) is different than expected (%d)\n",
+- getpid(), ret, expected);
+- return false;
++ getpid(), ret, test->expected);
++ ksft_test_result_fail("%s\n", test->name);
++ return;
+ }
+
+- return true;
++ ksft_test_result_pass("%s\n", test->name);
+ }
+
+-typedef bool (*filter_function)(void);
+-typedef size_t (*size_function)(void);
+-
+ static bool not_root(void)
+ {
+ if (getuid() != 0) {
+@@ -155,16 +178,6 @@ static size_t page_size_plus_8(void)
+ return getpagesize() + 8;
+ }
+
+-struct test {
+- const char *name;
+- uint64_t flags;
+- size_t size;
+- size_function size_function;
+- int expected;
+- enum test_mode test_mode;
+- filter_function filter;
+-};
+-
+ static const struct test tests[] = {
+ {
+ .name = "simple clone3()",
+@@ -314,24 +327,8 @@ int main(int argc, char *argv[])
+ ksft_set_plan(ARRAY_SIZE(tests));
+ test_clone3_supported();
+
+- for (i = 0; i < ARRAY_SIZE(tests); i++) {
+- if (tests[i].filter && tests[i].filter()) {
+- ksft_test_result_skip("%s\n", tests[i].name);
+- continue;
+- }
+-
+- if (tests[i].size_function)
+- size = tests[i].size_function();
+- else
+- size = tests[i].size;
+-
+- ksft_print_msg("Running test '%s'\n", tests[i].name);
+-
+- ksft_test_result(test_clone3(tests[i].flags, size,
+- tests[i].expected,
+- tests[i].test_mode),
+- "%s\n", tests[i].name);
+- }
++ for (i = 0; i < ARRAY_SIZE(tests); i++)
++ test_clone3(&tests[i]);
+
+ ksft_finished();
+ }
+--
+2.34.1
+
+
+From 19b4898b0f2850497f787d1e5a3d7a6910d3ca57 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Thu, 19 Oct 2023 16:15:08 +0100
+Subject: [PATCH 07/47] selftests/clone3: Allow tests to flag if -E2BIG is a
+ valid error code
+
+The clone_args structure is extensible, with the syscall passing in the
+length of the structure. Inside the kernel we use copy_struct_from_user()
+to read the struct but this has the unfortunate side effect of silently
+accepting some overrun in the structure size providing the extra data is
+all zeros. This means that we can't discover the clone3() features that
+the running kernel supports by simply probing with various struct sizes.
+We need to check this for the benefit of test systems which run newer
+kselftests on old kernels.
+
+Add a flag which can be set on a test to indicate that clone3() may return
+-E2BIG due to the use of newer struct versions. Currently no tests need
+this but it will become an issue for testing clone3() support for shadow
+stacks, the support for shadow stacks is already present on x86.
+
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/clone3/clone3.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
+index 1108bd8e36d6..6adbfd14c841 100644
+--- a/tools/testing/selftests/clone3/clone3.c
++++ b/tools/testing/selftests/clone3/clone3.c
+@@ -39,6 +39,7 @@ struct test {
+ size_t size;
+ size_function size_function;
+ int expected;
++ bool e2big_valid;
+ enum test_mode test_mode;
+ filter_function filter;
+ };
+@@ -141,6 +142,11 @@ static void test_clone3(const struct test *test)
+ ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n",
+ getpid(), ret, test->expected);
+ if (ret != test->expected) {
++ if (test->e2big_valid && ret == -E2BIG) {
++ ksft_print_msg("Test reported -E2BIG\n");
++ ksft_test_result_skip("%s\n", test->name);
++ return;
++ }
+ ksft_print_msg(
+ "[%d] Result (%d) is different than expected (%d)\n",
+ getpid(), ret, test->expected);
+--
+2.34.1
+
+
+From 295f1b6a27b9621402b0d3abeb15a3d2c39a7ddb Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 18 Oct 2023 23:09:49 +0100
+Subject: [PATCH 08/47] selftests/clone3: Test shadow stack support
+
+Add basic test coverage for specifying the shadow stack for a newly
+created thread via clone3(), including coverage of the newly extended
+argument structure.
+
+In order to facilitate testing on systems without userspace shadow stack
+support we manually enable shadow stacks on startup, this is architecture
+specific due to the use of an arch_prctl() on x86. Due to interactions with
+potential userspace locking of features we actually detect support for
+shadow stacks on the running system by attempting to allocate a shadow
+stack page during initialisation using map_shadow_stack(), warning if this
+succeeds when the enable failed.
+
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/clone3/clone3.c | 128 ++++++++++++++++++
+ .../selftests/clone3/clone3_selftests.h | 8 ++
+ 2 files changed, 136 insertions(+)
+
+diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
+index 6adbfd14c841..c468d9b87bd5 100644
+--- a/tools/testing/selftests/clone3/clone3.c
++++ b/tools/testing/selftests/clone3/clone3.c
+@@ -3,6 +3,7 @@
+ /* Based on Christian Brauner's clone3() example */
+
+ #define _GNU_SOURCE
++#include <asm/mman.h>
+ #include <errno.h>
+ #include <inttypes.h>
+ #include <linux/types.h>
+@@ -11,6 +12,7 @@
+ #include <stdint.h>
+ #include <stdio.h>
+ #include <stdlib.h>
++#include <sys/mman.h>
+ #include <sys/syscall.h>
+ #include <sys/types.h>
+ #include <sys/un.h>
+@@ -19,8 +21,12 @@
+ #include <sched.h>
+
+ #include "../kselftest.h"
++#include "../ksft_shstk.h"
+ #include "clone3_selftests.h"
+
++static bool shadow_stack_supported;
++static size_t max_supported_args_size;
++
+ enum test_mode {
+ CLONE3_ARGS_NO_TEST,
+ CLONE3_ARGS_ALL_0,
+@@ -28,6 +34,10 @@ enum test_mode {
+ CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG,
+ CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG,
+ CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG,
++ CLONE3_ARGS_SHADOW_STACK,
++ CLONE3_ARGS_SHADOW_STACK_NO_SIZE,
++ CLONE3_ARGS_SHADOW_STACK_NO_POINTER,
++ CLONE3_ARGS_SHADOW_STACK_NO_TOKEN,
+ };
+
+ typedef bool (*filter_function)(void);
+@@ -44,6 +54,43 @@ struct test {
+ filter_function filter;
+ };
+
++/*
++ * We check for shadow stack support by attempting to use
++ * map_shadow_stack() since features may have been locked by the
++ * dynamic linker resulting in spurious errors when we attempt to
++ * enable on startup. We warn if the enable failed.
++ */
++static void test_shadow_stack_supported(void)
++{
++ long ret;
++
++ ret = syscall(__NR_map_shadow_stack, 0, getpagesize(), 0);
++ if (ret == -1) {
++ ksft_print_msg("map_shadow_stack() not supported\n");
++ } else if ((void *)ret == MAP_FAILED) {
++ ksft_print_msg("Failed to map shadow stack\n");
++ } else {
++ ksft_print_msg("Shadow stack supportd\n");
++ shadow_stack_supported = true;
++
++ if (!shadow_stack_enabled)
++ ksft_print_msg("Mapped but did not enable shadow stack\n");
++ }
++}
++
++static unsigned long long get_shadow_stack_page(unsigned long flags)
++{
++ unsigned long long page;
++
++ page = syscall(__NR_map_shadow_stack, 0, getpagesize(), flags);
++ if ((void *)page == MAP_FAILED) {
++ ksft_print_msg("map_shadow_stack() failed: %d\n", errno);
++ return 0;
++ }
++
++ return page;
++}
++
+ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
+ {
+ struct __clone_args args = {
+@@ -89,6 +136,20 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
+ case CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG:
+ args.exit_signal = 0x00000000000000f0ULL;
+ break;
++ case CLONE3_ARGS_SHADOW_STACK:
++ args.shadow_stack = get_shadow_stack_page(SHADOW_STACK_SET_TOKEN);
++ args.shadow_stack_size = getpagesize();
++ break;
++ case CLONE3_ARGS_SHADOW_STACK_NO_POINTER:
++ args.shadow_stack_size = getpagesize();
++ break;
++ case CLONE3_ARGS_SHADOW_STACK_NO_SIZE:
++ args.shadow_stack = get_shadow_stack_page(SHADOW_STACK_SET_TOKEN);
++ break;
++ case CLONE3_ARGS_SHADOW_STACK_NO_TOKEN:
++ args.shadow_stack = get_shadow_stack_page(0);
++ args.shadow_stack_size = getpagesize();
++ break;
+ }
+
+ memcpy(&args_ext.args, &args, sizeof(struct __clone_args));
+@@ -179,6 +240,26 @@ static bool no_timenamespace(void)
+ return true;
+ }
+
++static bool have_shadow_stack(void)
++{
++ if (shadow_stack_supported) {
++ ksft_print_msg("Shadow stack supported\n");
++ return true;
++ }
++
++ return false;
++}
++
++static bool no_shadow_stack(void)
++{
++ if (!shadow_stack_supported) {
++ ksft_print_msg("Shadow stack not supported\n");
++ return true;
++ }
++
++ return false;
++}
++
+ static size_t page_size_plus_8(void)
+ {
+ return getpagesize() + 8;
+@@ -322,6 +403,50 @@ static const struct test tests[] = {
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
++ {
++ .name = "Shadow stack on system with shadow stack",
++ .flags = CLONE_VM,
++ .size = 0,
++ .expected = 0,
++ .e2big_valid = true,
++ .test_mode = CLONE3_ARGS_SHADOW_STACK,
++ .filter = no_shadow_stack,
++ },
++ {
++ .name = "Shadow stack with no pointer",
++ .flags = CLONE_VM,
++ .size = 0,
++ .expected = -EINVAL,
++ .e2big_valid = true,
++ .test_mode = CLONE3_ARGS_SHADOW_STACK_NO_POINTER,
++ },
++ {
++ .name = "Shadow stack with no size",
++ .flags = CLONE_VM,
++ .size = 0,
++ .expected = -EINVAL,
++ .e2big_valid = true,
++ .test_mode = CLONE3_ARGS_SHADOW_STACK_NO_SIZE,
++ .filter = no_shadow_stack,
++ },
++ {
++ .name = "Shadow stack with no token",
++ .flags = CLONE_VM,
++ .size = 0,
++ .expected = -EINVAL,
++ .e2big_valid = true,
++ .test_mode = CLONE3_ARGS_SHADOW_STACK_NO_TOKEN,
++ .filter = no_shadow_stack,
++ },
++ {
++ .name = "Shadow stack on system without shadow stack",
++ .flags = CLONE_VM,
++ .size = 0,
++ .expected = -EINVAL,
++ .e2big_valid = true,
++ .test_mode = CLONE3_ARGS_SHADOW_STACK,
++ .filter = have_shadow_stack,
++ },
+ };
+
+ int main(int argc, char *argv[])
+@@ -329,9 +454,12 @@ int main(int argc, char *argv[])
+ size_t size;
+ int i;
+
++ enable_shadow_stack();
++
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ test_clone3_supported();
++ test_shadow_stack_supported();
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++)
+ test_clone3(&tests[i]);
+diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
+index 3d2663fe50ba..1011dae85098 100644
+--- a/tools/testing/selftests/clone3/clone3_selftests.h
++++ b/tools/testing/selftests/clone3/clone3_selftests.h
+@@ -31,6 +31,14 @@ struct __clone_args {
+ __aligned_u64 set_tid;
+ __aligned_u64 set_tid_size;
+ __aligned_u64 cgroup;
++#ifndef CLONE_ARGS_SIZE_VER2
++#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
++#endif
++ __aligned_u64 shadow_stack;
++ __aligned_u64 shadow_stack_size;
++#ifndef CLONE_ARGS_SIZE_VER3
++#define CLONE_ARGS_SIZE_VER3 104 /* sizeof fourth published struct */
++#endif
+ };
+
+ static pid_t sys_clone3(struct __clone_args *args, size_t size)
+--
+2.34.1
+
+
+From e6c930b757134d3ad80f5b1a04ddba670b212abb Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 16 Aug 2023 17:33:47 +0100
+Subject: [PATCH 10/47] arm64/mm: Restructure arch_validate_flags() for
+ extensibility
+
+Currently arch_validate_flags() is written in a very non-extensible
+fashion, returning immediately if MTE is not supported and writing the MTE
+check as a direct return. Since we will want to add more checks for GCS
+refactor the existing code to be more extensible, no functional change
+intended.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/mman.h | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
+index 5966ee4a6154..c21849ffdd88 100644
+--- a/arch/arm64/include/asm/mman.h
++++ b/arch/arm64/include/asm/mman.h
+@@ -52,11 +52,17 @@ static inline bool arch_validate_prot(unsigned long prot,
+
+ static inline bool arch_validate_flags(unsigned long vm_flags)
+ {
+- if (!system_supports_mte())
+- return true;
++ if (system_supports_mte()) {
++ /*
++ * only allow VM_MTE if VM_MTE_ALLOWED has been set
++ * previously
++ */
++ if ((vm_flags & VM_MTE) && !(vm_flags & VM_MTE_ALLOWED))
++ return false;
++ }
++
++ return true;
+
+- /* only allow VM_MTE if VM_MTE_ALLOWED has been set previously */
+- return !(vm_flags & VM_MTE) || (vm_flags & VM_MTE_ALLOWED);
+ }
+ #define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags)
+
+--
+2.34.1
+
+
+From 33a83dfb0883de5bb5e1577423a213193aff4677 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Sun, 12 Feb 2023 20:53:44 -0800
+Subject: [PATCH 11/47] prctl: arch-agnostic prctl for shadow stack
+
+Three architectures (x86, aarch64, riscv) have announced support for
+shadow stacks with fairly similar functionality. While x86 is using
+arch_prctl() to control the functionality neither arm64 nor riscv uses
+that interface so this patch adds arch-agnostic prctl() support to
+get and set status of shadow stacks and lock the current configuration to
+prevent further changes, with support for turning on and off individual
+subfeatures so applications can limit their exposure to features that
+they do not need. The features are:
+
+ - PR_SHADOW_STACK_ENABLE: Tracking and enforcement of shadow stacks,
+ including allocation of a shadow stack if one is not already
+ allocated.
+ - PR_SHADOW_STACK_WRITE: Writes to specific addresses in the shadow
+ stack.
+ - PR_SHADOW_STACK_PUSH: Push additional values onto the shadow stack.
+
+These features are expected to be inherited by new threads and cleared
+on exec(), unknown features should be rejected for enable but accepted
+for locking (in order to allow for future proofing).
+
+This is based on a patch originally written by Deepak Gupta but modified
+fairly heavily, support for indirect landing pads is removed, additional
+modes added and the locking interface reworked. The set status prctl()
+is also reworked to just set flags, if setting/reading the shadow stack
+pointer is required this could be a separate prctl.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ include/linux/mm.h | 4 ++++
+ include/uapi/linux/prctl.h | 22 ++++++++++++++++++++++
+ kernel/sys.c | 30 ++++++++++++++++++++++++++++++
+ 3 files changed, 56 insertions(+)
+
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index c0a782eda803..0b1139c5df60 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -4182,4 +4182,8 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
+ return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE);
+ }
+
++int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status);
++int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status);
++int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
++
+ #endif /* _LINUX_MM_H */
+diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
+index 370ed14b1ae0..3c66ed8f46d8 100644
+--- a/include/uapi/linux/prctl.h
++++ b/include/uapi/linux/prctl.h
+@@ -306,4 +306,26 @@ struct prctl_mm_map {
+ # define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
+ # define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f
+
++/*
++ * Get the current shadow stack configuration for the current thread,
++ * this will be the value configured via PR_SET_SHADOW_STACK_STATUS.
++ */
++#define PR_GET_SHADOW_STACK_STATUS 71
++
++/*
++ * Set the current shadow stack configuration. Enabling the shadow
++ * stack will cause a shadow stack to be allocated for the thread.
++ */
++#define PR_SET_SHADOW_STACK_STATUS 72
++# define PR_SHADOW_STACK_ENABLE (1UL << 0)
++# define PR_SHADOW_STACK_WRITE (1UL << 1)
++# define PR_SHADOW_STACK_PUSH (1UL << 2)
++
++/*
++ * Prevent further changes to the specified shadow stack
++ * configuration. All bits may be locked via this call, including
++ * undefined bits.
++ */
++#define PR_LOCK_SHADOW_STACK_STATUS 73
++
+ #endif /* _LINUX_PRCTL_H */
+diff --git a/kernel/sys.c b/kernel/sys.c
+index f8e543f1e38a..242e9f147791 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -2315,6 +2315,21 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
+ return -EINVAL;
+ }
+
++int __weak arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status)
++{
++ return -EINVAL;
++}
++
++int __weak arch_set_shadow_stack_status(struct task_struct *t, unsigned long status)
++{
++ return -EINVAL;
++}
++
++int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status)
++{
++ return -EINVAL;
++}
++
+ #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)
+
+ #ifdef CONFIG_ANON_VMA_NAME
+@@ -2757,6 +2772,21 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
+ case PR_RISCV_V_GET_CONTROL:
+ error = RISCV_V_GET_CONTROL();
+ break;
++ case PR_GET_SHADOW_STACK_STATUS:
++ if (arg3 || arg4 || arg5)
++ return -EINVAL;
++ error = arch_get_shadow_stack_status(me, (unsigned long __user *) arg2);
++ break;
++ case PR_SET_SHADOW_STACK_STATUS:
++ if (arg3 || arg4 || arg5)
++ return -EINVAL;
++ error = arch_set_shadow_stack_status(me, arg2);
++ break;
++ case PR_LOCK_SHADOW_STACK_STATUS:
++ if (arg3 || arg4 || arg5)
++ return -EINVAL;
++ error = arch_lock_shadow_stack_status(me, arg2);
++ break;
+ default:
+ error = -EINVAL;
+ break;
+--
+2.34.1
+
+
+From d16e43b333735c0ce01575c280197da1989e9739 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Fri, 4 Aug 2023 14:50:18 +0100
+Subject: [PATCH 12/47] mman: Add map_shadow_stack() flags
+
+In preparation for adding arm64 GCS support make the map_shadow_stack()
+SHADOW_STACK_SET_TOKEN flag generic and add _SET_MARKER. The existing
+flag indicates that a token usable for stack switch should be added to
+the top of the newly mapped GCS region while the new flag indicates that
+a top of stack marker suitable for use by unwinders should be added
+above that.
+
+For arm64 the top of stack marker is all bits 0.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/x86/include/uapi/asm/mman.h | 3 ---
+ include/uapi/asm-generic/mman.h | 4 ++++
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
+index 46cdc941f958..ac1e6277212b 100644
+--- a/arch/x86/include/uapi/asm/mman.h
++++ b/arch/x86/include/uapi/asm/mman.h
+@@ -5,9 +5,6 @@
+ #define MAP_32BIT 0x40 /* only give out 32bit addresses */
+ #define MAP_ABOVE4G 0x80 /* only map above 4GB */
+
+-/* Flags for map_shadow_stack(2) */
+-#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
+-
+ #include <asm-generic/mman.h>
+
+ #endif /* _ASM_X86_MMAN_H */
+diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h
+index 57e8195d0b53..d6a282687af5 100644
+--- a/include/uapi/asm-generic/mman.h
++++ b/include/uapi/asm-generic/mman.h
+@@ -19,4 +19,8 @@
+ #define MCL_FUTURE 2 /* lock all future mappings */
+ #define MCL_ONFAULT 4 /* lock all pages that are faulted in */
+
++#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
++#define SHADOW_STACK_SET_MARKER (1ULL << 1)     /* Set up a top of stack marker in the shadow stack */
++
++
+ #endif /* __ASM_GENERIC_MMAN_H */
+--
+2.34.1
+
+
+From ff25ae9e38129288ebbeabf5a53360a074157b0a Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Fri, 3 Mar 2023 17:16:43 +0000
+Subject: [PATCH 13/47] arm64: Document boot requirements for Guarded Control
+ Stacks
+
+FEAT_GCS introduces a number of new system registers, we require that
+access to these registers is not trapped when we identify that the feature
+is detected.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ Documentation/arch/arm64/booting.rst | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
+index b57776a68f15..de3679770c64 100644
+--- a/Documentation/arch/arm64/booting.rst
++++ b/Documentation/arch/arm64/booting.rst
+@@ -411,6 +411,28 @@ Before jumping into the kernel, the following conditions must be met:
+
+ - HFGRWR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1.
+
++ - For features with Guarded Control Stacks (FEAT_GCS):
++
++ - If EL3 is present:
++
++ - SCR_EL3.GCSEn (bit 39) must be initialised to 0b1.
++
++ - If the kernel is entered at EL1 and EL2 is present:
++
++ - HFGITR_EL2.nGCSEPP (bit 59) must be initialised to 0b1.
++
++ - HFGITR_EL2.nGCSSTR_EL1 (bit 58) must be initialised to 0b1.
++
++ - HFGITR_EL2.nGCSPUSHM_EL1 (bit 57) must be initialised to 0b1.
++
++ - HFGRTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
++
++ - HFGRTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
++
++ - HFGWTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
++
++ - HFGWTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
++
+ The requirements described above for CPU mode, caches, MMUs, architected
+ timers, coherency and system registers apply to all CPUs. All CPUs must
+ enter the kernel in the same exception level. Where the values documented
+--
+2.34.1
+
+
+From 5867bb3606500d88935829c424bcbd0c1afe0277 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Tue, 4 Jul 2023 00:17:55 +0100
+Subject: [PATCH 14/47] arm64/gcs: Document the ABI for Guarded Control Stacks
+
+Add some documentation of the userspace ABI for Guarded Control Stacks.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ Documentation/arch/arm64/gcs.rst | 233 +++++++++++++++++++++++++++++
+ Documentation/arch/arm64/index.rst | 1 +
+ 2 files changed, 234 insertions(+)
+ create mode 100644 Documentation/arch/arm64/gcs.rst
+
+diff --git a/Documentation/arch/arm64/gcs.rst b/Documentation/arch/arm64/gcs.rst
+new file mode 100644
+index 000000000000..c45c0326836a
+--- /dev/null
++++ b/Documentation/arch/arm64/gcs.rst
+@@ -0,0 +1,233 @@
++===============================================
++Guarded Control Stack support for AArch64 Linux
++===============================================
++
++This document outlines briefly the interface provided to userspace by Linux in
++order to support use of the ARM Guarded Control Stack (GCS) feature.
++
++This is an outline of the most important features and issues only and not
++intended to be exhaustive.
++
++
++
++1. General
++-----------
++
++* GCS is an architecture feature intended to provide greater protection
++ against return oriented programming (ROP) attacks and to simplify the
++ implementation of features that need to collect stack traces such as
++ profiling.
++
++* When GCS is enabled a separate guarded control stack is maintained by the
++ PE which is writeable only through specific GCS operations. This
++ stores the call stack only, when a procedure call instruction is
++ performed the current PC is pushed onto the GCS and on RET the
++ address in the LR is verified against that on the top of the GCS.
++
++* When active current GCS pointer is stored in the system register
++ GCSPR_EL0. This is readable by userspace but can only be updated
++ via specific GCS instructions.
++
++* The architecture provides instructions for switching between guarded
++ control stacks with checks to ensure that the new stack is a valid
++ target for switching.
++
++* The functionality of GCS is similar to that provided by the x86 Shadow
++ Stack feature, due to sharing of userspace interfaces the ABI refers to
++ shadow stacks rather than GCS.
++
++* Support for GCS is reported to userspace via HWCAP2_GCS in the aux vector
++ AT_HWCAP2 entry.
++
++* GCS is enabled per thread. While there is support for disabling GCS
++ at runtime this should be done with great care.
++
++* GCS memory access faults are reported as normal memory access faults.
++
++* GCS specific errors (those reported with EC 0x2d) will be reported as
++ SIGSEGV with a si_code of SEGV_CPERR (control protection error).
++
++* GCS is supported only for AArch64.
++
++* On systems where GCS is supported GCSPR_EL0 is always readable by EL0
++ regardless of the GCS configuration for the thread.
++
++* The architecture supports enabling GCS without verifying that return values
++ in LR match those in the GCS, the LR will be ignored. This is not supported
++ by Linux.
++
++* EL0 GCS entries with bit 63 set are reserved for use, one such use is defined
++ below for signals and should be ignored when parsing the stack if not
++ understood.
++
++
++2. Enabling and disabling Guarded Control Stacks
++-------------------------------------------------
++
++* GCS is enabled and disabled for a thread via the PR_SET_SHADOW_STACK_STATUS
++ prctl(), this takes a single flags argument specifying which GCS features
++ should be used.
++
++* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack
++ and enables GCS for the thread, enabling the functionality controlled by
++ GCSCRE0_EL1.{nTR, RVCHKEN, PCRSEL}.
++
++* When set the PR_SHADOW_STACK_PUSH flag enables the functionality controlled
++ by GCSCRE0_EL1.PUSHMEn, allowing explicit GCS pushes.
++
++* When set the PR_SHADOW_STACK_WRITE flag enables the functionality controlled
++ by GCSCRE0_EL1.STREn, allowing explicit stores to the Guarded Control Stack.
++
++* Any unknown flags will cause PR_SET_SHADOW_STACK_STATUS to return -EINVAL.
++
++* PR_LOCK_SHADOW_STACK_STATUS is passed a bitmask of features with the same
++ values as used for PR_SET_SHADOW_STACK_STATUS. Any future changes to the
++ status of the specified GCS mode bits will be rejected.
++
++* PR_LOCK_SHADOW_STACK_STATUS allows any bit to be locked, this allows
++ userspace to prevent changes to any future features.
++
++* There is no support for a process to remove a lock that has been set for
++ it.
++
++* PR_SET_SHADOW_STACK_STATUS and PR_LOCK_SHADOW_STACK_STATUS affect only the
++ thread that called them, any other running threads will be unaffected.
++
++* New threads inherit the GCS configuration of the thread that created them.
++
++* GCS is disabled on exec().
++
++* The current GCS configuration for a thread may be read with the
++ PR_GET_SHADOW_STACK_STATUS prctl(), this returns the same flags that
++ are passed to PR_SET_SHADOW_STACK_STATUS.
++
++* If GCS is disabled for a thread after having previously been enabled then
++ the stack will remain allocated for the lifetime of the thread. At present
++ any attempt to reenable GCS for the thread will be rejected, this may be
++ revisited in future.
++
++* It should be noted that since enabling GCS will result in GCS becoming
++ active immediately it is not normally possible to return from the function
++ that invoked the prctl() that enabled GCS. It is expected that the normal
++ usage will be that GCS is enabled very early in execution of a program.
++
++
++
++3. Allocation of Guarded Control Stacks
++----------------------------------------
++
++* When GCS is enabled for a thread a new Guarded Control Stack will be
++ allocated for it of size RLIMIT_STACK or 4 gigabytes, whichever is
++ smaller.
++
++* When a new thread is created by a thread which has GCS enabled then a
++ new Guarded Control Stack will be allocated for the new thread with
++ half the size of the standard stack.
++
++* When a stack is allocated by enabling GCS or during thread creation then
++ the top 8 bytes of the stack will be initialised to 0 and GCSPR_EL0 will
++ be set to point to the address of this 0 value, this can be used to
++ detect the top of the stack.
++
++* Additional Guarded Control Stacks can be allocated using the
++ map_shadow_stack() system call.
++
++* Stacks allocated using map_shadow_stack() can optionally have an end of
++ stack marker and cap placed at the top of the stack. If the flag
++ SHADOW_STACK_SET_TOKEN is specified a cap will be placed on the stack,
++ if SHADOW_STACK_SET_MARKER is not specified the cap will be the top 8
++ bytes of the stack and if it is specified then the cap will be the next
++ 8 bytes. While specifying just SHADOW_STACK_SET_MARKER by itself is
++ valid since the marker is all bits 0 it has no observable effect.
++
++* Stacks allocated using map_shadow_stack() must have a size which is a
++ multiple of 8 bytes larger than 8 bytes and must be 8 bytes aligned.
++
++* An address can be specified to map_shadow_stack(), if one is provided then
++ it must be aligned to a page boundary.
++
++* When a thread is freed the Guarded Control Stack initially allocated for
++ that thread will be freed. Note carefully that if the stack has been
++ switched this may not be the stack currently in use by the thread.
++
++
++4. Signal handling
++--------------------
++
++* A new signal frame record gcs_context encodes the current GCS mode and
++ pointer for the interrupted context on signal delivery. This will always
++ be present on systems that support GCS.
++
++* The record contains a flag field which reports the current GCS configuration
++ for the interrupted context as PR_GET_SHADOW_STACK_STATUS would.
++
++* The signal handler is run with the same GCS configuration as the interrupted
++ context.
++
++* When GCS is enabled for the interrupted thread a signal handling specific
++ GCS cap token will be written to the GCS, this is an architectural GCS cap
++ token with bit 63 set and the token type (bits 0..11) all clear. The
++ GCSPR_EL0 reported in the signal frame will point to this cap token.
++
++* The signal handler will use the same GCS as the interrupted context.
++
++* When GCS is enabled on signal entry a frame with the address of the signal
++ return handler will be pushed onto the GCS, allowing return from the signal
++ handler via RET as normal. This will not be reported in the gcs_context in
++ the signal frame.
++
++
++5. Signal return
++-----------------
++
++When returning from a signal handler:
++
++* If there is a gcs_context record in the signal frame then the GCS flags
++ and GCSPR_EL0 will be restored from that context prior to further
++ validation.
++
++* If there is no gcs_context record in the signal frame then the GCS
++ configuration will be unchanged.
++
++* If GCS is enabled on return from a signal handler then GCSPR_EL0 must
++ point to a valid GCS signal cap record, this will be popped from the
++ GCS prior to signal return.
++
++* If the GCS configuration is locked when returning from a signal then any
++ attempt to change the GCS configuration will be treated as an error. This
++ is true even if GCS was not enabled prior to signal entry.
++
++* GCS may be disabled via signal return but any attempt to enable GCS via
++ signal return will be rejected.
++
++
++6. ptrace extensions
++---------------------
++
++* A new regset NT_ARM_GCS is defined for use with PTRACE_GETREGSET and
++ PTRACE_SETREGSET.
++
++* Due to the complexity surrounding allocation and deallocation of stacks and
++ lack of practical application it is not possible to enable GCS via ptrace.
++ GCS may be disabled via the ptrace interface.
++
++* Other GCS modes may be configured via ptrace.
++
++* Configuration via ptrace ignores locking of GCS mode bits.
++
++
++7. ELF coredump extensions
++---------------------------
++
++* NT_ARM_GCS notes will be added to each coredump for each thread of the
++ dumped process. The contents will be equivalent to the data that would
++ have been read if a PTRACE_GETREGSET of the corresponding type were
++ executed for each thread when the coredump was generated.
++
++
++
++8. /proc extensions
++--------------------
++
++* Guarded Control Stack pages will include "ss" in their VmFlags in
++ /proc/<pid>/smaps.
+diff --git a/Documentation/arch/arm64/index.rst b/Documentation/arch/arm64/index.rst
+index d08e924204bf..dcf3ee3eb8c0 100644
+--- a/Documentation/arch/arm64/index.rst
++++ b/Documentation/arch/arm64/index.rst
+@@ -14,6 +14,7 @@ ARM64 Architecture
+ booting
+ cpu-feature-registers
+ elf_hwcaps
++ gcs
+ hugetlbpage
+ kdump
+ legacy_instructions
+--
+2.34.1
+
+
+From 41a7e3b42b2776185f78b4a23ac7a5d3019eb203 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Tue, 20 Jun 2023 19:28:37 +0100
+Subject: [PATCH 15/47] arm64/sysreg: Add definitions for architected GCS caps
+
+The architecture defines a format for guarded control stack caps, used
+to mark the top of an unused GCS in order to limit the potential for
+exploitation via stack switching. Add definitions associated with these.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/sysreg.h | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
+index c3b19b376c86..6ed813e856c1 100644
+--- a/arch/arm64/include/asm/sysreg.h
++++ b/arch/arm64/include/asm/sysreg.h
+@@ -1064,6 +1064,26 @@
+ #define POE_RXW UL(0x7)
+ #define POE_MASK UL(0xf)
+
++/*
++ * Definitions for Guarded Control Stack
++ */
++
++#define GCS_CAP_ADDR_MASK GENMASK(63, 12)
++#define GCS_CAP_ADDR_SHIFT 12
++#define GCS_CAP_ADDR_WIDTH 52
++#define GCS_CAP_ADDR(x) FIELD_GET(GCS_CAP_ADDR_MASK, x)
++
++#define GCS_CAP_TOKEN_MASK GENMASK(11, 0)
++#define GCS_CAP_TOKEN_SHIFT 0
++#define GCS_CAP_TOKEN_WIDTH 12
++#define GCS_CAP_TOKEN(x) FIELD_GET(GCS_CAP_TOKEN_MASK, x)
++
++#define GCS_CAP_VALID_TOKEN 0x1
++#define GCS_CAP_IN_PROGRESS_TOKEN 0x5
++
++#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
++ GCS_CAP_VALID_TOKEN)
++
+ #define ARM64_FEATURE_FIELD_BITS 4
+
+ /* Defined for compatibility only, do not add new users. */
+--
+2.34.1
+
+
+From 0c5c6e7f9c231a904a1d04ea1d1a9b1729544fe3 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Tue, 20 Jun 2023 19:31:24 +0100
+Subject: [PATCH 16/47] arm64/gcs: Add manual encodings of GCS instructions
+
+Define C callable functions for GCS instructions used by the kernel. In
+order to avoid ambitious toolchain requirements for GCS support these are
+manually encoded, this means we have fixed register numbers which will be
+a bit limiting for the compiler but none of these should be used in
+sufficiently fast paths for this to be a problem.
+
+Note that GCSSTTR is used to store to EL0.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/gcs.h | 51 ++++++++++++++++++++++++++++++++
+ arch/arm64/include/asm/uaccess.h | 22 ++++++++++++++
+ 2 files changed, 73 insertions(+)
+ create mode 100644 arch/arm64/include/asm/gcs.h
+
+diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h
+new file mode 100644
+index 000000000000..7c5e95218db6
+--- /dev/null
++++ b/arch/arm64/include/asm/gcs.h
+@@ -0,0 +1,51 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Copyright (C) 2023 ARM Ltd.
++ */
++#ifndef __ASM_GCS_H
++#define __ASM_GCS_H
++
++#include <asm/types.h>
++#include <asm/uaccess.h>
++
++static inline void gcsb_dsync(void)
++{
++ asm volatile(".inst 0xd503227f" : : : "memory");
++}
++
++static inline void gcsstr(u64 *addr, u64 val)
++{
++ register u64 *_addr __asm__ ("x0") = addr;
++ register long _val __asm__ ("x1") = val;
++
++ /* GCSSTTR x1, x0 */
++ asm volatile(
++ ".inst 0xd91f1c01\n"
++ :
++ : "rZ" (_val), "r" (_addr)
++ : "memory");
++}
++
++static inline void gcsss1(u64 Xt)
++{
++ asm volatile (
++ "sys #3, C7, C7, #2, %0\n"
++ :
++ : "rZ" (Xt)
++ : "memory");
++}
++
++static inline u64 gcsss2(void)
++{
++ u64 Xt;
++
++ asm volatile(
++ "SYSL %0, #3, C7, C7, #3\n"
++ : "=r" (Xt)
++ :
++ : "memory");
++
++ return Xt;
++}
++
++#endif
+diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
+index 14be5000c5a0..22e10e79f56a 100644
+--- a/arch/arm64/include/asm/uaccess.h
++++ b/arch/arm64/include/asm/uaccess.h
+@@ -425,4 +425,26 @@ static inline size_t probe_subpage_writeable(const char __user *uaddr,
+
+ #endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */
+
++#ifdef CONFIG_ARM64_GCS
++
++static inline int gcssttr(unsigned long __user *addr, unsigned long val)
++{
++ register unsigned long __user *_addr __asm__ ("x0") = addr;
++ register unsigned long _val __asm__ ("x1") = val;
++ int err = 0;
++
++ /* GCSSTTR x1, x0 */
++ asm volatile(
++ "1: .inst 0xd91f1c01\n"
++ "2: \n"
++ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0)
++ : "+r" (err)
++ : "rZ" (_val), "r" (_addr)
++ : "memory");
++
++ return err;
++}
++
++#endif /* CONFIG_ARM64_GCS */
++
+ #endif /* __ASM_UACCESS_H */
+--
+2.34.1
+
+
+From d182ff2531f97a9b48dd0a35f8c36a5b2d541d52 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Sun, 16 Jul 2023 14:43:47 +0100
+Subject: [PATCH 17/47] arm64/gcs: Provide put_user_gcs()
+
+In order for EL1 to write to an EL0 GCS it must use the GCSSTTR instruction
+rather than a normal STTR. Provide a put_user_gcs() which does this.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/uaccess.h | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
+index 22e10e79f56a..e118c3d772c8 100644
+--- a/arch/arm64/include/asm/uaccess.h
++++ b/arch/arm64/include/asm/uaccess.h
+@@ -445,6 +445,24 @@ static inline int gcssttr(unsigned long __user *addr, unsigned long val)
+ return err;
+ }
+
++static inline void put_user_gcs(unsigned long val, unsigned long __user *addr,
++ int *err)
++{
++ int ret;
++
++ if (!access_ok((char __user *)addr, sizeof(u64))) {
++ *err = -EFAULT;
++ return;
++ }
++
++ uaccess_ttbr0_enable();
++ ret = gcssttr(addr, val);
++ if (ret != 0)
++ *err = ret;
++ uaccess_ttbr0_disable();
++}
++
++
+ #endif /* CONFIG_ARM64_GCS */
+
+ #endif /* __ASM_UACCESS_H */
+--
+2.34.1
+
+
+From 98f4b4d4c95150730f81cff8a1a56cec4d3bd9af Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Tue, 7 Mar 2023 22:35:56 +0000
+Subject: [PATCH 18/47] arm64/cpufeature: Runtime detection of Guarded Control
+ Stack (GCS)
+
+Add a cpufeature for GCS, allowing other code to conditionally support it
+at runtime.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/cpufeature.h | 6 ++++++
+ arch/arm64/kernel/cpufeature.c | 16 ++++++++++++++++
+ arch/arm64/tools/cpucaps | 1 +
+ 3 files changed, 23 insertions(+)
+
+diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
+index bd8d4ca81a48..f81a69991394 100644
+--- a/arch/arm64/include/asm/cpufeature.h
++++ b/arch/arm64/include/asm/cpufeature.h
+@@ -825,6 +825,12 @@ static inline bool system_supports_lpa2(void)
+ return cpus_have_final_cap(ARM64_HAS_LPA2);
+ }
+
++static inline bool system_supports_gcs(void)
++{
++ return IS_ENABLED(CONFIG_ARM64_GCS) &&
++ alternative_has_cap_unlikely(ARM64_HAS_GCS);
++}
++
+ int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
+ bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
+
+diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
+index 8d1a634a403e..b606842ab8c1 100644
+--- a/arch/arm64/kernel/cpufeature.c
++++ b/arch/arm64/kernel/cpufeature.c
+@@ -255,6 +255,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+ };
+
+ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
++ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS),
++ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0),
+@@ -2250,6 +2252,12 @@ static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn);
+ }
+
++static void cpu_enable_gcs(const struct arm64_cpu_capabilities *__unused)
++{
++ /* GCS is not currently used at EL1 */
++ write_sysreg_s(0, SYS_GCSCR_EL1);
++}
++
+ /* Internal helper functions to match cpu capability type */
+ static bool
+ cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
+@@ -2739,6 +2747,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_lpa2,
+ },
++ {
++ .desc = "Guarded Control Stack (GCS)",
++ .capability = ARM64_HAS_GCS,
++ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
++ .cpu_enable = cpu_enable_gcs,
++ .matches = has_cpuid_feature,
++ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, GCS, IMP)
++ },
+ {},
+ };
+
+diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
+index b912b1409fc0..148734504295 100644
+--- a/arch/arm64/tools/cpucaps
++++ b/arch/arm64/tools/cpucaps
+@@ -28,6 +28,7 @@ HAS_EPAN
+ HAS_EVT
+ HAS_FGT
+ HAS_FPSIMD
++HAS_GCS
+ HAS_GENERIC_AUTH
+ HAS_GENERIC_AUTH_ARCH_QARMA3
+ HAS_GENERIC_AUTH_ARCH_QARMA5
+--
+2.34.1
+
+
+From cfa2c80233b74b7e487afbd8fe4e22cdd7c6bb93 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Fri, 21 Apr 2023 19:37:37 +0100
+Subject: [PATCH 19/47] arm64/mm: Allocate PIE slots for EL0 guarded control
+ stack
+
+Pages used for guarded control stacks need to be described to the hardware
+using the Permission Indirection Extension, GCS is not supported without
+PIE. In order to support copy on write for guarded stacks we allocate two
+values, one for active GCSs and one for GCS pages marked as read only prior
+to copy.
+
+Since the actual effect is defined using PIE the specific bit pattern used
+does not matter to the hardware but we choose two values which differ only
+in PTE_WRITE in order to help share code with non-PIE cases.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/pgtable-prot.h | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
+index 483dbfa39c4c..14a33e0bece3 100644
+--- a/arch/arm64/include/asm/pgtable-prot.h
++++ b/arch/arm64/include/asm/pgtable-prot.h
+@@ -129,15 +129,23 @@ extern bool arm64_use_ng_mappings;
+ /* 6: PTE_PXN | PTE_WRITE */
+ /* 7: PAGE_SHARED_EXEC PTE_PXN | PTE_WRITE | PTE_USER */
+ /* 8: PAGE_KERNEL_ROX PTE_UXN */
+-/* 9: PTE_UXN | PTE_USER */
++/* 9: PAGE_GCS_RO PTE_UXN | PTE_USER */
+ /* a: PAGE_KERNEL_EXEC PTE_UXN | PTE_WRITE */
+-/* b: PTE_UXN | PTE_WRITE | PTE_USER */
++/* b: PAGE_GCS PTE_UXN | PTE_WRITE | PTE_USER */
+ /* c: PAGE_KERNEL_RO PTE_UXN | PTE_PXN */
+ /* d: PAGE_READONLY PTE_UXN | PTE_PXN | PTE_USER */
+ /* e: PAGE_KERNEL PTE_UXN | PTE_PXN | PTE_WRITE */
+ /* f: PAGE_SHARED PTE_UXN | PTE_PXN | PTE_WRITE | PTE_USER */
+
++#define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER)
++#define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER)
++
++#define PAGE_GCS __pgprot(_PAGE_GCS)
++#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO)
++
+ #define PIE_E0 ( \
++ PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_GCS) | \
++ PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX) | \
+@@ -145,6 +153,8 @@ extern bool arm64_use_ng_mappings;
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW))
+
+ #define PIE_E1 ( \
++ PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \
++ PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \
+--
+2.34.1
+
+
+From d3fb78871759fd9e703384609bf1ccce903bdca2 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Fri, 14 Apr 2023 20:29:18 +0100
+Subject: [PATCH 20/47] mm: Define VM_SHADOW_STACK for arm64 when we support
+ GCS
+
+Use VM_HIGH_ARCH_5 for guarded control stack pages.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ Documentation/filesystems/proc.rst | 2 +-
+ fs/proc/task_mmu.c | 3 +++
+ include/linux/mm.h | 12 +++++++++++-
+ 3 files changed, 15 insertions(+), 2 deletions(-)
+
+diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
+index 104c6d047d9b..0392c3b74650 100644
+--- a/Documentation/filesystems/proc.rst
++++ b/Documentation/filesystems/proc.rst
+@@ -570,7 +570,7 @@ encoded manner. The codes are the following:
+ mt arm64 MTE allocation tags are enabled
+ um userfaultfd missing tracking
+ uw userfaultfd wr-protect tracking
+- ss shadow stack page
++ ss shadow/guarded control stack page
+ == =======================================
+
+ Note that there is no guarantee that every flag and associated mnemonic will
+diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
+index ff2c601f7d1c..fb0633d8e309 100644
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -702,6 +702,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
+ #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+ #ifdef CONFIG_ARCH_HAS_USER_SHADOW_STACK
+ [ilog2(VM_SHADOW_STACK)] = "ss",
++#endif
++#ifdef CONFIG_ARM64_GCS
++ [ilog2(VM_SHADOW_STACK)] = "ss",
+ #endif
+ };
+ size_t i;
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 0b1139c5df60..6cc304c90c63 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -352,7 +352,17 @@ extern unsigned int kobjsize(const void *objp);
+ * for more details on the guard size.
+ */
+ # define VM_SHADOW_STACK VM_HIGH_ARCH_5
+-#else
++#endif
++
++#if defined(CONFIG_ARM64_GCS)
++/*
++ * arm64's Guarded Control Stack implements similar functionality and
++ * has similar constraints to shadow stacks.
++ */
++# define VM_SHADOW_STACK VM_HIGH_ARCH_5
++#endif
++
++#ifndef VM_SHADOW_STACK
+ # define VM_SHADOW_STACK VM_NONE
+ #endif
+
+--
+2.34.1
+
+
+From 4eb47474ec4e4776a45110f9e9e853f69492ed3f Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Fri, 21 Apr 2023 20:53:01 +0100
+Subject: [PATCH 21/47] arm64/mm: Map pages for guarded control stack
+
+Map pages flagged as being part of a GCS as such rather than using the
+full set of generic VM flags.
+
+This is done using a conditional rather than extending the size of
+protection_map since that would make for a very sparse array.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/mman.h | 9 +++++++++
+ arch/arm64/mm/mmap.c | 13 ++++++++++++-
+ 2 files changed, 21 insertions(+), 1 deletion(-)
+
+diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
+index c21849ffdd88..6d3fe6433a62 100644
+--- a/arch/arm64/include/asm/mman.h
++++ b/arch/arm64/include/asm/mman.h
+@@ -61,6 +61,15 @@ static inline bool arch_validate_flags(unsigned long vm_flags)
+ return false;
+ }
+
++ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) {
++ /*
++ * An executable GCS isn't a good idea, and the mm
++ * core can't cope with a shared GCS.
++ */
++ if (vm_flags & (VM_EXEC | VM_ARM64_BTI | VM_SHARED))
++ return false;
++ }
++
+ return true;
+
+ }
+diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
+index 645fe60d000f..e44ce6fcfad9 100644
+--- a/arch/arm64/mm/mmap.c
++++ b/arch/arm64/mm/mmap.c
+@@ -79,9 +79,20 @@ arch_initcall(adjust_protection_map);
+
+ pgprot_t vm_get_page_prot(unsigned long vm_flags)
+ {
+- pteval_t prot = pgprot_val(protection_map[vm_flags &
++ pteval_t prot;
++
++ /* If this is a GCS then only interpret VM_WRITE. */
++ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) {
++ if (vm_flags & VM_WRITE)
++ prot = _PAGE_GCS;
++ else
++ prot = _PAGE_GCS_RO;
++ } else {
++ prot = pgprot_val(protection_map[vm_flags &
+ (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
++ }
+
++ /* VM_ARM64_BTI on a GCS is rejected in arch_validate_flags() */
+ if (vm_flags & VM_ARM64_BTI)
+ prot |= PTE_GP;
+
+--
+2.34.1
+
+
+From e505761a54185aa1c4de33454fca255918036af0 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 8 Mar 2023 00:40:28 +0000
+Subject: [PATCH 22/47] KVM: arm64: Manage GCS registers for guests
+
+GCS introduces a number of system registers for EL1 and EL0, on systems
+with GCS we need to context switch them and expose them to VMMs to allow
+guests to use GCS, as well as describe their fine grained traps to
+nested virtualisation. Traps are already disabled.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/kvm_host.h | 12 ++++++++++++
+ arch/arm64/kvm/emulate-nested.c | 4 ++++
+ arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 17 +++++++++++++++++
+ arch/arm64/kvm/sys_regs.c | 22 ++++++++++++++++++++++
+ 4 files changed, 55 insertions(+)
+
+diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
+index 21c57b812569..6c7ea7f9cd92 100644
+--- a/arch/arm64/include/asm/kvm_host.h
++++ b/arch/arm64/include/asm/kvm_host.h
+@@ -388,6 +388,12 @@ enum vcpu_sysreg {
+ GCR_EL1, /* Tag Control Register */
+ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
+
++ /* Guarded Control Stack registers */
++ GCSCRE0_EL1, /* Guarded Control Stack Control (EL0) */
++ GCSCR_EL1, /* Guarded Control Stack Control (EL1) */
++ GCSPR_EL0, /* Guarded Control Stack Pointer (EL0) */
++ GCSPR_EL1, /* Guarded Control Stack Pointer (EL1) */
++
+ /* 32bit specific registers. */
+ DACR32_EL2, /* Domain Access Control Register */
+ IFSR32_EL2, /* Instruction Fault Status Register */
+@@ -1221,6 +1227,12 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature)
+
+ #define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f))
+
++static inline bool has_gcs(void)
++{
++ return IS_ENABLED(CONFIG_ARM64_GCS) &&
++ cpus_have_final_cap(ARM64_HAS_GCS);
++}
++
+ int kvm_trng_call(struct kvm_vcpu *vcpu);
+ #ifdef CONFIG_KVM
+ extern phys_addr_t hyp_mem_base;
+diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
+index 431fd429932d..24eb7eccbae4 100644
+--- a/arch/arm64/kvm/emulate-nested.c
++++ b/arch/arm64/kvm/emulate-nested.c
+@@ -1098,8 +1098,12 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
+ SR_FGT(SYS_ESR_EL1, HFGxTR, ESR_EL1, 1),
+ SR_FGT(SYS_DCZID_EL0, HFGxTR, DCZID_EL0, 1),
+ SR_FGT(SYS_CTR_EL0, HFGxTR, CTR_EL0, 1),
++ SR_FGT(SYS_GCSPR_EL0, HFGxTR, nGCS_EL0, 1),
+ SR_FGT(SYS_CSSELR_EL1, HFGxTR, CSSELR_EL1, 1),
+ SR_FGT(SYS_CPACR_EL1, HFGxTR, CPACR_EL1, 1),
++ SR_FGT(SYS_GCSCR_EL1, HFGxTR, nGCS_EL1, 1),
++ SR_FGT(SYS_GCSPR_EL1, HFGxTR, nGCS_EL1, 1),
++ SR_FGT(SYS_GCSCRE0_EL1, HFGxTR, nGCS_EL0, 1),
+ SR_FGT(SYS_CONTEXTIDR_EL1, HFGxTR, CONTEXTIDR_EL1, 1),
+ SR_FGT(SYS_CLIDR_EL1, HFGxTR, CLIDR_EL1, 1),
+ SR_FGT(SYS_CCSIDR_EL1, HFGxTR, CCSIDR_EL1, 1),
+diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+index bb6b571ec627..ec34d4a90717 100644
+--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
++++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+@@ -25,6 +25,8 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
+ {
+ ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0);
+ ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
++ if (has_gcs())
++ ctxt_sys_reg(ctxt, GCSPR_EL0) = read_sysreg_s(SYS_GCSPR_EL0);
+ }
+
+ static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+@@ -62,6 +64,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
+ ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par();
+ ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);
+
++ if (has_gcs()) {
++ ctxt_sys_reg(ctxt, GCSPR_EL1) = read_sysreg_el1(SYS_GCSPR);
++ ctxt_sys_reg(ctxt, GCSCR_EL1) = read_sysreg_el1(SYS_GCSCR);
++ ctxt_sys_reg(ctxt, GCSCRE0_EL1) = read_sysreg_s(SYS_GCSCRE0_EL1);
++ }
++
+ if (ctxt_has_mte(ctxt)) {
+ ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
+ ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
+@@ -95,6 +103,8 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
+ {
+ write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0);
+ write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0);
++ if (has_gcs())
++ write_sysreg_s(ctxt_sys_reg(ctxt, GCSPR_EL0), SYS_GCSPR_EL0);
+ }
+
+ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
+@@ -138,6 +148,13 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
+ write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
+ write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
+
++ if (has_gcs()) {
++ write_sysreg_el1(ctxt_sys_reg(ctxt, GCSPR_EL1), SYS_GCSPR);
++ write_sysreg_el1(ctxt_sys_reg(ctxt, GCSCR_EL1), SYS_GCSCR);
++ write_sysreg_s(ctxt_sys_reg(ctxt, GCSCRE0_EL1),
++ SYS_GCSCRE0_EL1);
++ }
++
+ if (ctxt_has_mte(ctxt)) {
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
+ write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
+diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
+index 30253bd19917..83ba767e75d2 100644
+--- a/arch/arm64/kvm/sys_regs.c
++++ b/arch/arm64/kvm/sys_regs.c
+@@ -2000,6 +2000,23 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
+ .visibility = mte_visibility, \
+ }
+
++static unsigned int gcs_visibility(const struct kvm_vcpu *vcpu,
++ const struct sys_reg_desc *rd)
++{
++ if (has_gcs())
++ return 0;
++
++ return REG_HIDDEN;
++}
++
++#define GCS_REG(name) { \
++ SYS_DESC(SYS_##name), \
++ .access = undef_access, \
++ .reset = reset_unknown, \
++ .reg = name, \
++ .visibility = gcs_visibility, \
++}
++
+ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+ {
+@@ -2376,6 +2393,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
+ PTRAUTH_KEY(APDB),
+ PTRAUTH_KEY(APGA),
+
++ GCS_REG(GCSCR_EL1),
++ GCS_REG(GCSPR_EL1),
++ GCS_REG(GCSCRE0_EL1),
++
+ { SYS_DESC(SYS_SPSR_EL1), access_spsr},
+ { SYS_DESC(SYS_ELR_EL1), access_elr},
+
+@@ -2462,6 +2483,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
+ { SYS_DESC(SYS_SMIDR_EL1), undef_access },
+ { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
+ { SYS_DESC(SYS_CTR_EL0), access_ctr },
++ GCS_REG(GCSPR_EL0),
+ { SYS_DESC(SYS_SVCR), undef_access },
+
+ { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
+--
+2.34.1
+
+
+From 371653e2b075a2b4c3f2549d02366d4c168c29c6 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 15 Mar 2023 18:48:06 +0000
+Subject: [PATCH 23/47] arm64/gcs: Allow GCS usage at EL0 and EL1
+
+There is a control HCRX_EL2.GCSEn which must be set to allow GCS
+features to take effect at lower ELs and also fine grained traps for GCS
+usage at EL0 and EL1. Configure all these to allow GCS usage by EL0 and
+EL1.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/el2_setup.h | 17 +++++++++++++++++
+ arch/arm64/include/asm/kvm_arm.h | 4 ++--
+ 2 files changed, 19 insertions(+), 2 deletions(-)
+
+diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
+index b7afaa026842..17672563e333 100644
+--- a/arch/arm64/include/asm/el2_setup.h
++++ b/arch/arm64/include/asm/el2_setup.h
+@@ -27,6 +27,14 @@
+ ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
+ cbz x0, .Lskip_hcrx_\@
+ mov_q x0, HCRX_HOST_FLAGS
++
++ /* Enable GCS if supported */
++ mrs_s x1, SYS_ID_AA64PFR1_EL1
++ ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
++ cbz x1, .Lset_hcrx_\@
++ orr x0, x0, #HCRX_EL2_GCSEn
++
++.Lset_hcrx_\@:
+ msr_s SYS_HCRX_EL2, x0
+ .Lskip_hcrx_\@:
+ .endm
+@@ -190,6 +198,15 @@
+ orr x0, x0, #HFGxTR_EL2_nPIR_EL1
+ orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1
+
++ /* GCS depends on PIE so we don't check it if PIE is absent */
++ mrs_s x1, SYS_ID_AA64PFR1_EL1
++ ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
++ cbz x1, .Lset_fgt_\@
++
++ /* Disable traps of access to GCS registers at EL0 and EL1 */
++ orr x0, x0, #HFGxTR_EL2_nGCS_EL1_MASK
++ orr x0, x0, #HFGxTR_EL2_nGCS_EL0_MASK
++
+ .Lset_fgt_\@:
+ msr_s SYS_HFGRTR_EL2, x0
+ msr_s SYS_HFGWTR_EL2, x0
+diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
+index 3c6f8ba1e479..a9354c237a97 100644
+--- a/arch/arm64/include/asm/kvm_arm.h
++++ b/arch/arm64/include/asm/kvm_arm.h
+@@ -103,9 +103,9 @@
+ #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
+
+ #define HCRX_GUEST_FLAGS \
+- (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \
++ (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | HCRX_EL2_GCSEn |\
+ (cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0))
+-#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
++#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_GCSEn)
+
+ /* TCR_EL2 Registers bits */
+ #define TCR_EL2_DS (1UL << 32)
+--
+2.34.1
+
+
+From d50f122180261521787ac2a91c705554eea2e77a Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 15 Mar 2023 18:52:09 +0000
+Subject: [PATCH 24/47] arm64/idreg: Add override for GCS
+
+Hook up an override for GCS, allowing it to be disabled from the command
+line by specifying arm64.nogcs in case there are problems.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt | 6 ++++++
+ arch/arm64/kernel/idreg-override.c | 2 ++
+ 2 files changed, 8 insertions(+)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 31b3a25680d0..e86160251d23 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -429,9 +429,15 @@
+ arm64.nobti [ARM64] Unconditionally disable Branch Target
+ Identification support
+
++ arm64.nogcs [ARM64] Unconditionally disable Guarded Control Stack
++ support
++
+ arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory
+ Set instructions support
+
++ arm64.nopauth [ARM64] Unconditionally disable Pointer Authentication
++ support
++
+ arm64.nomte [ARM64] Unconditionally disable Memory Tagging Extension
+ support
+
+diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
+index e30fd9e32ef3..00bcdad53ba9 100644
+--- a/arch/arm64/kernel/idreg-override.c
++++ b/arch/arm64/kernel/idreg-override.c
+@@ -110,6 +110,7 @@ static const struct ftr_set_desc pfr1 __prel64_initconst = {
+ .override = &id_aa64pfr1_override,
+ .fields = {
+ FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ),
++ FIELD("gcs", ID_AA64PFR1_EL1_GCS_SHIFT, NULL),
+ FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL),
+ FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter),
+ {}
+@@ -190,6 +191,7 @@ static const struct {
+ { "arm64.nosve", "id_aa64pfr0.sve=0" },
+ { "arm64.nosme", "id_aa64pfr1.sme=0" },
+ { "arm64.nobti", "id_aa64pfr1.bt=0" },
++ { "arm64.nogcs", "id_aa64pfr1.gcs=0" },
+ { "arm64.nopauth",
+ "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
+ "id_aa64isar1.api=0 id_aa64isar1.apa=0 "
+--
+2.34.1
+
+
+From cf891db5ab3aad787c1deff23058d51e24b19ce1 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Mon, 20 Mar 2023 18:21:38 +0000
+Subject: [PATCH 25/47] arm64/hwcap: Add hwcap for GCS
+
+Provide a hwcap to enable userspace to detect support for GCS.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ Documentation/arch/arm64/elf_hwcaps.rst | 3 +++
+ arch/arm64/include/asm/hwcap.h | 1 +
+ arch/arm64/include/uapi/asm/hwcap.h | 1 +
+ arch/arm64/kernel/cpufeature.c | 3 +++
+ arch/arm64/kernel/cpuinfo.c | 1 +
+ 5 files changed, 9 insertions(+)
+
+diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst
+index ced7b335e2e0..86d4ace9c75c 100644
+--- a/Documentation/arch/arm64/elf_hwcaps.rst
++++ b/Documentation/arch/arm64/elf_hwcaps.rst
+@@ -317,6 +317,9 @@ HWCAP2_LRCPC3
+ HWCAP2_LSE128
+ Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0011.
+
++HWCAP2_GCS
++ Functionality implied by ID_AA64PFR1_EL1.GCS == 0b1
++
+ 4. Unused AT_HWCAP bits
+ -----------------------
+
+diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
+index cd71e09ea14d..e01e6b72a839 100644
+--- a/arch/arm64/include/asm/hwcap.h
++++ b/arch/arm64/include/asm/hwcap.h
+@@ -142,6 +142,7 @@
+ #define KERNEL_HWCAP_SVE_B16B16 __khwcap2_feature(SVE_B16B16)
+ #define KERNEL_HWCAP_LRCPC3 __khwcap2_feature(LRCPC3)
+ #define KERNEL_HWCAP_LSE128 __khwcap2_feature(LSE128)
++#define KERNEL_HWCAP_GCS __khwcap2_feature(GCS)
+
+ /*
+ * This yields a mask that user programs can use to figure out what
+diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
+index 5023599fa278..996b5b5d4c4e 100644
+--- a/arch/arm64/include/uapi/asm/hwcap.h
++++ b/arch/arm64/include/uapi/asm/hwcap.h
+@@ -107,5 +107,6 @@
+ #define HWCAP2_SVE_B16B16 (1UL << 45)
+ #define HWCAP2_LRCPC3 (1UL << 46)
+ #define HWCAP2_LSE128 (1UL << 47)
++#define HWCAP2_GCS (1UL << 48)
+
+ #endif /* _UAPI__ASM_HWCAP_H */
+diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
+index b606842ab8c1..1a92c4502a0b 100644
+--- a/arch/arm64/kernel/cpufeature.c
++++ b/arch/arm64/kernel/cpufeature.c
+@@ -2867,6 +2867,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
+ HWCAP_CAP(ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
++#endif
++#ifdef CONFIG_ARM64_GCS
++ HWCAP_CAP(ID_AA64PFR1_EL1, GCS, IMP, CAP_HWCAP, KERNEL_HWCAP_GCS),
+ #endif
+ HWCAP_CAP(ID_AA64PFR1_EL1, SSBS, SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS),
+ #ifdef CONFIG_ARM64_BTI
+diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
+index 47043c0d95ec..b3ec0b89c9e0 100644
+--- a/arch/arm64/kernel/cpuinfo.c
++++ b/arch/arm64/kernel/cpuinfo.c
+@@ -128,6 +128,7 @@ static const char *const hwcap_str[] = {
+ [KERNEL_HWCAP_SVE_B16B16] = "sveb16b16",
+ [KERNEL_HWCAP_LRCPC3] = "lrcpc3",
+ [KERNEL_HWCAP_LSE128] = "lse128",
++ [KERNEL_HWCAP_GCS] = "gcs",
+ };
+
+ #ifdef CONFIG_COMPAT
+--
+2.34.1
+
+
+From bccc68b34269e6ccc69fdbbca6d17131093170f7 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Fri, 14 Apr 2023 20:57:45 +0100
+Subject: [PATCH 26/47] arm64/traps: Handle GCS exceptions
+
+A new exception code is defined for GCS specific faults other than
+standard load/store faults, for example GCS token validation failures,
+add handling for this. These faults are reported to userspace as
+segfaults with code SEGV_CPERR (protection error), mirroring the
+reporting for x86 shadow stack errors.
+
+GCS faults due to memory load/store operations generate data aborts with
+a flag set, these will be handled separately as part of the data abort
+handling.
+
+Since we do not currently enable GCS for EL1 we should not get any faults
+there but while we're at it we wire things up there, treating any GCS
+fault as fatal.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/esr.h | 28 +++++++++++++++++++++++++++-
+ arch/arm64/include/asm/exception.h | 2 ++
+ arch/arm64/kernel/entry-common.c | 23 +++++++++++++++++++++++
+ arch/arm64/kernel/traps.c | 11 +++++++++++
+ 4 files changed, 63 insertions(+), 1 deletion(-)
+
+diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
+index 353fe08546cf..20ee9f531864 100644
+--- a/arch/arm64/include/asm/esr.h
++++ b/arch/arm64/include/asm/esr.h
+@@ -51,7 +51,8 @@
+ #define ESR_ELx_EC_FP_EXC32 (0x28)
+ /* Unallocated EC: 0x29 - 0x2B */
+ #define ESR_ELx_EC_FP_EXC64 (0x2C)
+-/* Unallocated EC: 0x2D - 0x2E */
++#define ESR_ELx_EC_GCS (0x2D)
++/* Unallocated EC: 0x2E */
+ #define ESR_ELx_EC_SERROR (0x2F)
+ #define ESR_ELx_EC_BREAKPT_LOW (0x30)
+ #define ESR_ELx_EC_BREAKPT_CUR (0x31)
+@@ -382,6 +383,31 @@
+ #define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5)
+ #define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0)
+
++/* ISS field definitions for GCS */
++#define ESR_ELx_ExType_SHIFT (20)
++#define ESR_ELx_ExType_MASK GENMASK(23, 20)
++#define ESR_ELx_Raddr_SHIFT (10)
++#define ESR_ELx_Raddr_MASK GENMASK(14, 10)
++#define ESR_ELx_Rn_SHIFT (5)
++#define ESR_ELx_Rn_MASK GENMASK(9, 5)
++#define ESR_ELx_Rvalue_SHIFT 5
++#define ESR_ELx_Rvalue_MASK GENMASK(9, 5)
++#define ESR_ELx_IT_SHIFT (0)
++#define ESR_ELx_IT_MASK GENMASK(4, 0)
++
++#define ESR_ELx_ExType_DATA_CHECK 0
++#define ESR_ELx_ExType_EXLOCK 1
++#define ESR_ELx_ExType_STR 2
++
++#define ESR_ELx_IT_RET 0
++#define ESR_ELx_IT_GCSPOPM 1
++#define ESR_ELx_IT_RET_KEYA 2
++#define ESR_ELx_IT_RET_KEYB 3
++#define ESR_ELx_IT_GCSSS1 4
++#define ESR_ELx_IT_GCSSS2 5
++#define ESR_ELx_IT_GCSPOPCX 6
++#define ESR_ELx_IT_GCSPOPX 7
++
+ #ifndef __ASSEMBLY__
+ #include <asm/types.h>
+
+diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
+index ad688e157c9b..99caff458e20 100644
+--- a/arch/arm64/include/asm/exception.h
++++ b/arch/arm64/include/asm/exception.h
+@@ -57,6 +57,8 @@ void do_el0_undef(struct pt_regs *regs, unsigned long esr);
+ void do_el1_undef(struct pt_regs *regs, unsigned long esr);
+ void do_el0_bti(struct pt_regs *regs);
+ void do_el1_bti(struct pt_regs *regs, unsigned long esr);
++void do_el0_gcs(struct pt_regs *regs, unsigned long esr);
++void do_el1_gcs(struct pt_regs *regs, unsigned long esr);
+ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
+ struct pt_regs *regs);
+ void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs);
+diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
+index 0fc94207e69a..52d78ce63a4e 100644
+--- a/arch/arm64/kernel/entry-common.c
++++ b/arch/arm64/kernel/entry-common.c
+@@ -429,6 +429,15 @@ static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr)
+ exit_to_kernel_mode(regs);
+ }
+
++static void noinstr el1_gcs(struct pt_regs *regs, unsigned long esr)
++{
++ enter_from_kernel_mode(regs);
++ local_daif_inherit(regs);
++ do_el1_gcs(regs, esr);
++ local_daif_mask();
++ exit_to_kernel_mode(regs);
++}
++
+ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
+ {
+ unsigned long far = read_sysreg(far_el1);
+@@ -471,6 +480,9 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
+ case ESR_ELx_EC_BTI:
+ el1_bti(regs, esr);
+ break;
++ case ESR_ELx_EC_GCS:
++ el1_gcs(regs, esr);
++ break;
+ case ESR_ELx_EC_BREAKPT_CUR:
+ case ESR_ELx_EC_SOFTSTP_CUR:
+ case ESR_ELx_EC_WATCHPT_CUR:
+@@ -650,6 +662,14 @@ static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr)
+ exit_to_user_mode(regs);
+ }
+
++static void noinstr el0_gcs(struct pt_regs *regs, unsigned long esr)
++{
++ enter_from_user_mode(regs);
++ local_daif_restore(DAIF_PROCCTX);
++ do_el0_gcs(regs, esr);
++ exit_to_user_mode(regs);
++}
++
+ static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
+ {
+ enter_from_user_mode(regs);
+@@ -732,6 +752,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
+ case ESR_ELx_EC_MOPS:
+ el0_mops(regs, esr);
+ break;
++ case ESR_ELx_EC_GCS:
++ el0_gcs(regs, esr);
++ break;
+ case ESR_ELx_EC_BREAKPT_LOW:
+ case ESR_ELx_EC_SOFTSTP_LOW:
+ case ESR_ELx_EC_WATCHPT_LOW:
+diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
+index 215e6d7f2df8..fb867c6526a6 100644
+--- a/arch/arm64/kernel/traps.c
++++ b/arch/arm64/kernel/traps.c
+@@ -500,6 +500,16 @@ void do_el1_bti(struct pt_regs *regs, unsigned long esr)
+ die("Oops - BTI", regs, esr);
+ }
+
++void do_el0_gcs(struct pt_regs *regs, unsigned long esr)
++{
++ force_signal_inject(SIGSEGV, SEGV_CPERR, regs->pc, 0);
++}
++
++void do_el1_gcs(struct pt_regs *regs, unsigned long esr)
++{
++ die("Oops - GCS", regs, esr);
++}
++
+ void do_el0_fpac(struct pt_regs *regs, unsigned long esr)
+ {
+ force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr);
+@@ -838,6 +848,7 @@ static const char *esr_class_str[] = {
+ [ESR_ELx_EC_MOPS] = "MOPS",
+ [ESR_ELx_EC_FP_EXC32] = "FP (AArch32)",
+ [ESR_ELx_EC_FP_EXC64] = "FP (AArch64)",
++ [ESR_ELx_EC_GCS] = "Guarded Control Stack",
+ [ESR_ELx_EC_SERROR] = "SError",
+ [ESR_ELx_EC_BREAKPT_LOW] = "Breakpoint (lower EL)",
+ [ESR_ELx_EC_BREAKPT_CUR] = "Breakpoint (current EL)",
+--
+2.34.1
+
+
+From a44e4f0ea5726b528c7247c2331301e95de6acea Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Fri, 28 Apr 2023 13:59:24 +0100
+Subject: [PATCH 27/47] arm64/mm: Handle GCS data aborts
+
+All GCS operations at EL0 must happen on a page which is marked as
+having UnprivGCS access, including read operations. If a GCS operation
+attempts to access a page without this then it will generate a data
+abort with the GCS bit set in ESR_EL1.ISS2.
+
+EL0 may validly generate such faults, for example due to copy on write
+which will cause the GCS data to be stored in a read only page with no
+GCS permissions until the actual copy happens. Since UnprivGCS allows
+both reads and writes to the GCS (though only through GCS operations) we
+need to ensure that the memory management subsystem handles GCS accesses
+as writes at all times. Do this by adding FAULT_FLAG_WRITE to any GCS
+page faults, adding handling to ensure that invalid cases are identfied
+as such early so the memory management core does not think they will
+succeed. The core cannot distinguish between VMAs which are generally
+writeable and VMAs which are only writeable through GCS operations.
+
+EL1 may validly write to EL0 GCS for management purposes (eg, while
+initialising with cap tokens).
+
+We also report any GCS faults in VMAs not marked as part of a GCS as
+access violations, causing a fault to be delivered to userspace if it
+attempts to do GCS operations outside a GCS.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/mm/fault.c | 76 ++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 68 insertions(+), 8 deletions(-)
+
+diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
+index 55f6455a8284..c2a36102e143 100644
+--- a/arch/arm64/mm/fault.c
++++ b/arch/arm64/mm/fault.c
+@@ -494,13 +494,30 @@ static void do_bad_area(unsigned long far, unsigned long esr,
+ }
+ }
+
++/*
++ * Note: not valid for EL1 DC IVAC, but we never use that such that it
++ * should fault. EL0 cannot issue DC IVAC (undef).
++ */
++static bool is_write_abort(unsigned long esr)
++{
++ return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
++}
++
++static bool is_gcs_fault(unsigned long esr)
++{
++ if (!esr_is_data_abort(esr))
++ return false;
++
++ return ESR_ELx_ISS2(esr) & ESR_ELx_GCS;
++}
++
+ #define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000)
+ #define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000)
+
+ static vm_fault_t __do_page_fault(struct mm_struct *mm,
+ struct vm_area_struct *vma, unsigned long addr,
+ unsigned int mm_flags, unsigned long vm_flags,
+- struct pt_regs *regs)
++ unsigned long esr, struct pt_regs *regs)
+ {
+ /*
+ * Ok, we have a good vm_area for this memory access, so we can handle
+@@ -510,6 +527,26 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm,
+ */
+ if (!(vma->vm_flags & vm_flags))
+ return VM_FAULT_BADACCESS;
++
++ if (vma->vm_flags & VM_SHADOW_STACK) {
++ /*
++ * Writes to a GCS must either be generated by a GCS
++ * operation or be from EL1.
++ */
++ if (is_write_abort(esr) &&
++ !(is_gcs_fault(esr) || is_el1_data_abort(esr)))
++ return VM_FAULT_BADACCESS;
++ } else {
++ /*
++ * GCS faults should never happen for pages that are
++ * not part of a GCS and the operation being attempted
++ * can never succeed.
++ */
++ if (is_gcs_fault(esr))
++ return VM_FAULT_BADACCESS;
++ }
++
++
+ return handle_mm_fault(vma, addr, mm_flags, regs);
+ }
+
+@@ -518,13 +555,23 @@ static bool is_el0_instruction_abort(unsigned long esr)
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
+ }
+
+-/*
+- * Note: not valid for EL1 DC IVAC, but we never use that such that it
+- * should fault. EL0 cannot issue DC IVAC (undef).
+- */
+-static bool is_write_abort(unsigned long esr)
++static bool is_invalid_gcs_access(struct vm_area_struct *vma, u64 esr)
+ {
+- return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
++ if (!system_supports_gcs())
++ return false;
++
++ if (unlikely(is_gcs_fault(esr))) {
++ /* GCS accesses must be performed on a GCS page */
++ if (!(vma->vm_flags & VM_SHADOW_STACK))
++ return true;
++ if (!(vma->vm_flags & VM_WRITE))
++ return true;
++ } else if (unlikely(vma->vm_flags & VM_SHADOW_STACK)) {
++ /* Only GCS operations can write to a GCS page */
++ return is_write_abort(esr);
++ }
++
++ return false;
+ }
+
+ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
+@@ -561,6 +608,14 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
+ /* It was exec fault */
+ vm_flags = VM_EXEC;
+ mm_flags |= FAULT_FLAG_INSTRUCTION;
++ } else if (is_gcs_fault(esr)) {
++ /*
++ * The GCS permission on a page implies both read and
++ * write so always handle any GCS fault as a write fault,
++ * we need to trigger CoW even for GCS reads.
++ */
++ vm_flags = VM_WRITE;
++ mm_flags |= FAULT_FLAG_WRITE;
+ } else if (is_write_abort(esr)) {
+ /* It was write fault */
+ vm_flags = VM_WRITE;
+@@ -594,6 +649,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
+ if (!vma)
+ goto lock_mmap;
+
++ if (is_invalid_gcs_access(vma, esr)) {
++ vma_end_read(vma);
++ goto lock_mmap;
++ }
++
+ if (!(vma->vm_flags & vm_flags)) {
+ vma_end_read(vma);
+ goto lock_mmap;
+@@ -625,7 +685,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
+ goto done;
+ }
+
+- fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, regs);
++ fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, esr, regs);
+
+ /* Quick path to respond to signals */
+ if (fault_signal_pending(fault, regs)) {
+--
+2.34.1
+
+
+From 5fe1e5ee0c88af97929f0a604ac3079542f0aadb Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 12 Apr 2023 20:31:01 +0100
+Subject: [PATCH 28/47] arm64/gcs: Context switch GCS state for EL0
+
+There are two registers controlling the GCS state of EL0, GCSPR_EL0 which
+is the current GCS pointer and GCSCRE0_EL1 which has enable bits for the
+specific GCS functionality enabled for EL0. Manage these on context switch
+and process lifetime events, GCS is reset on exec(). Also ensure that
+any changes to the GCS memory are visible to other PEs and that changes
+from other PEs are visible on this one by issuing a GCSB DSYNC when
+moving to or from a thread with GCS.
+
+Since the current GCS configuration of a thread will be visible to
+userspace we store the configuration in the format used with userspace
+and provide a helper which configures the system register as needed.
+
+On systems that support GCS we always allow access to GCSPR_EL0, this
+facilitates reporting of GCS faults if userspace implements disabling of
+GCS on error - the GCS can still be discovered and examined even if GCS
+has been disabled.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/gcs.h | 24 +++++++++++++
+ arch/arm64/include/asm/processor.h | 6 ++++
+ arch/arm64/kernel/process.c | 56 ++++++++++++++++++++++++++++++
+ arch/arm64/mm/Makefile | 1 +
+ arch/arm64/mm/gcs.c | 39 +++++++++++++++++++++
+ 5 files changed, 126 insertions(+)
+ create mode 100644 arch/arm64/mm/gcs.c
+
+diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h
+index 7c5e95218db6..04594ef59dad 100644
+--- a/arch/arm64/include/asm/gcs.h
++++ b/arch/arm64/include/asm/gcs.h
+@@ -48,4 +48,28 @@ static inline u64 gcsss2(void)
+ return Xt;
+ }
+
++#ifdef CONFIG_ARM64_GCS
++
++static inline bool task_gcs_el0_enabled(struct task_struct *task)
++{
++ return current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE;
++}
++
++void gcs_set_el0_mode(struct task_struct *task);
++void gcs_free(struct task_struct *task);
++void gcs_preserve_current_state(void);
++
++#else
++
++static inline bool task_gcs_el0_enabled(struct task_struct *task)
++{
++ return false;
++}
++
++static inline void gcs_set_el0_mode(struct task_struct *task) { }
++static inline void gcs_free(struct task_struct *task) { }
++static inline void gcs_preserve_current_state(void) { }
++
++#endif
++
+ #endif
+diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
+index 5b0a04810b23..6fc6dcbd494c 100644
+--- a/arch/arm64/include/asm/processor.h
++++ b/arch/arm64/include/asm/processor.h
+@@ -182,6 +182,12 @@ struct thread_struct {
+ u64 sctlr_user;
+ u64 svcr;
+ u64 tpidr2_el0;
++#ifdef CONFIG_ARM64_GCS
++ unsigned int gcs_el0_mode;
++ u64 gcspr_el0;
++ u64 gcs_base;
++ u64 gcs_size;
++#endif
+ };
+
+ static inline unsigned int thread_get_vl(struct thread_struct *thread,
+diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
+index 7387b68c745b..fd80b43c2969 100644
+--- a/arch/arm64/kernel/process.c
++++ b/arch/arm64/kernel/process.c
+@@ -48,6 +48,7 @@
+ #include <asm/cacheflush.h>
+ #include <asm/exec.h>
+ #include <asm/fpsimd.h>
++#include <asm/gcs.h>
+ #include <asm/mmu_context.h>
+ #include <asm/mte.h>
+ #include <asm/processor.h>
+@@ -271,12 +272,32 @@ static void flush_tagged_addr_state(void)
+ clear_thread_flag(TIF_TAGGED_ADDR);
+ }
+
++#ifdef CONFIG_ARM64_GCS
++
++static void flush_gcs(void)
++{
++ if (!system_supports_gcs())
++ return;
++
++ gcs_free(current);
++ current->thread.gcs_el0_mode = 0;
++ write_sysreg_s(0, SYS_GCSCRE0_EL1);
++ write_sysreg_s(0, SYS_GCSPR_EL0);
++}
++
++#else
++
++static void flush_gcs(void) { }
++
++#endif
++
+ void flush_thread(void)
+ {
+ fpsimd_flush_thread();
+ tls_thread_flush();
+ flush_ptrace_hw_breakpoint(current);
+ flush_tagged_addr_state();
++ flush_gcs();
+ }
+
+ void arch_release_task_struct(struct task_struct *tsk)
+@@ -474,6 +495,40 @@ static void entry_task_switch(struct task_struct *next)
+ __this_cpu_write(__entry_task, next);
+ }
+
++#ifdef CONFIG_ARM64_GCS
++
++void gcs_preserve_current_state(void)
++{
++ if (task_gcs_el0_enabled(current))
++ current->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
++}
++
++static void gcs_thread_switch(struct task_struct *next)
++{
++ if (!system_supports_gcs())
++ return;
++
++ gcs_preserve_current_state();
++
++ gcs_set_el0_mode(next);
++ write_sysreg_s(next->thread.gcspr_el0, SYS_GCSPR_EL0);
++
++ /*
++ * Ensure that GCS changes are observable by/from other PEs in
++ * case of migration.
++ */
++ if (task_gcs_el0_enabled(current) || task_gcs_el0_enabled(next))
++ gcsb_dsync();
++}
++
++#else
++
++static void gcs_thread_switch(struct task_struct *next)
++{
++}
++
++#endif
++
+ /*
+ * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
+ * Ensure access is disabled when switching to a 32bit task, ensure
+@@ -533,6 +588,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
+ ssbs_thread_switch(next);
+ erratum_1418040_thread_switch(next);
+ ptrauth_thread_switch_user(next);
++ gcs_thread_switch(next);
+
+ /*
+ * Complete any pending TLB or cache maintenance on this CPU in case
+diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
+index dbd1bc95967d..4e7cb2f02999 100644
+--- a/arch/arm64/mm/Makefile
++++ b/arch/arm64/mm/Makefile
+@@ -10,6 +10,7 @@ obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o
+ obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o
+ obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
+ obj-$(CONFIG_ARM64_MTE) += mteswap.o
++obj-$(CONFIG_ARM64_GCS) += gcs.o
+ KASAN_SANITIZE_physaddr.o += n
+
+ obj-$(CONFIG_KASAN) += kasan_init.o
+diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c
+new file mode 100644
+index 000000000000..b0a67efc522b
+--- /dev/null
++++ b/arch/arm64/mm/gcs.c
+@@ -0,0 +1,39 @@
++// SPDX-License-Identifier: GPL-2.0-only
++
++#include <linux/mm.h>
++#include <linux/mman.h>
++#include <linux/syscalls.h>
++#include <linux/types.h>
++
++#include <asm/cpufeature.h>
++#include <asm/page.h>
++
++/*
++ * Apply the GCS mode configured for the specified task to the
++ * hardware.
++ */
++void gcs_set_el0_mode(struct task_struct *task)
++{
++ u64 gcscre0_el1 = GCSCRE0_EL1_nTR;
++
++ if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE)
++ gcscre0_el1 |= GCSCRE0_EL1_RVCHKEN | GCSCRE0_EL1_PCRSEL;
++
++ if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_WRITE)
++ gcscre0_el1 |= GCSCRE0_EL1_STREn;
++
++ if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_PUSH)
++ gcscre0_el1 |= GCSCRE0_EL1_PUSHMEn;
++
++ write_sysreg_s(gcscre0_el1, SYS_GCSCRE0_EL1);
++}
++
++void gcs_free(struct task_struct *task)
++{
++ if (task->thread.gcs_base)
++ vm_munmap(task->thread.gcs_base, task->thread.gcs_size);
++
++ task->thread.gcspr_el0 = 0;
++ task->thread.gcs_base = 0;
++ task->thread.gcs_size = 0;
++}
+--
+2.34.1
+
+
+From ae084320fd60ebe9212701d1da31a466e3aecb61 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 31 May 2023 16:39:35 +0100
+Subject: [PATCH 29/47] arm64/gcs: Ensure that new threads have a GCS
+
+When a new thread is created by a thread with GCS enabled the GCS needs
+to be specified along with the regular stack. clone3() has been
+extended to support this case, allowing userspace to explicitly specify
+the size and location of the GCS. The specified GCS must have a valid
+GCS token at the top of the stack, as though userspace were pivoting to
+the new GCS. This will be consumed on use. At present we do not
+atomically consume the token, this will be addressed in a future
+revision.
+
+Unfortunately plain clone() is not extensible and existing clone3()
+users will not specify a stack so all existing code would be broken if
+we mandated specifying the stack explicitly. For compatibility with
+these cases and also x86 (which did not initially implement clone3()
+support for shadow stacks) if no GCS is specified we will allocate one
+so when a thread is created which has GCS enabled allocate one for it.
+We follow the extensively discussed x86 implementation and allocate
+min(RLIMIT_STACK, 2G). Since the GCS only stores the call stack and not
+any variables this should be more than sufficient for most applications.
+
+GCSs allocated via this mechanism will be freed when the thread exits,
+those explicitly configured by the user will not.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/gcs.h | 9 +++
+ arch/arm64/kernel/process.c | 29 +++++++++
+ arch/arm64/mm/gcs.c | 117 +++++++++++++++++++++++++++++++++++
+ 3 files changed, 155 insertions(+)
+
+diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h
+index 04594ef59dad..c1f274fdb9c0 100644
+--- a/arch/arm64/include/asm/gcs.h
++++ b/arch/arm64/include/asm/gcs.h
+@@ -8,6 +8,8 @@
+ #include <asm/types.h>
+ #include <asm/uaccess.h>
+
++struct kernel_clone_args;
++
+ static inline void gcsb_dsync(void)
+ {
+ asm volatile(".inst 0xd503227f" : : : "memory");
+@@ -58,6 +60,8 @@ static inline bool task_gcs_el0_enabled(struct task_struct *task)
+ void gcs_set_el0_mode(struct task_struct *task);
+ void gcs_free(struct task_struct *task);
+ void gcs_preserve_current_state(void);
++unsigned long gcs_alloc_thread_stack(struct task_struct *tsk,
++ const struct kernel_clone_args *args);
+
+ #else
+
+@@ -69,6 +73,11 @@ static inline bool task_gcs_el0_enabled(struct task_struct *task)
+ static inline void gcs_set_el0_mode(struct task_struct *task) { }
+ static inline void gcs_free(struct task_struct *task) { }
+ static inline void gcs_preserve_current_state(void) { }
++static inline unsigned long gcs_alloc_thread_stack(struct task_struct *tsk,
++ const struct kernel_clone_args *args)
++{
++ return -ENOTSUPP;
++}
+
+ #endif
+
+diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
+index fd80b43c2969..8bd66cde0a86 100644
+--- a/arch/arm64/kernel/process.c
++++ b/arch/arm64/kernel/process.c
+@@ -285,9 +285,32 @@ static void flush_gcs(void)
+ write_sysreg_s(0, SYS_GCSPR_EL0);
+ }
+
++static int copy_thread_gcs(struct task_struct *p,
++ const struct kernel_clone_args *args)
++{
++ unsigned long gcs;
++
++ gcs = gcs_alloc_thread_stack(p, args);
++ if (IS_ERR_VALUE(gcs))
++ return PTR_ERR((void *)gcs);
++
++ p->thread.gcs_el0_mode = current->thread.gcs_el0_mode;
++ p->thread.gcs_el0_locked = current->thread.gcs_el0_locked;
++
++ /* Ensure the current state of the GCS is seen by CoW */
++ gcsb_dsync();
++
++ return 0;
++}
++
+ #else
+
+ static void flush_gcs(void) { }
++static int copy_thread_gcs(struct task_struct *p,
++ const struct kernel_clone_args *args)
++{
++ return 0;
++}
+
+ #endif
+
+@@ -303,6 +326,7 @@ void flush_thread(void)
+ void arch_release_task_struct(struct task_struct *tsk)
+ {
+ fpsimd_release_task(tsk);
++ gcs_free(tsk);
+ }
+
+ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+@@ -369,6 +393,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
+ unsigned long stack_start = args->stack;
+ unsigned long tls = args->tls;
+ struct pt_regs *childregs = task_pt_regs(p);
++ int ret;
+
+ memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
+
+@@ -410,6 +435,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
+ p->thread.uw.tp_value = tls;
+ p->thread.tpidr2_el0 = 0;
+ }
++
++ ret = copy_thread_gcs(p, args);
++ if (ret != 0)
++ return ret;
+ } else {
+ /*
+ * A kthread has no context to ERET to, so ensure any buggy
+diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c
+index b0a67efc522b..3cbc3a3d4bc7 100644
+--- a/arch/arm64/mm/gcs.c
++++ b/arch/arm64/mm/gcs.c
+@@ -8,6 +8,113 @@
+ #include <asm/cpufeature.h>
+ #include <asm/page.h>
+
++static unsigned long alloc_gcs(unsigned long addr, unsigned long size,
++ unsigned long token_offset, bool set_res_tok)
++{
++ int flags = MAP_ANONYMOUS | MAP_PRIVATE;
++ struct mm_struct *mm = current->mm;
++ unsigned long mapped_addr, unused;
++
++ if (addr)
++ flags |= MAP_FIXED_NOREPLACE;
++
++ mmap_write_lock(mm);
++ mapped_addr = do_mmap(NULL, addr, size, PROT_READ | PROT_WRITE, flags,
++ VM_SHADOW_STACK, 0, &unused, NULL);
++ mmap_write_unlock(mm);
++
++ return mapped_addr;
++}
++
++static unsigned long gcs_size(unsigned long size)
++{
++ if (size)
++ return PAGE_ALIGN(size);
++
++ /* Allocate RLIMIT_STACK/2 with limits of PAGE_SIZE..2G */
++ size = PAGE_ALIGN(min_t(unsigned long long,
++ rlimit(RLIMIT_STACK) / 2, SZ_2G));
++ return max(PAGE_SIZE, size);
++}
++
++static bool gcs_consume_token(struct task_struct *tsk, unsigned long user_addr)
++{
++ unsigned long expected = GCS_CAP(user_addr);
++ unsigned long val;
++ int ret = 0;
++
++	/* This should really be an atomic cmpxchg. It is not. */
++ __get_user_error(val, (__user unsigned long *)user_addr, ret);
++ if (ret != 0)
++ return false;
++
++ if (val != expected)
++ return false;
++
++ put_user_gcs(0, (__user unsigned long*)user_addr, &ret);
++
++ return ret == 0;
++}
++
++unsigned long gcs_alloc_thread_stack(struct task_struct *tsk,
++ const struct kernel_clone_args *args)
++{
++ unsigned long addr, size, gcspr_el0;
++
++ /* If the user specified a GCS use it. */
++ if (args->shadow_stack_size) {
++ if (!system_supports_gcs())
++ return (unsigned long)ERR_PTR(-EINVAL);
++
++ addr = args->shadow_stack;
++ size = args->shadow_stack_size;
++
++ /*
++ * There should be a token, there might be an end of
++ * stack marker.
++ */
++ gcspr_el0 = addr + size - (2 * sizeof(u64));
++ if (!gcs_consume_token(tsk, gcspr_el0)) {
++ gcspr_el0 += sizeof(u64);
++ if (!gcs_consume_token(tsk, gcspr_el0))
++ return (unsigned long)ERR_PTR(-EINVAL);
++ }
++
++ /* Userspace is responsible for unmapping */
++ tsk->thread.gcspr_el0 = gcspr_el0 + sizeof(u64);
++ } else {
++
++ /*
++ * Otherwise fall back to legacy clone() support and
++ * implicitly allocate a GCS if we need a new one.
++ */
++
++ if (!system_supports_gcs())
++ return 0;
++
++ if (!task_gcs_el0_enabled(tsk))
++ return 0;
++
++ if ((args->flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM) {
++ tsk->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
++ return 0;
++ }
++
++ size = args->stack_size;
++
++ size = gcs_size(size);
++ addr = alloc_gcs(0, size, 0, 0);
++ if (IS_ERR_VALUE(addr))
++ return addr;
++
++ tsk->thread.gcs_base = addr;
++ tsk->thread.gcs_size = size;
++ tsk->thread.gcspr_el0 = addr + size - sizeof(u64);
++ }
++
++ return addr;
++}
++
+ /*
+ * Apply the GCS mode configured for the specified task to the
+ * hardware.
+@@ -30,6 +137,16 @@ void gcs_set_el0_mode(struct task_struct *task)
+
+ void gcs_free(struct task_struct *task)
+ {
++
++ /*
++ * When fork() with CLONE_VM fails, the child (tsk) already
++ * has a GCS allocated, and exit_thread() calls this function
++ * to free it. In this case the parent (current) and the
++ * child share the same mm struct.
++ */
++ if (!task->mm || task->mm != current->mm)
++ return;
++
+ if (task->thread.gcs_base)
+ vm_munmap(task->thread.gcs_base, task->thread.gcs_size);
+
+--
+2.34.1
+
+
+From 59d903881df39c2f5ec10d818d303b7fd96a90aa Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 5 Apr 2023 20:14:17 +0100
+Subject: [PATCH 30/47] arm64/gcs: Implement shadow stack prctl() interface
+
+Implement the architecture neutral prctl() interface for setting the
+shadow stack status, this supports setting and reading the current GCS
+configuration for the current thread.
+
+Userspace can enable basic GCS functionality and additionally also
+support for GCS pushes and arbitrary GCS stores. It is expected that
+this prctl() will be called very early in application startup, for
+example by the dynamic linker, and not subsequently adjusted during
+normal operation. Users should carefully note that after enabling GCS
+for a thread GCS will become active with no call stack so it is not
+normally possible to return from the function that invoked the prctl().
+
+State is stored per thread, enabling GCS for a thread causes a GCS to be
+allocated for that thread.
+
+Userspace may lock the current GCS configuration by specifying
+PR_SHADOW_STACK_ENABLE_LOCK, this prevents any further changes to the
+GCS configuration via any means.
+
+If GCS is not being enabled then all flags other than _LOCK are ignored,
+it is not possible to enable stores or pops without enabling GCS.
+
+When disabling the GCS we do not free the allocated stack, this allows
+for inspection of the GCS after disabling as part of fault reporting.
+Since it is not an expected use case and since it presents some
+complications in determining what to do with previously initialised data
+on the GCS, attempts to reenable GCS after this are rejected. This can
+be revisited if a use case arises.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/gcs.h | 22 ++++++++
+ arch/arm64/include/asm/processor.h | 1 +
+ arch/arm64/mm/gcs.c | 81 ++++++++++++++++++++++++++++++
+ 3 files changed, 104 insertions(+)
+
+diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h
+index c1f274fdb9c0..48c97e63e56a 100644
+--- a/arch/arm64/include/asm/gcs.h
++++ b/arch/arm64/include/asm/gcs.h
+@@ -50,6 +50,9 @@ static inline u64 gcsss2(void)
+ return Xt;
+ }
+
++#define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK \
++ (PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH)
++
+ #ifdef CONFIG_ARM64_GCS
+
+ static inline bool task_gcs_el0_enabled(struct task_struct *task)
+@@ -63,6 +66,20 @@ void gcs_preserve_current_state(void);
+ unsigned long gcs_alloc_thread_stack(struct task_struct *tsk,
+ const struct kernel_clone_args *args);
+
++static inline int gcs_check_locked(struct task_struct *task,
++ unsigned long new_val)
++{
++ unsigned long cur_val = task->thread.gcs_el0_mode;
++
++ cur_val &= task->thread.gcs_el0_locked;
++ new_val &= task->thread.gcs_el0_locked;
++
++ if (cur_val != new_val)
++ return -EBUSY;
++
++ return 0;
++}
++
+ #else
+
+ static inline bool task_gcs_el0_enabled(struct task_struct *task)
+@@ -78,6 +95,11 @@ static inline unsigned long gcs_alloc_thread_stack(struct task_struct *tsk,
+ {
+ return -ENOTSUPP;
+ }
++static inline int gcs_check_locked(struct task_struct *task,
++ unsigned long new_val)
++{
++ return 0;
++}
+
+ #endif
+
+diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
+index 6fc6dcbd494c..6a3091ec0f03 100644
+--- a/arch/arm64/include/asm/processor.h
++++ b/arch/arm64/include/asm/processor.h
+@@ -184,6 +184,7 @@ struct thread_struct {
+ u64 tpidr2_el0;
+ #ifdef CONFIG_ARM64_GCS
+ unsigned int gcs_el0_mode;
++ unsigned int gcs_el0_locked;
+ u64 gcspr_el0;
+ u64 gcs_base;
+ u64 gcs_size;
+diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c
+index 3cbc3a3d4bc7..d5b593d9d9bd 100644
+--- a/arch/arm64/mm/gcs.c
++++ b/arch/arm64/mm/gcs.c
+@@ -154,3 +154,84 @@ void gcs_free(struct task_struct *task)
+ task->thread.gcs_base = 0;
+ task->thread.gcs_size = 0;
+ }
++
++int arch_set_shadow_stack_status(struct task_struct *task, unsigned long arg)
++{
++ unsigned long gcs, size;
++ int ret;
++
++ if (!system_supports_gcs())
++ return -EINVAL;
++
++ if (is_compat_thread(task_thread_info(task)))
++ return -EINVAL;
++
++ /* Reject unknown flags */
++ if (arg & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
++ return -EINVAL;
++
++ ret = gcs_check_locked(task, arg);
++ if (ret != 0)
++ return ret;
++
++ /* If we are enabling GCS then make sure we have a stack */
++ if (arg & PR_SHADOW_STACK_ENABLE) {
++ if (!task_gcs_el0_enabled(task)) {
++ /* Do not allow GCS to be reenabled */
++ if (task->thread.gcs_base)
++ return -EINVAL;
++
++ if (task != current)
++ return -EBUSY;
++
++ size = gcs_size(0);
++ gcs = alloc_gcs(0, size, 0, 0);
++ if (!gcs)
++ return -ENOMEM;
++
++ task->thread.gcspr_el0 = gcs + size - sizeof(u64);
++ task->thread.gcs_base = gcs;
++ task->thread.gcs_size = size;
++ if (task == current)
++ write_sysreg_s(task->thread.gcspr_el0,
++ SYS_GCSPR_EL0);
++
++ }
++ }
++
++ task->thread.gcs_el0_mode = arg;
++ if (task == current)
++ gcs_set_el0_mode(task);
++
++ return 0;
++}
++
++int arch_get_shadow_stack_status(struct task_struct *task,
++ unsigned long __user *arg)
++{
++ if (!system_supports_gcs())
++ return -EINVAL;
++
++ if (is_compat_thread(task_thread_info(task)))
++ return -EINVAL;
++
++ return put_user(task->thread.gcs_el0_mode, arg);
++}
++
++int arch_lock_shadow_stack_status(struct task_struct *task,
++ unsigned long arg)
++{
++ if (!system_supports_gcs())
++ return -EINVAL;
++
++ if (is_compat_thread(task_thread_info(task)))
++ return -EINVAL;
++
++ /*
++ * We support locking unknown bits so applications can prevent
++ * any changes in a future proof manner.
++ */
++ task->thread.gcs_el0_locked |= arg;
++
++ return 0;
++}
+--
+2.34.1
+
+
+From 2f3d32fbac2eec5c04aca5e181a52f8d2bca9383 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 12 Apr 2023 22:29:17 +0100
+Subject: [PATCH 31/47] arm64/mm: Implement map_shadow_stack()
+
+As discussed extensively in the changelog for the addition of this
+syscall on x86 ("x86/shstk: Introduce map_shadow_stack syscall") the
+existing mmap() and madvise() syscalls do not map entirely well onto the
+security requirements for guarded control stacks since they lead to
+windows where memory is allocated but not yet protected or stacks which
+are not properly and safely initialised. Instead a new syscall
+map_shadow_stack() has been defined which allocates and initialises a
+shadow stack page.
+
+Implement this for arm64. Two flags are provided, allowing applications
+to request that the stack be initialised with a valid cap token at the
+top of the stack and optionally also an end of stack marker above that.
+We support requesting an end of stack marker alone but since this is a
+NULL pointer it is indistinguishable from not initialising anything by
+itself.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/mm/gcs.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 61 insertions(+)
+
+diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c
+index d5b593d9d9bd..e238bc9c057d 100644
+--- a/arch/arm64/mm/gcs.c
++++ b/arch/arm64/mm/gcs.c
+@@ -115,6 +115,67 @@ unsigned long gcs_alloc_thread_stack(struct task_struct *tsk,
+ return addr;
+ }
+
++SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
++{
++ unsigned long alloc_size;
++ unsigned long __user *cap_ptr;
++ unsigned long cap_val;
++ int ret = 0;
++ int cap_offset;
++
++ if (!system_supports_gcs())
++ return -EOPNOTSUPP;
++
++ if (flags & ~(SHADOW_STACK_SET_TOKEN | SHADOW_STACK_SET_MARKER))
++ return -EINVAL;
++
++ if (addr && (addr % PAGE_SIZE))
++ return -EINVAL;
++
++ if (size == 8 || size % 8)
++ return -EINVAL;
++
++ /*
++ * An overflow would result in attempting to write the restore token
++ * to the wrong location. Not catastrophic, but just return the right
++ * error code and block it.
++ */
++ alloc_size = PAGE_ALIGN(size);
++ if (alloc_size < size)
++ return -EOVERFLOW;
++
++ addr = alloc_gcs(addr, alloc_size, 0, false);
++ if (IS_ERR_VALUE(addr))
++ return addr;
++
++ /*
++ * Put a cap token at the end of the allocated region so it
++ * can be switched to.
++ */
++ if (flags & SHADOW_STACK_SET_TOKEN) {
++ /* Leave an extra empty frame as a top of stack marker? */
++ if (flags & SHADOW_STACK_SET_MARKER)
++ cap_offset = 2;
++ else
++ cap_offset = 1;
++
++ cap_ptr = (unsigned long __user *)(addr + size -
++ (cap_offset * sizeof(unsigned long)));
++ cap_val = GCS_CAP(cap_ptr);
++
++ put_user_gcs(cap_val, cap_ptr, &ret);
++ if (ret != 0) {
++ vm_munmap(addr, size);
++ return -EFAULT;
++ }
++
++		/* Ensure the new cap is visible for GCS */
++ gcsb_dsync();
++ }
++
++ return addr;
++}
++
+ /*
+ * Apply the GCS mode configured for the specified task to the
+ * hardware.
+--
+2.34.1
+
+
+From 3251d196916b7a331713cafaaa0265b7205c93a0 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 21 Jun 2023 01:28:09 +0100
+Subject: [PATCH 32/47] arm64/signal: Set up and restore the GCS context for
+ signal handlers
+
+When invoking a signal handler we use the GCS configuration and stack
+for the current thread.
+
+Since we implement signal return by calling the signal handler with a
+return address set up pointing to a trampoline in the vDSO we need to
+also configure any active GCS for this by pushing a frame for the
+trampoline onto the GCS. If we do not do this then signal return will
+generate a GCS protection fault.
+
+In order to guard against attempts to bypass GCS protections via signal
+return we only allow returning with GCSPR_EL0 pointing to an address
+where it was previously preempted by a signal. We do this by pushing a
+cap onto the GCS, this takes the form of an architectural GCS cap token
+with the top bit set and token type of 0 which we add on signal entry
+and validate and pop off on signal return. The combination of the top
+bit being set and the token type mean that this can't be interpreted as
+a valid token or address.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/asm/gcs.h | 1 +
+ arch/arm64/kernel/signal.c | 134 +++++++++++++++++++++++++++++++++--
+ arch/arm64/mm/gcs.c | 1 +
+ 3 files changed, 131 insertions(+), 5 deletions(-)
+
+diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h
+index 48c97e63e56a..f50660603ecf 100644
+--- a/arch/arm64/include/asm/gcs.h
++++ b/arch/arm64/include/asm/gcs.h
+@@ -9,6 +9,7 @@
+ #include <asm/uaccess.h>
+
+ struct kernel_clone_args;
++struct ksignal;
+
+ static inline void gcsb_dsync(void)
+ {
+diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
+index 425b1bc17a3f..7a063d3e2a8d 100644
+--- a/arch/arm64/kernel/signal.c
++++ b/arch/arm64/kernel/signal.c
+@@ -25,6 +25,7 @@
+ #include <asm/elf.h>
+ #include <asm/exception.h>
+ #include <asm/cacheflush.h>
++#include <asm/gcs.h>
+ #include <asm/ucontext.h>
+ #include <asm/unistd.h>
+ #include <asm/fpsimd.h>
+@@ -34,6 +35,37 @@
+ #include <asm/traps.h>
+ #include <asm/vdso.h>
+
++#ifdef CONFIG_ARM64_GCS
++/* Extra bit set in the address distinguishing a signal cap token. */
++#define GCS_SIGNAL_CAP_FLAG BIT(63)
++
++#define GCS_SIGNAL_CAP(addr) ((((unsigned long)addr) & GCS_CAP_ADDR_MASK) | \
++ GCS_SIGNAL_CAP_FLAG)
++
++static bool gcs_signal_cap_valid(u64 addr, u64 val)
++{
++ /*
++ * The top bit should be set, this is an invalid address for
++ * EL0 and will only be set for caps created by signals.
++ */
++ if (!(val & GCS_SIGNAL_CAP_FLAG))
++ return false;
++
++ /* The rest should be a standard architectural cap token. */
++ val &= ~GCS_SIGNAL_CAP_FLAG;
++
++ /* The cap must not have a token set */
++ if (GCS_CAP_TOKEN(val) != 0)
++ return false;
++
++ /* The cap must store the VA the cap was stored at */
++ if (GCS_CAP_ADDR(addr) != GCS_CAP_ADDR(val))
++ return false;
++
++ return true;
++}
++#endif
++
+ /*
+ * Do a signal return; undo the signal stack. These are aligned to 128-bit.
+ */
+@@ -815,6 +847,50 @@ static int restore_sigframe(struct pt_regs *regs,
+ return err;
+ }
+
++#ifdef CONFIG_ARM64_GCS
++static int gcs_restore_signal(void)
++{
++ u64 gcspr_el0, cap;
++ int ret;
++
++ if (!system_supports_gcs())
++ return 0;
++
++ if (!(current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE))
++ return 0;
++
++ gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
++
++ /*
++ * GCSPR_EL0 should be pointing at a capped GCS, read the cap...
++ */
++ gcsb_dsync();
++ ret = copy_from_user(&cap, (__user void*)gcspr_el0, sizeof(cap));
++ if (ret)
++ return -EFAULT;
++
++ /*
++ * ...then check that the cap is the actual GCS before
++ * restoring it.
++ */
++ if (!gcs_signal_cap_valid(gcspr_el0, cap))
++ return -EINVAL;
++
++ /* Invalidate the token to prevent reuse */
++ put_user_gcs(0, (__user void*)gcspr_el0, &ret);
++ if (ret != 0)
++ return -EFAULT;
++
++ current->thread.gcspr_el0 = gcspr_el0 + sizeof(cap);
++ write_sysreg_s(current->thread.gcspr_el0, SYS_GCSPR_EL0);
++
++ return 0;
++}
++
++#else
++static int gcs_restore_signal(void) { return 0; }
++#endif
++
+ SYSCALL_DEFINE0(rt_sigreturn)
+ {
+ struct pt_regs *regs = current_pt_regs();
+@@ -841,6 +917,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
+ if (restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
+
++ if (gcs_restore_signal())
++ goto badframe;
++
+ return regs->regs[0];
+
+ badframe:
+@@ -1071,7 +1150,50 @@ static int get_sigframe(struct rt_sigframe_user_layout *user,
+ return 0;
+ }
+
+-static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
++#ifdef CONFIG_ARM64_GCS
++
++static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig)
++{
++ unsigned long __user *gcspr_el0;
++ int ret = 0;
++
++ if (!system_supports_gcs())
++ return 0;
++
++ if (!task_gcs_el0_enabled(current))
++ return 0;
++
++ /*
++ * We are entering a signal handler, current register state is
++ * active.
++ */
++ gcspr_el0 = (unsigned long __user *)read_sysreg_s(SYS_GCSPR_EL0);
++
++ /*
++ * Push a cap and the GCS entry for the trampoline onto the GCS.
++ */
++ put_user_gcs((unsigned long)sigtramp, gcspr_el0 - 2, &ret);
++ put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 1), gcspr_el0 - 1, &ret);
++ if (ret != 0)
++ return ret;
++
++ gcsb_dsync();
++
++ gcspr_el0 -= 2;
++ write_sysreg_s((unsigned long)gcspr_el0, SYS_GCSPR_EL0);
++
++ return 0;
++}
++#else
++
++static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig)
++{
++ return 0;
++}
++
++#endif
++
++static int setup_return(struct pt_regs *regs, struct ksignal *ksig,
+ struct rt_sigframe_user_layout *user, int usig)
+ {
+ __sigrestore_t sigtramp;
+@@ -1079,7 +1201,7 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
+ regs->regs[0] = usig;
+ regs->sp = (unsigned long)user->sigframe;
+ regs->regs[29] = (unsigned long)&user->next_frame->fp;
+- regs->pc = (unsigned long)ka->sa.sa_handler;
++ regs->pc = (unsigned long)ksig->ka.sa.sa_handler;
+
+ /*
+ * Signal delivery is a (wacky) indirect function call in
+@@ -1119,12 +1241,14 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
+ sme_smstop();
+ }
+
+- if (ka->sa.sa_flags & SA_RESTORER)
+- sigtramp = ka->sa.sa_restorer;
++ if (ksig->ka.sa.sa_flags & SA_RESTORER)
++ sigtramp = ksig->ka.sa.sa_restorer;
+ else
+ sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
+
+ regs->regs[30] = (unsigned long)sigtramp;
++
++ return gcs_signal_entry(sigtramp, ksig);
+ }
+
+ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
+@@ -1147,7 +1271,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
+ err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
+ err |= setup_sigframe(&user, regs, set);
+ if (err == 0) {
+- setup_return(regs, &ksig->ka, &user, usig);
++ err = setup_return(regs, ksig, &user, usig);
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+ err |= copy_siginfo_to_user(&frame->info, &ksig->info);
+ regs->regs[1] = (unsigned long)&frame->info;
+diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c
+index e238bc9c057d..e6f505c9bf4a 100644
+--- a/arch/arm64/mm/gcs.c
++++ b/arch/arm64/mm/gcs.c
+@@ -6,6 +6,7 @@
+ #include <linux/types.h>
+
+ #include <asm/cpufeature.h>
++#include <asm/gcs.h>
+ #include <asm/page.h>
+
+ static unsigned long alloc_gcs(unsigned long addr, unsigned long size,
+--
+2.34.1
+
+
+From 4f8eb2f612f1df2ca8033dda6114e0e228befdaa Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Thu, 1 Jun 2023 16:35:46 +0100
+Subject: [PATCH 33/47] arm64/signal: Expose GCS state in signal frames
+
+Add a context for the GCS state and include it in the signal context when
+running on a system that supports GCS. We reuse the same flags that the
+prctl() uses to specify which GCS features are enabled and also provide the
+current GCS pointer.
+
+We do not support enabling GCS via signal return, there is a conflict
+between specifying GCSPR_EL0 and allocation of a new GCS and this is not
+an ancticipated use case. We also enforce GCS configuration locking on
+signal return.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/uapi/asm/sigcontext.h | 9 ++
+ arch/arm64/kernel/signal.c | 108 +++++++++++++++++++++++
+ 2 files changed, 117 insertions(+)
+
+diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
+index f23c1dc3f002..7b66d245f2d2 100644
+--- a/arch/arm64/include/uapi/asm/sigcontext.h
++++ b/arch/arm64/include/uapi/asm/sigcontext.h
+@@ -168,6 +168,15 @@ struct zt_context {
+ __u16 __reserved[3];
+ };
+
++#define GCS_MAGIC 0x47435300
++
++struct gcs_context {
++ struct _aarch64_ctx head;
++ __u64 gcspr;
++ __u64 features_enabled;
++ __u64 reserved;
++};
++
+ #endif /* !__ASSEMBLY__ */
+
+ #include <asm/sve_context.h>
+diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
+index 7a063d3e2a8d..5b9a45a45f4b 100644
+--- a/arch/arm64/kernel/signal.c
++++ b/arch/arm64/kernel/signal.c
+@@ -88,6 +88,7 @@ struct rt_sigframe_user_layout {
+
+ unsigned long fpsimd_offset;
+ unsigned long esr_offset;
++ unsigned long gcs_offset;
+ unsigned long sve_offset;
+ unsigned long tpidr2_offset;
+ unsigned long za_offset;
+@@ -214,6 +215,8 @@ struct user_ctxs {
+ u32 za_size;
+ struct zt_context __user *zt;
+ u32 zt_size;
++ struct gcs_context __user *gcs;
++ u32 gcs_size;
+ };
+
+ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
+@@ -606,6 +609,83 @@ extern int restore_zt_context(struct user_ctxs *user);
+
+ #endif /* ! CONFIG_ARM64_SME */
+
++#ifdef CONFIG_ARM64_GCS
++
++static int preserve_gcs_context(struct gcs_context __user *ctx)
++{
++ int err = 0;
++ u64 gcspr;
++
++ /*
++ * We will add a cap token to the frame, include it in the
++ * GCSPR_EL0 we report to support stack switching via
++ * sigreturn.
++ */
++ gcs_preserve_current_state();
++ gcspr = current->thread.gcspr_el0;
++ if (task_gcs_el0_enabled(current))
++ gcspr -= 8;
++
++ __put_user_error(GCS_MAGIC, &ctx->head.magic, err);
++ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
++ __put_user_error(gcspr, &ctx->gcspr, err);
++ __put_user_error(0, &ctx->reserved, err);
++ __put_user_error(current->thread.gcs_el0_mode,
++ &ctx->features_enabled, err);
++
++ return err;
++}
++
++static int restore_gcs_context(struct user_ctxs *user)
++{
++ u64 gcspr, enabled;
++ int err = 0;
++
++ if (user->gcs_size != sizeof(*user->gcs))
++ return -EINVAL;
++
++ __get_user_error(gcspr, &user->gcs->gcspr, err);
++ __get_user_error(enabled, &user->gcs->features_enabled, err);
++ if (err)
++ return err;
++
++ /* Don't allow unknown modes */
++ if (enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
++ return -EINVAL;
++
++ err = gcs_check_locked(current, enabled);
++ if (err != 0)
++ return err;
++
++ /* Don't allow enabling */
++ if (!task_gcs_el0_enabled(current) &&
++ (enabled & PR_SHADOW_STACK_ENABLE))
++ return -EINVAL;
++
++ /* If we are disabling disable everything */
++ if (!(enabled & PR_SHADOW_STACK_ENABLE))
++ enabled = 0;
++
++ current->thread.gcs_el0_mode = enabled;
++
++ /*
++ * We let userspace set GCSPR_EL0 to anything here, we will
++ * validate later in gcs_restore_signal().
++ */
++ current->thread.gcspr_el0 = gcspr;
++ write_sysreg_s(current->thread.gcspr_el0, SYS_GCSPR_EL0);
++
++ return 0;
++}
++
++#else /* ! CONFIG_ARM64_GCS */
++
++/* Turn any non-optimised out attempts to use these into a link error: */
++extern int preserve_gcs_context(void __user *ctx);
++extern int restore_gcs_context(struct user_ctxs *user);
++
++#endif /* ! CONFIG_ARM64_GCS */
++
+ static int parse_user_sigframe(struct user_ctxs *user,
+ struct rt_sigframe __user *sf)
+ {
+@@ -622,6 +702,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
+ user->tpidr2 = NULL;
+ user->za = NULL;
+ user->zt = NULL;
++ user->gcs = NULL;
+
+ if (!IS_ALIGNED((unsigned long)base, 16))
+ goto invalid;
+@@ -716,6 +797,17 @@ static int parse_user_sigframe(struct user_ctxs *user,
+ user->zt_size = size;
+ break;
+
++ case GCS_MAGIC:
++ if (!system_supports_gcs())
++ goto invalid;
++
++ if (user->gcs)
++ goto invalid;
++
++ user->gcs = (struct gcs_context __user *)head;
++ user->gcs_size = size;
++ break;
++
+ case EXTRA_MAGIC:
+ if (have_extra_context)
+ goto invalid;
+@@ -835,6 +927,9 @@ static int restore_sigframe(struct pt_regs *regs,
+ err = restore_fpsimd_context(&user);
+ }
+
++ if (err == 0 && system_supports_gcs() && user.gcs)
++ err = restore_gcs_context(&user);
++
+ if (err == 0 && system_supports_tpidr2() && user.tpidr2)
+ err = restore_tpidr2_context(&user);
+
+@@ -954,6 +1049,13 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
+ return err;
+ }
+
++ if (system_supports_gcs()) {
++ err = sigframe_alloc(user, &user->gcs_offset,
++ sizeof(struct gcs_context));
++ if (err)
++ return err;
++ }
++
+ if (system_supports_sve() || system_supports_sme()) {
+ unsigned int vq = 0;
+
+@@ -1047,6 +1149,12 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
+ __put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
+ }
+
++ if (system_supports_gcs() && err == 0 && user->gcs_offset) {
++ struct gcs_context __user *gcs_ctx =
++ apply_user_offset(user, user->gcs_offset);
++ err |= preserve_gcs_context(gcs_ctx);
++ }
++
+ /* Scalable Vector Extension state (including streaming), if present */
+ if ((system_supports_sve() || system_supports_sme()) &&
+ err == 0 && user->sve_offset) {
+--
+2.34.1
+
+
+From 26cb1cbfb9f942ed9d4194d3cc8917e0c636811b Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Fri, 30 Jun 2023 17:32:38 +0100
+Subject: [PATCH 34/47] arm64/ptrace: Expose GCS via ptrace and core files
+
+Provide a new register type NT_ARM_GCS reporting the current GCS mode
+and pointer for EL0. Due to the interactions with allocation and
+deallocation of Guarded Control Stacks we do not permit any changes to
+the GCS mode via ptrace, only GCSPR_EL0 may be changed.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/include/uapi/asm/ptrace.h | 8 ++++
+ arch/arm64/kernel/ptrace.c | 59 ++++++++++++++++++++++++++++
+ include/uapi/linux/elf.h | 1 +
+ 3 files changed, 68 insertions(+)
+
+diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
+index 7fa2f7036aa7..0f39ba4f3efd 100644
+--- a/arch/arm64/include/uapi/asm/ptrace.h
++++ b/arch/arm64/include/uapi/asm/ptrace.h
+@@ -324,6 +324,14 @@ struct user_za_header {
+ #define ZA_PT_SIZE(vq) \
+ (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq))
+
++/* GCS state (NT_ARM_GCS) */
++
++struct user_gcs {
++ __u64 features_enabled;
++ __u64 features_locked;
++ __u64 gcspr_el0;
++};
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* _UAPI__ASM_PTRACE_H */
+diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
+index e3bef38fc2e2..e291c0145e94 100644
+--- a/arch/arm64/kernel/ptrace.c
++++ b/arch/arm64/kernel/ptrace.c
+@@ -34,6 +34,7 @@
+ #include <asm/cpufeature.h>
+ #include <asm/debug-monitors.h>
+ #include <asm/fpsimd.h>
++#include <asm/gcs.h>
+ #include <asm/mte.h>
+ #include <asm/pointer_auth.h>
+ #include <asm/stacktrace.h>
+@@ -1411,6 +1412,51 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
+ }
+ #endif
+
++#ifdef CONFIG_ARM64_GCS
++static int gcs_get(struct task_struct *target,
++ const struct user_regset *regset,
++ struct membuf to)
++{
++ struct user_gcs user_gcs;
++
++ if (target == current)
++ gcs_preserve_current_state();
++
++ user_gcs.features_enabled = target->thread.gcs_el0_mode;
++ user_gcs.features_locked = target->thread.gcs_el0_locked;
++ user_gcs.gcspr_el0 = target->thread.gcspr_el0;
++
++ return membuf_write(&to, &user_gcs, sizeof(user_gcs));
++}
++
++static int gcs_set(struct task_struct *target, const struct
++ user_regset *regset, unsigned int pos,
++ unsigned int count, const void *kbuf, const
++ void __user *ubuf)
++{
++ int ret;
++ struct user_gcs user_gcs;
++
++ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1);
++ if (ret)
++ return ret;
++
++ if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
++ return -EINVAL;
++
++ /* Do not allow enable via ptrace */
++ if ((user_gcs.features_enabled & PR_SHADOW_STACK_ENABLE) &&
++ !(target->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE))
++ return -EBUSY;
++
++ target->thread.gcs_el0_mode = user_gcs.features_enabled;
++ target->thread.gcs_el0_locked = user_gcs.features_locked;
++ target->thread.gcspr_el0 = user_gcs.gcspr_el0;
++
++ return 0;
++}
++#endif
++
+ enum aarch64_regset {
+ REGSET_GPR,
+ REGSET_FPR,
+@@ -1439,6 +1485,9 @@ enum aarch64_regset {
+ #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+ REGSET_TAGGED_ADDR_CTRL,
+ #endif
++#ifdef CONFIG_ARM64_GCS
++ REGSET_GCS,
++#endif
+ };
+
+ static const struct user_regset aarch64_regsets[] = {
+@@ -1590,6 +1639,16 @@ static const struct user_regset aarch64_regsets[] = {
+ .set = tagged_addr_ctrl_set,
+ },
+ #endif
++#ifdef CONFIG_ARM64_GCS
++ [REGSET_GCS] = {
++ .core_note_type = NT_ARM_GCS,
++ .n = sizeof(struct user_gcs) / sizeof(u64),
++ .size = sizeof(u64),
++ .align = sizeof(u64),
++ .regset_get = gcs_get,
++ .set = gcs_set,
++ },
++#endif
+ };
+
+ static const struct user_regset_view user_aarch64_view = {
+diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
+index 9417309b7230..436dfc359f61 100644
+--- a/include/uapi/linux/elf.h
++++ b/include/uapi/linux/elf.h
+@@ -440,6 +440,7 @@ typedef struct elf64_shdr {
+ #define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */
+ #define NT_ARM_ZA 0x40c /* ARM SME ZA registers */
+ #define NT_ARM_ZT 0x40d /* ARM SME ZT registers */
++#define NT_ARM_GCS 0x40e /* ARM GCS state */
+ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */
+ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */
+ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */
+--
+2.34.1
+
+
+From 2f6d799ed18f86452b2c1a07ec1a65a2843cab7e Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Tue, 7 Mar 2023 22:34:05 +0000
+Subject: [PATCH 35/47] arm64: Add Kconfig for Guarded Control Stack (GCS)
+
+Provide a Kconfig option allowing the user to select if GCS support is
+built into the kernel.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ arch/arm64/Kconfig | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index aa7c1d435139..e0048e4660cf 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -2098,6 +2098,26 @@ config ARM64_EPAN
+ if the cpu does not implement the feature.
+ endmenu # "ARMv8.7 architectural features"
+
++menu "v9.4 architectural features"
++
++config ARM64_GCS
++ bool "Enable support for Guarded Control Stack (GCS)"
++ default y
++ select ARCH_HAS_USER_SHADOW_STACK
++ select ARCH_USES_HIGH_VMA_FLAGS
++ help
++ Guarded Control Stack (GCS) provides support for a separate
++ stack with restricted access which contains only return
++ addresses. This can be used to harden against some attacks
++ by comparing return address used by the program with what is
++ stored in the GCS, and may also be used to efficiently obtain
++ the call stack for applications such as profiling.
++
++ The feature is detected at runtime, and will remain disabled
++ if the system does not implement the feature.
++
++endmenu # "v9.4 architectural features"
++
+ config ARM64_SVE
+ bool "ARM Scalable Vector Extension support"
+ default y
+--
+2.34.1
+
+
+From 56346b042191155ca2397d01de908dc8954592e3 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Mon, 20 Mar 2023 18:24:51 +0000
+Subject: [PATCH 36/47] kselftest/arm64: Verify the GCS hwcap
+
+Add coverage of the GCS hwcap to the hwcap selftest, using a read of
+GCSPR_EL0 to generate SIGILL without having to worry about enabling GCS.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/arm64/abi/hwcap.c | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
+index 1189e77c8152..bc9e3250a9df 100644
+--- a/tools/testing/selftests/arm64/abi/hwcap.c
++++ b/tools/testing/selftests/arm64/abi/hwcap.c
+@@ -63,6 +63,17 @@ static void fp_sigill(void)
+ asm volatile("fmov s0, #1");
+ }
+
++static void gcs_sigill(void)
++{
++ unsigned long *gcspr;
++
++ asm volatile(
++ "mrs %0, S3_3_C2_C5_1"
++ : "=r" (gcspr)
++ :
++ : "cc");
++}
++
+ static void ilrcpc_sigill(void)
+ {
+ /* LDAPUR W0, [SP, #8] */
+@@ -360,6 +371,14 @@ static const struct hwcap_data {
+ .cpuinfo = "fp",
+ .sigill_fn = fp_sigill,
+ },
++ {
++ .name = "GCS",
++ .at_hwcap = AT_HWCAP2,
++ .hwcap_bit = HWCAP2_GCS,
++ .cpuinfo = "gcs",
++ .sigill_fn = gcs_sigill,
++ .sigill_reliable = true,
++ },
+ {
+ .name = "JSCVT",
+ .at_hwcap = AT_HWCAP,
+--
+2.34.1
+
+
+From 38f47a23529268f26d7378710065e378e4022a95 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Mon, 29 Jan 2024 22:45:01 +0000
+Subject: [PATCH 37/47] kselftest: Provide shadow stack enable helpers for
+ arm64
+
+Allow test programs to use the shadow stack helpers on arm64.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/ksft_shstk.h | 37 ++++++++++++++++++++++++++++
+ 1 file changed, 37 insertions(+)
+
+diff --git a/tools/testing/selftests/ksft_shstk.h b/tools/testing/selftests/ksft_shstk.h
+index 85d0747c1802..223e24b4eb80 100644
+--- a/tools/testing/selftests/ksft_shstk.h
++++ b/tools/testing/selftests/ksft_shstk.h
+@@ -50,6 +50,43 @@ static inline __attribute__((always_inline)) void enable_shadow_stack(void)
+
+ #endif
+
++#ifdef __aarch64__
++#define PR_SET_SHADOW_STACK_STATUS 72
++# define PR_SHADOW_STACK_ENABLE (1UL << 0)
++
++#define my_syscall2(num, arg1, arg2) \
++({ \
++ register long _num __asm__ ("x8") = (num); \
++ register long _arg1 __asm__ ("x0") = (long)(arg1); \
++ register long _arg2 __asm__ ("x1") = (long)(arg2); \
++ register long _arg3 __asm__ ("x2") = 0; \
++ register long _arg4 __asm__ ("x3") = 0; \
++ register long _arg5 __asm__ ("x4") = 0; \
++ \
++ __asm__ volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_arg1), "r"(_arg2), \
++ "r"(_arg3), "r"(_arg4), \
++ "r"(_arg5), "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++#define ENABLE_SHADOW_STACK
++static inline __attribute__((always_inline)) void enable_shadow_stack(void)
++{
++ int ret;
++
++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
++ PR_SHADOW_STACK_ENABLE);
++ if (ret == 0)
++ shadow_stack_enabled = true;
++}
++
++#endif
++
+ #ifndef __NR_map_shadow_stack
+ #define __NR_map_shadow_stack 453
+ #endif
+--
+2.34.1
+
+
+From 99d2e9efa5194a3fa58d64e8c89657952607ca49 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 26 Apr 2023 19:05:43 +0100
+Subject: [PATCH 38/47] kselftest/arm64: Add GCS as a detected feature in the
+ signal tests
+
+In preparation for testing GCS related signal handling add it as a feature
+we check for in the signal handling support code.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/arm64/signal/test_signals.h | 2 ++
+ tools/testing/selftests/arm64/signal/test_signals_utils.c | 3 +++
+ 2 files changed, 5 insertions(+)
+
+diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
+index 1e6273d81575..7ada43688c02 100644
+--- a/tools/testing/selftests/arm64/signal/test_signals.h
++++ b/tools/testing/selftests/arm64/signal/test_signals.h
+@@ -35,6 +35,7 @@ enum {
+ FSME_BIT,
+ FSME_FA64_BIT,
+ FSME2_BIT,
++ FGCS_BIT,
+ FMAX_END
+ };
+
+@@ -43,6 +44,7 @@ enum {
+ #define FEAT_SME (1UL << FSME_BIT)
+ #define FEAT_SME_FA64 (1UL << FSME_FA64_BIT)
+ #define FEAT_SME2 (1UL << FSME2_BIT)
++#define FEAT_GCS (1UL << FGCS_BIT)
+
+ /*
+ * A descriptor used to describe and configure a test case.
+diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
+index 0dc948db3a4a..89ef95c1af0e 100644
+--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
++++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
+@@ -30,6 +30,7 @@ static char const *const feats_names[FMAX_END] = {
+ " SME ",
+ " FA64 ",
+ " SME2 ",
++ " GCS ",
+ };
+
+ #define MAX_FEATS_SZ 128
+@@ -329,6 +330,8 @@ int test_init(struct tdescr *td)
+ td->feats_supported |= FEAT_SME_FA64;
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME2)
+ td->feats_supported |= FEAT_SME2;
++ if (getauxval(AT_HWCAP2) & HWCAP2_GCS)
++ td->feats_supported |= FEAT_GCS;
+ if (feats_ok(td)) {
+ if (td->feats_required & td->feats_supported)
+ fprintf(stderr,
+--
+2.34.1
+
+
+From 4d1754e3575bc4546e633d957977edb42718fd79 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Thu, 1 Jun 2023 17:52:08 +0100
+Subject: [PATCH 39/47] kselftest/arm64: Add framework support for GCS to
+ signal handling tests
+
+Teach the framework about the GCS signal context, avoiding warnings on
+the unknown context.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/arm64/signal/testcases/testcases.c | 7 +++++++
+ tools/testing/selftests/arm64/signal/testcases/testcases.h | 1 +
+ 2 files changed, 8 insertions(+)
+
+diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
+index 9f580b55b388..1cd124732be4 100644
+--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
++++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
+@@ -209,6 +209,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
+ zt = (struct zt_context *)head;
+ new_flags |= ZT_CTX;
+ break;
++ case GCS_MAGIC:
++ if (flags & GCS_CTX)
++ *err = "Multiple GCS_MAGIC";
++ if (head->size != sizeof(struct gcs_context))
++ *err = "Bad size for gcs_context";
++ new_flags |= GCS_CTX;
++ break;
+ case EXTRA_MAGIC:
+ if (flags & EXTRA_CTX)
+ *err = "Multiple EXTRA_MAGIC";
+diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h
+index a08ab0d6207a..9b2599745c29 100644
+--- a/tools/testing/selftests/arm64/signal/testcases/testcases.h
++++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h
+@@ -19,6 +19,7 @@
+ #define ZA_CTX (1 << 2)
+ #define EXTRA_CTX (1 << 3)
+ #define ZT_CTX (1 << 4)
++#define GCS_CTX (1 << 5)
+
+ #define KSFT_BAD_MAGIC 0xdeadbeef
+
+--
+2.34.1
+
+
+From 6cfe93b36e2ac080db62d44ed0bd8a6f972ceffa Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 5 Jul 2023 17:40:22 +0100
+Subject: [PATCH 40/47] kselftest/arm64: Allow signals tests to specify an
+ expected si_code
+
+Currently we ignore si_code unless the expected signal is a SIGSEGV, in
+which case we enforce it being SEGV_ACCERR. Allow test cases to specify
+exactly which si_code should be generated so we can validate this, and
+test for other segfault codes.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ .../selftests/arm64/signal/test_signals.h | 4 +++
+ .../arm64/signal/test_signals_utils.c | 29 ++++++++++++-------
+ 2 files changed, 23 insertions(+), 10 deletions(-)
+
+diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
+index 7ada43688c02..ee75a2c25ce7 100644
+--- a/tools/testing/selftests/arm64/signal/test_signals.h
++++ b/tools/testing/selftests/arm64/signal/test_signals.h
+@@ -71,6 +71,10 @@ struct tdescr {
+ * Zero when no signal is expected on success
+ */
+ int sig_ok;
++ /*
++ * expected si_code for sig_ok, or 0 to not check
++ */
++ int sig_ok_code;
+ /* signum expected on unsupported CPU features. */
+ int sig_unsupp;
+ /* a timeout in second for test completion */
+diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
+index 89ef95c1af0e..63deca32b0df 100644
+--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
++++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
+@@ -143,16 +143,25 @@ static bool handle_signal_ok(struct tdescr *td,
+ "current->token ZEROED...test is probably broken!\n");
+ abort();
+ }
+- /*
+- * Trying to narrow down the SEGV to the ones generated by Kernel itself
+- * via arm64_notify_segfault(). This is a best-effort check anyway, and
+- * the si_code check may need to change if this aspect of the kernel
+- * ABI changes.
+- */
+- if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) {
+- fprintf(stdout,
+- "si_code != SEGV_ACCERR...test is probably broken!\n");
+- abort();
++ if (td->sig_ok_code) {
++ if (si->si_code != td->sig_ok_code) {
++ fprintf(stdout, "si_code is %d not %d\n",
++ si->si_code, td->sig_ok_code);
++ abort();
++ }
++ } else {
++ /*
++ * Trying to narrow down the SEGV to the ones
++ * generated by Kernel itself via
++ * arm64_notify_segfault(). This is a best-effort
++ * check anyway, and the si_code check may need to
++ * change if this aspect of the kernel ABI changes.
++ */
++ if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) {
++ fprintf(stdout,
++ "si_code != SEGV_ACCERR...test is probably broken!\n");
++ abort();
++ }
+ }
+ td->pass = 1;
+ /*
+--
+2.34.1
+
+
+From c48cd5f2f586f4bfc9b17e6101727933d428511e Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 5 Jul 2023 14:43:32 +0100
+Subject: [PATCH 41/47] kselftest/arm64: Always run signals tests with GCS
+ enabled
+
+Since it is not possible to return from the function that enabled GCS
+without disabling GCS it is very inconvenient to use the signal handling
+tests to cover GCS when GCS is not enabled by the toolchain and runtime,
+something that no current distribution does. Since none of the testcases
+do anything with stacks that would cause problems with GCS we can sidestep
+this issue by unconditionally enabling GCS on startup and exiting with a
+call to exit() rather than a return from main().
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ .../selftests/arm64/signal/test_signals.c | 17 ++++++++++-
+ .../arm64/signal/test_signals_utils.h | 29 +++++++++++++++++++
+ 2 files changed, 45 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c
+index 00051b40d71e..30e95f50db19 100644
+--- a/tools/testing/selftests/arm64/signal/test_signals.c
++++ b/tools/testing/selftests/arm64/signal/test_signals.c
+@@ -7,6 +7,10 @@
+ * Each test provides its own tde struct tdescr descriptor to link with
+ * this wrapper. Framework provides common helpers.
+ */
++
++#include <sys/auxv.h>
++#include <sys/prctl.h>
++
+ #include <kselftest.h>
+
+ #include "test_signals.h"
+@@ -16,6 +20,16 @@ struct tdescr *current = &tde;
+
+ int main(int argc, char *argv[])
+ {
++ /*
++ * Ensure GCS is at least enabled throughout the tests if
++ * supported, otherwise the inability to return from the
++ * function that enabled GCS makes it very inconvenient to set
++ * up test cases. The prctl() may fail if GCS was locked by
++ * libc setup code.
++ */
++ if (getauxval(AT_HWCAP2) & HWCAP2_GCS)
++ gcs_set_state(PR_SHADOW_STACK_ENABLE);
++
+ ksft_print_msg("%s :: %s\n", current->name, current->descr);
+ if (test_setup(current) && test_init(current)) {
+ test_run(current);
+@@ -23,5 +37,6 @@ int main(int argc, char *argv[])
+ }
+ test_result(current);
+
+- return current->result;
++ /* Do not return in case GCS was enabled */
++ exit(current->result);
+ }
+diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
+index 762c8fe9c54a..1e80808ee105 100644
+--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h
++++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
+@@ -18,6 +18,35 @@ void test_cleanup(struct tdescr *td);
+ int test_run(struct tdescr *td);
+ void test_result(struct tdescr *td);
+
++#ifndef __NR_prctl
++#define __NR_prctl 167
++#endif
++
++/*
++ * The prctl takes 1 argument but we need to ensure that the other
++ * values passed in registers to the syscall are zero since the kernel
++ * validates them.
++ */
++#define gcs_set_state(state) \
++ ({ \
++ register long _num __asm__ ("x8") = __NR_prctl; \
++ register long _arg1 __asm__ ("x0") = PR_SET_SHADOW_STACK_STATUS; \
++ register long _arg2 __asm__ ("x1") = (long)(state); \
++ register long _arg3 __asm__ ("x2") = 0; \
++ register long _arg4 __asm__ ("x3") = 0; \
++ register long _arg5 __asm__ ("x4") = 0; \
++ \
++ __asm__ volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_arg1), "r"(_arg2), \
++ "r"(_arg3), "r"(_arg4), \
++ "r"(_arg5), "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++ })
++
+ static inline bool feats_ok(struct tdescr *td)
+ {
+ if (td->feats_incompatible & td->feats_supported)
+--
+2.34.1
+
+
+From dff5594c2072aa86c028171e06c5c706dc632a0c Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Thu, 6 Apr 2023 00:35:19 +0100
+Subject: [PATCH 42/47] kselftest/arm64: Add very basic GCS test program
+
+This test program just covers the basic GCS ABI, covering aspects of the
+ABI as standalone features without attempting to integrate things.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/arm64/Makefile | 2 +-
+ tools/testing/selftests/arm64/gcs/.gitignore | 1 +
+ tools/testing/selftests/arm64/gcs/Makefile | 18 +
+ tools/testing/selftests/arm64/gcs/basic-gcs.c | 431 ++++++++++++++++++
+ tools/testing/selftests/arm64/gcs/gcs-util.h | 90 ++++
+ 5 files changed, 541 insertions(+), 1 deletion(-)
+ create mode 100644 tools/testing/selftests/arm64/gcs/.gitignore
+ create mode 100644 tools/testing/selftests/arm64/gcs/Makefile
+ create mode 100644 tools/testing/selftests/arm64/gcs/basic-gcs.c
+ create mode 100644 tools/testing/selftests/arm64/gcs/gcs-util.h
+
+diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
+index 28b93cab8c0d..22029e60eff3 100644
+--- a/tools/testing/selftests/arm64/Makefile
++++ b/tools/testing/selftests/arm64/Makefile
+@@ -4,7 +4,7 @@
+ ARCH ?= $(shell uname -m 2>/dev/null || echo not)
+
+ ifneq (,$(filter $(ARCH),aarch64 arm64))
+-ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi
++ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi gcs
+ else
+ ARM64_SUBTARGETS :=
+ endif
+diff --git a/tools/testing/selftests/arm64/gcs/.gitignore b/tools/testing/selftests/arm64/gcs/.gitignore
+new file mode 100644
+index 000000000000..0e5e695ecba5
+--- /dev/null
++++ b/tools/testing/selftests/arm64/gcs/.gitignore
+@@ -0,0 +1 @@
++basic-gcs
+diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile
+new file mode 100644
+index 000000000000..61a30f483429
+--- /dev/null
++++ b/tools/testing/selftests/arm64/gcs/Makefile
+@@ -0,0 +1,18 @@
++# SPDX-License-Identifier: GPL-2.0
++# Copyright (C) 2023 ARM Limited
++#
++# In order to avoid interaction with the toolchain and dynamic linker the
++# portions of these tests that interact with the GCS are implemented using
++# nolibc.
++#
++
++TEST_GEN_PROGS := basic-gcs
++
++include ../../lib.mk
++
++$(OUTPUT)/basic-gcs: basic-gcs.c
++ $(CC) -g -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
++ -static -include ../../../../include/nolibc/nolibc.h \
++ -I../../../../../usr/include \
++ -std=gnu99 -I../.. -g \
++ -ffreestanding -Wall $^ -o $@ -lgcc
+diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c
+new file mode 100644
+index 000000000000..b3522d606a58
+--- /dev/null
++++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c
+@@ -0,0 +1,431 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Copyright (C) 2023 ARM Limited.
++ */
++
++#include <limits.h>
++#include <stdbool.h>
++
++#include <linux/prctl.h>
++
++#include <sys/mman.h>
++#include <asm/mman.h>
++#include <linux/sched.h>
++
++#include "kselftest.h"
++#include "gcs-util.h"
++
++/* nolibc doesn't have sysconf(), just hard code the maximum */
++static size_t page_size = 65536;
++
++static __attribute__((noinline)) void valid_gcs_function(void)
++{
++ /* Do something the compiler can't optimise out */
++ my_syscall1(__NR_prctl, PR_SVE_GET_VL);
++}
++
++static inline int gcs_set_status(unsigned long mode)
++{
++ bool enabling = mode & PR_SHADOW_STACK_ENABLE;
++ int ret;
++ unsigned long new_mode;
++
++ /*
++ * The prctl takes 1 argument but we need to ensure that the
++ * other 3 values passed in registers to the syscall are zero
++ * since the kernel validates them.
++ */
++ ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, mode,
++ 0, 0, 0);
++
++ if (ret == 0) {
++ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
++ &new_mode, 0, 0, 0);
++ if (ret == 0) {
++ if (new_mode != mode) {
++ ksft_print_msg("Mode set to %x not %x\n",
++ new_mode, mode);
++ ret = -EINVAL;
++ }
++ } else {
++ ksft_print_msg("Failed to validate mode: %d\n", ret);
++ }
++
++ if (enabling != chkfeat_gcs()) {
++ ksft_print_msg("%senabled by prctl but %senabled in CHKFEAT\n",
++ enabling ? "" : "not ",
++ chkfeat_gcs() ? "" : "not ");
++ ret = -EINVAL;
++ }
++ }
++
++ return ret;
++}
++
++/* Try to read the status */
++static bool read_status(void)
++{
++ unsigned long state;
++ int ret;
++
++ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
++ &state, 0, 0, 0);
++ if (ret != 0) {
++ ksft_print_msg("Failed to read state: %d\n", ret);
++ return false;
++ }
++
++ return state & PR_SHADOW_STACK_ENABLE;
++}
++
++/* Just a straight enable */
++static bool base_enable(void)
++{
++ int ret;
++
++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE);
++ if (ret) {
++ ksft_print_msg("PR_SHADOW_STACK_ENABLE failed %d\n", ret);
++ return false;
++ }
++
++ return true;
++}
++
++/* Check we can read GCSPR_EL0 when GCS is enabled */
++static bool read_gcspr_el0(void)
++{
++ unsigned long *gcspr_el0;
++
++ ksft_print_msg("GET GCSPR\n");
++ gcspr_el0 = get_gcspr();
++ ksft_print_msg("GCSPR_EL0 is %p\n", gcspr_el0);
++
++ return true;
++}
++
++/* Also allow writes to stack */
++static bool enable_writeable(void)
++{
++ int ret;
++
++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE);
++ if (ret) {
++ ksft_print_msg("PR_SHADOW_STACK_ENABLE writeable failed: %d\n", ret);
++ return false;
++ }
++
++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE);
++ if (ret) {
++ ksft_print_msg("failed to restore plain enable %d\n", ret);
++ return false;
++ }
++
++ return true;
++}
++
++/* Also allow writes to stack */
++static bool enable_push_pop(void)
++{
++ int ret;
++
++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH);
++ if (ret) {
++ ksft_print_msg("PR_SHADOW_STACK_ENABLE with push failed: %d\n",
++ ret);
++ return false;
++ }
++
++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE);
++ if (ret) {
++ ksft_print_msg("failed to restore plain enable %d\n", ret);
++ return false;
++ }
++
++ return true;
++}
++
++/* Enable GCS and allow everything */
++static bool enable_all(void)
++{
++ int ret;
++
++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH |
++ PR_SHADOW_STACK_WRITE);
++ if (ret) {
++ ksft_print_msg("PR_SHADOW_STACK_ENABLE with everything failed: %d\n",
++ ret);
++ return false;
++ }
++
++ ret = gcs_set_status(PR_SHADOW_STACK_ENABLE);
++ if (ret) {
++ ksft_print_msg("failed to restore plain enable %d\n", ret);
++ return false;
++ }
++
++ return true;
++}
++
++static bool enable_invalid(void)
++{
++ int ret = gcs_set_status(ULONG_MAX);
++ if (ret == 0) {
++ ksft_print_msg("GCS_SET_STATUS %lx succeeded\n", ULONG_MAX);
++ return false;
++ }
++
++ return true;
++}
++
++/* Map a GCS */
++static bool map_guarded_stack(void)
++{
++ int ret;
++ uint64_t *buf;
++ uint64_t expected_cap;
++ int elem;
++ bool pass = true;
++
++ buf = (void *)my_syscall3(__NR_map_shadow_stack, 0, page_size,
++ SHADOW_STACK_SET_MARKER |
++ SHADOW_STACK_SET_TOKEN);
++ if (buf == MAP_FAILED) {
++ ksft_print_msg("Failed to map %d byte GCS: %d\n",
++ page_size, errno);
++ return false;
++ }
++ ksft_print_msg("Mapped GCS at %p-%p\n", buf,
++ (uint64_t)buf + page_size);
++
++ /* The top of the newly allocated region should be 0 */
++ elem = (page_size / sizeof(uint64_t)) - 1;
++ if (buf[elem]) {
++ ksft_print_msg("Last entry is 0x%lx not 0x0\n", buf[elem]);
++ pass = false;
++ }
++
++ /* Then a valid cap token */
++ elem--;
++ expected_cap = ((uint64_t)buf + page_size - 16);
++ expected_cap &= GCS_CAP_ADDR_MASK;
++ expected_cap |= GCS_CAP_VALID_TOKEN;
++ if (buf[elem] != expected_cap) {
++ ksft_print_msg("Cap entry is 0x%lx not 0x%lx\n",
++ buf[elem], expected_cap);
++ pass = false;
++ }
++ ksft_print_msg("cap token is 0x%lx\n", buf[elem]);
++
++ /* The rest should be zeros */
++ for (elem = 0; elem < page_size / sizeof(uint64_t) - 2; elem++) {
++ if (!buf[elem])
++ continue;
++ ksft_print_msg("GCS slot %d is 0x%lx not 0x0\n",
++ elem, buf[elem]);
++ pass = false;
++ }
++
++ ret = munmap(buf, page_size);
++ if (ret != 0) {
++ ksft_print_msg("Failed to unmap %d byte GCS: %d\n",
++ page_size, errno);
++ pass = false;
++ }
++
++ return pass;
++}
++
++/* A fork()ed process can run */
++static bool test_fork(void)
++{
++ unsigned long child_mode;
++ int ret, status;
++ pid_t pid;
++ bool pass = true;
++
++ pid = fork();
++ if (pid == -1) {
++ ksft_print_msg("fork() failed: %d\n", errno);
++ pass = false;
++ goto out;
++ }
++ if (pid == 0) {
++ /* In child, make sure we can call a function, read
++ * the GCS pointer and status and then exit */
++ valid_gcs_function();
++ get_gcspr();
++
++ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
++ &child_mode, 0, 0, 0);
++ if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) {
++ ksft_print_msg("GCS not enabled in child\n");
++ ret = -EINVAL;
++ }
++
++ exit(ret);
++ }
++
++ /*
++ * In parent, check we can still do function calls then block
++ * for the child.
++ */
++ valid_gcs_function();
++
++ ksft_print_msg("Waiting for child %d\n", pid);
++
++ ret = waitpid(pid, &status, 0);
++ if (ret == -1) {
++ ksft_print_msg("Failed to wait for child: %d\n",
++ errno);
++ return false;
++ }
++
++ if (!WIFEXITED(status)) {
++ ksft_print_msg("Child exited due to signal %d\n",
++ WTERMSIG(status));
++ pass = false;
++ } else {
++ if (WEXITSTATUS(status)) {
++ ksft_print_msg("Child exited with status %d\n",
++ WEXITSTATUS(status));
++ pass = false;
++ }
++ }
++
++out:
++
++ return pass;
++}
++
++/* Check that we can explicitly specify a GCS via clone3() */
++static bool test_clone3(void)
++{
++ struct clone_args args;
++ unsigned long child_mode;
++ pid_t pid = -1;
++ int status, ret;
++ bool pass;
++
++ memset(&args, 0, sizeof(args));
++ args.flags = CLONE_VM;
++ args.shadow_stack = my_syscall3(__NR_map_shadow_stack, 0, page_size,
++ SHADOW_STACK_SET_MARKER |
++ SHADOW_STACK_SET_TOKEN);
++ args.shadow_stack_size = page_size;
++
++ pid = my_syscall2(__NR_clone3, &args, sizeof(args));
++ if (pid < 0) {
++ ksft_print_msg("clone3() failed: %d\n", errno);
++ pass = false;
++ goto out;
++ }
++
++ /* In child? */
++ if (pid == 0) {
++ /* Do we have GCS enabled? */
++ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
++ &child_mode, 0, 0, 0);
++ if (ret != 0) {
++ ksft_print_msg("PR_GET_SHADOW_STACK_STATUS failed: %d\n",
++ ret);
++ exit(EXIT_FAILURE);
++ }
++
++ if (!(child_mode & PR_SHADOW_STACK_ENABLE)) {
++ ksft_print_msg("GCS not enabled in child\n");
++ exit(EXIT_FAILURE);
++ }
++
++ ksft_print_msg("GCS enabled in child\n");
++
++ /* We've probably already called a function but make sure */
++ valid_gcs_function();
++
++ exit(EXIT_SUCCESS);
++ }
++
++ if (waitpid(-1, &status, __WALL) < 0) {
++ ksft_print_msg("waitpid() failed %d\n", errno);
++ pass = false;
++ goto out;
++ }
++ if (WIFEXITED(status)) {
++ if (WEXITSTATUS(status) == EXIT_SUCCESS) {
++ pass = true;
++ } else {
++ ksft_print_msg("Child returned status %d\n",
++ WEXITSTATUS(status));
++ pass = false;
++ }
++ } else if (WIFSIGNALED(status)) {
++ ksft_print_msg("Child exited due to signal %d\n",
++ WTERMSIG(status));
++ pass = false;
++ } else {
++ ksft_print_msg("Child exited uncleanly\n");
++ pass = false;
++ }
++
++out:
++ return pass;
++}
++
++typedef bool (*gcs_test)(void);
++
++static struct {
++ char *name;
++ gcs_test test;
++ bool needs_enable;
++} tests[] = {
++ { "read_status", read_status },
++ { "base_enable", base_enable, true },
++ { "read_gcspr_el0", read_gcspr_el0 },
++ { "enable_writeable", enable_writeable, true },
++ { "enable_push_pop", enable_push_pop, true },
++ { "enable_all", enable_all, true },
++ { "enable_invalid", enable_invalid, true },
++ { "map_guarded_stack", map_guarded_stack },
++ { "fork", test_fork },
++ { "clone3", test_clone3 },
++};
++
++int main(void)
++{
++ int i, ret;
++ unsigned long gcs_mode;
++
++ ksft_print_header();
++
++ /*
++ * We don't have getauxval() with nolibc so treat a failure to
++ * read GCS state as a lack of support and skip.
++ */
++ ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
++ &gcs_mode, 0, 0, 0);
++ if (ret != 0)
++ ksft_exit_skip("Failed to read GCS state: %d\n", ret);
++
++ if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) {
++ gcs_mode = PR_SHADOW_STACK_ENABLE;
++ ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
++ gcs_mode, 0, 0, 0);
++ if (ret != 0)
++ ksft_exit_fail_msg("Failed to enable GCS: %d\n", ret);
++ }
++
++ ksft_set_plan(ARRAY_SIZE(tests));
++
++ for (i = 0; i < ARRAY_SIZE(tests); i++) {
++ ksft_test_result((*tests[i].test)(), "%s\n", tests[i].name);
++ }
++
++ /* One last test: disable GCS, we can do this one time */
++ my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0);
++ if (ret != 0)
++ ksft_print_msg("Failed to disable GCS: %d\n", ret);
++
++ ksft_finished();
++
++ return 0;
++}
+diff --git a/tools/testing/selftests/arm64/gcs/gcs-util.h b/tools/testing/selftests/arm64/gcs/gcs-util.h
+new file mode 100644
+index 000000000000..b37801c95604
+--- /dev/null
++++ b/tools/testing/selftests/arm64/gcs/gcs-util.h
+@@ -0,0 +1,90 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Copyright (C) 2023 ARM Limited.
++ */
++
++#ifndef GCS_UTIL_H
++#define GCS_UTIL_H
++
++#include <stdbool.h>
++
++#ifndef __NR_map_shadow_stack
++#define __NR_map_shadow_stack 453
++#endif
++
++#ifndef __NR_prctl
++#define __NR_prctl 167
++#endif
++
++/* Shadow Stack/Guarded Control Stack interface */
++#define PR_GET_SHADOW_STACK_STATUS 71
++#define PR_SET_SHADOW_STACK_STATUS 72
++#define PR_LOCK_SHADOW_STACK_STATUS 73
++
++# define PR_SHADOW_STACK_ENABLE (1UL << 0)
++# define PR_SHADOW_STACK_WRITE (1UL << 1)
++# define PR_SHADOW_STACK_PUSH (1UL << 2)
++
++#define PR_SHADOW_STACK_ALL_MODES \
++ PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH
++
++#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
++#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack marker in the shadow stack */
++
++#define GCS_CAP_ADDR_MASK (0xfffffffffffff000UL)
++#define GCS_CAP_TOKEN_MASK (0x0000000000000fffUL)
++#define GCS_CAP_VALID_TOKEN 1
++#define GCS_CAP_IN_PROGRESS_TOKEN 5
++
++#define GCS_CAP(x) (((unsigned long)(x) & GCS_CAP_ADDR_MASK) | \
++ GCS_CAP_VALID_TOKEN)
++
++static inline unsigned long *get_gcspr(void)
++{
++ unsigned long *gcspr;
++
++ asm volatile(
++ "mrs %0, S3_3_C2_C5_1"
++ : "=r" (gcspr)
++ :
++ : "cc");
++
++ return gcspr;
++}
++
++static inline void __attribute__((always_inline)) gcsss1(unsigned long *Xt)
++{
++ asm volatile (
++ "sys #3, C7, C7, #2, %0\n"
++ :
++ : "rZ" (Xt)
++ : "memory");
++}
++
++static inline unsigned long __attribute__((always_inline)) *gcsss2(void)
++{
++ unsigned long *Xt;
++
++ asm volatile(
++ "SYSL %0, #3, C7, C7, #3\n"
++ : "=r" (Xt)
++ :
++ : "memory");
++
++ return Xt;
++}
++
++static inline bool chkfeat_gcs(void)
++{
++ register long val __asm__ ("x16") = 1;
++
++ /* CHKFEAT x16 */
++ asm volatile(
++ "hint #0x28\n"
++ : "=r" (val)
++ : "r" (val));
++
++ return val != 1;
++}
++
++#endif
+--
+2.34.1
+
+
+From 21b5f923dd2284877481d62e62994edfae826f71 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Fri, 28 Apr 2023 18:06:06 +0100
+Subject: [PATCH 43/47] kselftest/arm64: Add a GCS test program built with the
+ system libc
+
+There are things like threads which nolibc struggles with which we want
+to add coverage for, and the ABI allows us to test most of these even if
+libc itself does not understand GCS so add a test application built
+using the system libc.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/arm64/gcs/.gitignore | 1 +
+ tools/testing/selftests/arm64/gcs/Makefile | 4 +-
+ tools/testing/selftests/arm64/gcs/gcs-util.h | 10 +
+ tools/testing/selftests/arm64/gcs/libc-gcs.c | 736 +++++++++++++++++++
+ 4 files changed, 750 insertions(+), 1 deletion(-)
+ create mode 100644 tools/testing/selftests/arm64/gcs/libc-gcs.c
+
+diff --git a/tools/testing/selftests/arm64/gcs/.gitignore b/tools/testing/selftests/arm64/gcs/.gitignore
+index 0e5e695ecba5..5810c4a163d4 100644
+--- a/tools/testing/selftests/arm64/gcs/.gitignore
++++ b/tools/testing/selftests/arm64/gcs/.gitignore
+@@ -1 +1,2 @@
+ basic-gcs
++libc-gcs
+diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile
+index 61a30f483429..a8fdf21e9a47 100644
+--- a/tools/testing/selftests/arm64/gcs/Makefile
++++ b/tools/testing/selftests/arm64/gcs/Makefile
+@@ -6,7 +6,9 @@
+ # nolibc.
+ #
+
+-TEST_GEN_PROGS := basic-gcs
++TEST_GEN_PROGS := basic-gcs libc-gcs
++
++LDLIBS+=-lpthread
+
+ include ../../lib.mk
+
+diff --git a/tools/testing/selftests/arm64/gcs/gcs-util.h b/tools/testing/selftests/arm64/gcs/gcs-util.h
+index b37801c95604..4bafd1d7feb5 100644
+--- a/tools/testing/selftests/arm64/gcs/gcs-util.h
++++ b/tools/testing/selftests/arm64/gcs/gcs-util.h
+@@ -16,6 +16,16 @@
+ #define __NR_prctl 167
+ #endif
+
++#ifndef NT_ARM_GCS
++#define NT_ARM_GCS 0x40e
++
++struct user_gcs {
++ __u64 features_enabled;
++ __u64 features_locked;
++ __u64 gcspr_el0;
++};
++#endif
++
+ /* Shadow Stack/Guarded Control Stack interface */
+ #define PR_GET_SHADOW_STACK_STATUS 71
+ #define PR_SET_SHADOW_STACK_STATUS 72
+diff --git a/tools/testing/selftests/arm64/gcs/libc-gcs.c b/tools/testing/selftests/arm64/gcs/libc-gcs.c
+new file mode 100644
+index 000000000000..937f8bee7bdd
+--- /dev/null
++++ b/tools/testing/selftests/arm64/gcs/libc-gcs.c
+@@ -0,0 +1,736 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Copyright (C) 2023 ARM Limited.
++ */
++
++#define _GNU_SOURCE
++
++#include <pthread.h>
++#include <stdbool.h>
++
++#include <sys/auxv.h>
++#include <sys/mman.h>
++#include <sys/prctl.h>
++#include <sys/ptrace.h>
++#include <sys/uio.h>
++
++#include <asm/hwcap.h>
++#include <asm/mman.h>
++
++#include <linux/compiler.h>
++
++#include "kselftest_harness.h"
++
++#include "gcs-util.h"
++
++#define my_syscall2(num, arg1, arg2) \
++({ \
++ register long _num __asm__ ("x8") = (num); \
++ register long _arg1 __asm__ ("x0") = (long)(arg1); \
++ register long _arg2 __asm__ ("x1") = (long)(arg2); \
++ register long _arg3 __asm__ ("x2") = 0; \
++ register long _arg4 __asm__ ("x3") = 0; \
++ register long _arg5 __asm__ ("x4") = 0; \
++ \
++ __asm__ volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_arg1), "r"(_arg2), \
++ "r"(_arg3), "r"(_arg4), \
++ "r"(_arg5), "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++static noinline void gcs_recurse(int depth)
++{
++ if (depth)
++ gcs_recurse(depth - 1);
++
++ /* Prevent tail call optimization so we actually recurse */
++ asm volatile("dsb sy" : : : "memory");
++}
++
++/* Smoke test that a function call and return works */
++TEST(can_call_function)
++{
++ gcs_recurse(0);
++}
++
++static void *gcs_test_thread(void *arg)
++{
++ int ret;
++ unsigned long mode;
++
++ /*
++ * Some libcs don't seem to fill unused arguments with 0 but
++ * the kernel validates this so we supply all 5 arguments.
++ */
++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
++ if (ret != 0) {
++ ksft_print_msg("PR_GET_SHADOW_STACK_STATUS failed: %d\n", ret);
++ return NULL;
++ }
++
++ if (!(mode & PR_SHADOW_STACK_ENABLE)) {
++ ksft_print_msg("GCS not enabled in thread, mode is %u\n",
++ mode);
++ return NULL;
++ }
++
++ /* Just in case... */
++ gcs_recurse(0);
++
++ /* Use a non-NULL value to indicate a pass */
++ return &gcs_test_thread;
++}
++
++/* Verify that if we start a new thread it has GCS enabled */
++TEST(gcs_enabled_thread)
++{
++ pthread_t thread;
++ void *thread_ret;
++ int ret;
++
++ ret = pthread_create(&thread, NULL, gcs_test_thread, NULL);
++ ASSERT_TRUE(ret == 0);
++ if (ret != 0)
++ return;
++
++ ret = pthread_join(thread, &thread_ret);
++ ASSERT_TRUE(ret == 0);
++ if (ret != 0)
++ return;
++
++ ASSERT_TRUE(thread_ret != NULL);
++}
++
++/* Read the GCS until we find the terminator */
++TEST(gcs_find_terminator)
++{
++ unsigned long *gcs, *cur;
++
++ gcs = get_gcspr();
++ cur = gcs;
++ while (*cur)
++ cur++;
++
++ ksft_print_msg("GCS in use from %p-%p\n", gcs, cur);
++
++ /*
++ * We should have at least whatever called into this test so
++ * the two pointer should differ.
++ */
++ ASSERT_TRUE(gcs != cur);
++}
++
++/*
++ * We can access a GCS via ptrace
++ *
++ * This could usefully have a fixture but note that each test is
++ * fork()ed into a new child which causes issues. Might be better to
++ * lift at least some of this out into a separate, non-harness, test
++ * program.
++ */
++TEST(ptrace_read_write)
++{
++ pid_t child, pid;
++ int ret, status;
++ siginfo_t si;
++ uint64_t val, rval, gcspr;
++ struct user_gcs child_gcs;
++ struct iovec iov, local_iov, remote_iov;
++
++ child = fork();
++ if (child == -1) {
++ ksft_print_msg("fork() failed: %d (%s)\n",
++ errno, strerror(errno));
++ ASSERT_NE(child, -1);
++ }
++
++ if (child == 0) {
++ /*
++ * In child, make sure there's something on the stack and
++ * ask to be traced.
++ */
++ gcs_recurse(0);
++ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
++ ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
++
++ if (raise(SIGSTOP))
++ ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
++
++ return;
++ }
++
++ ksft_print_msg("Child: %d\n", child);
++
++ /* Attach to the child */
++ while (1) {
++ int sig;
++
++ pid = wait(&status);
++ if (pid == -1) {
++ ksft_print_msg("wait() failed: %s",
++ strerror(errno));
++ goto error;
++ }
++
++ /*
++ * This should never happen but it's hard to flag in
++ * the framework.
++ */
++ if (pid != child)
++ continue;
++
++ if (WIFEXITED(status) || WIFSIGNALED(status))
++ ksft_exit_fail_msg("Child died unexpectedly\n");
++
++ if (!WIFSTOPPED(status))
++ goto error;
++
++ sig = WSTOPSIG(status);
++
++ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
++ if (errno == ESRCH) {
++ ASSERT_NE(errno, ESRCH);
++ return;
++ }
++
++ if (errno == EINVAL) {
++ sig = 0; /* bust group-stop */
++ goto cont;
++ }
++
++ ksft_print_msg("PTRACE_GETSIGINFO: %s\n",
++ strerror(errno));
++ goto error;
++ }
++
++ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
++ si.si_pid == pid)
++ break;
++
++ cont:
++ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
++ if (errno == ESRCH) {
++ ASSERT_NE(errno, ESRCH);
++ return;
++ }
++
++ ksft_print_msg("PTRACE_CONT: %s\n", strerror(errno));
++ goto error;
++ }
++ }
++
++ /* Where is the child GCS? */
++ iov.iov_base = &child_gcs;
++ iov.iov_len = sizeof(child_gcs);
++ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_GCS, &iov);
++ if (ret != 0) {
++ ksft_print_msg("Failed to read child GCS state: %s (%d)\n",
++ strerror(errno), errno);
++ goto error;
++ }
++
++ /* We should have inherited GCS over fork(), confirm */
++ if (!(child_gcs.features_enabled & PR_SHADOW_STACK_ENABLE)) {
++ ASSERT_TRUE(child_gcs.features_enabled &
++ PR_SHADOW_STACK_ENABLE);
++ goto error;
++ }
++
++ gcspr = child_gcs.gcspr_el0;
++ ksft_print_msg("Child GCSPR 0x%lx, flags %x, locked %x\n",
++ gcspr, child_gcs.features_enabled,
++ child_gcs.features_locked);
++
++ /* Ideally we'd cross check with the child memory map */
++
++ errno = 0;
++ val = ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL);
++ ret = errno;
++ if (ret != 0)
++ ksft_print_msg("PTRACE_PEEKDATA failed: %s (%d)\n",
++ strerror(ret), ret);
++ EXPECT_EQ(ret, 0);
++
++ /* The child should be in a function, the GCSPR shouldn't be 0 */
++ EXPECT_NE(val, 0);
++
++ /* Same thing via process_vm_readv() */
++ local_iov.iov_base = &rval;
++ local_iov.iov_len = sizeof(rval);
++ remote_iov.iov_base = (void *)gcspr;
++ remote_iov.iov_len = sizeof(rval);
++ ret = process_vm_readv(child, &local_iov, 1, &remote_iov, 1, 0);
++ if (ret == -1)
++ ksft_print_msg("process_vm_readv() failed: %s (%d)\n",
++ strerror(errno), errno);
++ EXPECT_EQ(ret, sizeof(rval));
++ EXPECT_EQ(val, rval);
++
++ /* Write data via a peek */
++ ret = ptrace(PTRACE_POKEDATA, child, (void *)gcspr, NULL);
++ if (ret == -1)
++ ksft_print_msg("PTRACE_POKEDATA failed: %s (%d)\n",
++ strerror(errno), errno);
++ EXPECT_EQ(ret, 0);
++ EXPECT_EQ(0, ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL));
++
++ /* Restore what we had before */
++ ret = ptrace(PTRACE_POKEDATA, child, (void *)gcspr, val);
++ if (ret == -1)
++ ksft_print_msg("PTRACE_POKEDATA failed: %s (%d)\n",
++ strerror(errno), errno);
++ EXPECT_EQ(ret, 0);
++ EXPECT_EQ(val, ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL));
++
++ /* That's all, folks */
++ kill(child, SIGKILL);
++ return;
++
++error:
++ kill(child, SIGKILL);
++ ASSERT_FALSE(true);
++}
++
++FIXTURE(map_gcs)
++{
++ unsigned long *stack;
++};
++
++FIXTURE_VARIANT(map_gcs)
++{
++ size_t stack_size;
++ unsigned long flags;
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s2k_cap_marker)
++{
++ .stack_size = 2 * 1024,
++ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s2k_cap)
++{
++ .stack_size = 2 * 1024,
++ .flags = SHADOW_STACK_SET_TOKEN,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s2k_marker)
++{
++ .stack_size = 2 * 1024,
++ .flags = SHADOW_STACK_SET_MARKER,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s2k)
++{
++ .stack_size = 2 * 1024,
++ .flags = 0,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s4k_cap_marker)
++{
++ .stack_size = 4 * 1024,
++ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s4k_cap)
++{
++ .stack_size = 4 * 1024,
++ .flags = SHADOW_STACK_SET_TOKEN,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s3k_marker)
++{
++ .stack_size = 4 * 1024,
++ .flags = SHADOW_STACK_SET_MARKER,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s4k)
++{
++ .stack_size = 4 * 1024,
++ .flags = 0,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s16k_cap_marker)
++{
++ .stack_size = 16 * 1024,
++ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s16k_cap)
++{
++ .stack_size = 16 * 1024,
++ .flags = SHADOW_STACK_SET_TOKEN,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s16k_marker)
++{
++ .stack_size = 16 * 1024,
++ .flags = SHADOW_STACK_SET_MARKER,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s16k)
++{
++ .stack_size = 16 * 1024,
++ .flags = 0,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s64k_cap_marker)
++{
++ .stack_size = 64 * 1024,
++ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s64k_cap)
++{
++ .stack_size = 64 * 1024,
++ .flags = SHADOW_STACK_SET_TOKEN,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s64k_marker)
++{
++ .stack_size = 64 * 1024,
++ .flags = SHADOW_STACK_SET_MARKER,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s64k)
++{
++ .stack_size = 64 * 1024,
++ .flags = 0,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s128k_cap_marker)
++{
++ .stack_size = 128 * 1024,
++ .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s128k_cap)
++{
++ .stack_size = 128 * 1024,
++ .flags = SHADOW_STACK_SET_TOKEN,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s128k_marker)
++{
++ .stack_size = 128 * 1024,
++ .flags = SHADOW_STACK_SET_MARKER,
++};
++
++FIXTURE_VARIANT_ADD(map_gcs, s128k)
++{
++ .stack_size = 128 * 1024,
++ .flags = 0,
++};
++
++FIXTURE_SETUP(map_gcs)
++{
++ self->stack = (void *)syscall(__NR_map_shadow_stack, 0,
++ variant->stack_size,
++ variant->flags);
++ ASSERT_FALSE(self->stack == MAP_FAILED);
++ ksft_print_msg("Allocated stack from %p-%p\n", self->stack,
++ (unsigned long)self->stack + variant->stack_size);
++}
++
++FIXTURE_TEARDOWN(map_gcs)
++{
++ int ret;
++
++ if (self->stack != MAP_FAILED) {
++ ret = munmap(self->stack, variant->stack_size);
++ ASSERT_EQ(ret, 0);
++ }
++}
++
++/* The stack has a cap token */
++TEST_F(map_gcs, stack_capped)
++{
++ unsigned long *stack = self->stack;
++ size_t cap_index;
++
++ cap_index = (variant->stack_size / sizeof(unsigned long));
++
++ switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) {
++ case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN:
++ cap_index -= 2;
++ break;
++ case SHADOW_STACK_SET_TOKEN:
++ cap_index -= 1;
++ break;
++ case SHADOW_STACK_SET_MARKER:
++ case 0:
++ /* No cap, no test */
++ return;
++ }
++
++ ASSERT_EQ(stack[cap_index], GCS_CAP(&stack[cap_index]));
++}
++
++/* The top of the stack is 0 */
++TEST_F(map_gcs, stack_terminated)
++{
++ unsigned long *stack = self->stack;
++ size_t term_index;
++
++ if (!(variant->flags & SHADOW_STACK_SET_MARKER))
++ return;
++
++ term_index = (variant->stack_size / sizeof(unsigned long)) - 1;
++
++ ASSERT_EQ(stack[term_index], 0);
++}
++
++/* Writes should fault */
++TEST_F_SIGNAL(map_gcs, not_writeable, SIGSEGV)
++{
++ self->stack[0] = 0;
++}
++
++/* Put it all together, we can safely switch to and from the stack */
++TEST_F(map_gcs, stack_switch)
++{
++ size_t cap_index;
++ cap_index = (variant->stack_size / sizeof(unsigned long));
++ unsigned long *orig_gcspr_el0, *pivot_gcspr_el0;
++
++ /* Skip over the stack terminator and point at the cap */
++ switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) {
++ case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN:
++ cap_index -= 2;
++ break;
++ case SHADOW_STACK_SET_TOKEN:
++ cap_index -= 1;
++ break;
++ case SHADOW_STACK_SET_MARKER:
++ case 0:
++ /* No cap, no test */
++ return;
++ }
++ pivot_gcspr_el0 = &self->stack[cap_index];
++
++ /* Pivot to the new GCS */
++ ksft_print_msg("Pivoting to %p from %p, target has value 0x%lx\n",
++ pivot_gcspr_el0, get_gcspr(),
++ *pivot_gcspr_el0);
++ gcsss1(pivot_gcspr_el0);
++ orig_gcspr_el0 = gcsss2();
++ ksft_print_msg("Pivoted to %p from %p, target has value 0x%lx\n",
++ get_gcspr(), orig_gcspr_el0,
++ *pivot_gcspr_el0);
++
++ ksft_print_msg("Pivoted, GCSPR_EL0 now %p\n", get_gcspr());
++
++ /* New GCS must be in the new buffer */
++ ASSERT_TRUE((unsigned long)get_gcspr() > (unsigned long)self->stack);
++ ASSERT_TRUE((unsigned long)get_gcspr() <=
++ (unsigned long)self->stack + variant->stack_size);
++
++ /* We should be able to use all but 2 slots of the new stack */
++ ksft_print_msg("Recursing %d levels\n", cap_index - 1);
++ gcs_recurse(cap_index - 1);
++
++ /* Pivot back to the original GCS */
++ gcsss1(orig_gcspr_el0);
++ pivot_gcspr_el0 = gcsss2();
++
++ gcs_recurse(0);
++ ksft_print_msg("Pivoted back to GCSPR_EL0 0x%lx\n", get_gcspr());
++}
++
++/* We fault if we try to go beyond the end of the stack */
++TEST_F_SIGNAL(map_gcs, stack_overflow, SIGSEGV)
++{
++ size_t cap_index;
++ cap_index = (variant->stack_size / sizeof(unsigned long));
++ unsigned long *orig_gcspr_el0, *pivot_gcspr_el0;
++
++ /* Skip over the stack terminator and point at the cap */
++ switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) {
++ case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN:
++ cap_index -= 2;
++ break;
++ case SHADOW_STACK_SET_TOKEN:
++ cap_index -= 1;
++ break;
++ case SHADOW_STACK_SET_MARKER:
++ case 0:
++ /* No cap, no test but we need to SEGV to avoid a false fail */
++ orig_gcspr_el0 = get_gcspr();
++ *orig_gcspr_el0 = 0;
++ return;
++ }
++ pivot_gcspr_el0 = &self->stack[cap_index];
++
++ /* Pivot to the new GCS */
++ ksft_print_msg("Pivoting to %p from %p, target has value 0x%lx\n",
++ pivot_gcspr_el0, get_gcspr(),
++ *pivot_gcspr_el0);
++ gcsss1(pivot_gcspr_el0);
++ orig_gcspr_el0 = gcsss2();
++ ksft_print_msg("Pivoted to %p from %p, target has value 0x%lx\n",
++ pivot_gcspr_el0, orig_gcspr_el0,
++ *pivot_gcspr_el0);
++
++ ksft_print_msg("Pivoted, GCSPR_EL0 now %p\n", get_gcspr());
++
++ /* New GCS must be in the new buffer */
++ ASSERT_TRUE((unsigned long)get_gcspr() > (unsigned long)self->stack);
++ ASSERT_TRUE((unsigned long)get_gcspr() <=
++ (unsigned long)self->stack + variant->stack_size);
++
++ /* Now try to recurse, we should fault doing this. */
++ ksft_print_msg("Recursing %d levels...\n", cap_index + 1);
++ gcs_recurse(cap_index + 1);
++ ksft_print_msg("...done\n");
++
++ /* Clean up properly to try to guard against spurious passes. */
++ gcsss1(orig_gcspr_el0);
++ pivot_gcspr_el0 = gcsss2();
++ ksft_print_msg("Pivoted back to GCSPR_EL0 0x%lx\n", get_gcspr());
++}
++
++FIXTURE(map_invalid_gcs)
++{
++};
++
++FIXTURE_VARIANT(map_invalid_gcs)
++{
++ size_t stack_size;
++};
++
++FIXTURE_SETUP(map_invalid_gcs)
++{
++}
++
++FIXTURE_TEARDOWN(map_invalid_gcs)
++{
++}
++
++/* GCS must be larger than 16 bytes */
++FIXTURE_VARIANT_ADD(map_invalid_gcs, too_small)
++{
++ .stack_size = 8,
++};
++
++/* GCS size must be 16 byte aligned */
++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_1) { .stack_size = 1024 + 1 };
++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_2) { .stack_size = 1024 + 2 };
++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_3) { .stack_size = 1024 + 3 };
++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_4) { .stack_size = 1024 + 4 };
++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_5) { .stack_size = 1024 + 5 };
++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_6) { .stack_size = 1024 + 6 };
++FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_7) { .stack_size = 1024 + 7 };
++
++TEST_F(map_invalid_gcs, do_map)
++{
++ void *stack;
++
++ stack = (void *)syscall(__NR_map_shadow_stack, 0,
++ variant->stack_size, 0);
++ ASSERT_TRUE(stack == MAP_FAILED);
++ if (stack != MAP_FAILED)
++ munmap(stack, variant->stack_size);
++}
++
++FIXTURE(invalid_mprotect)
++{
++ unsigned long *stack;
++ size_t stack_size;
++};
++
++FIXTURE_VARIANT(invalid_mprotect)
++{
++ unsigned long flags;
++};
++
++FIXTURE_SETUP(invalid_mprotect)
++{
++ self->stack_size = sysconf(_SC_PAGE_SIZE);
++ self->stack = (void *)syscall(__NR_map_shadow_stack, 0,
++ self->stack_size, 0);
++ ASSERT_FALSE(self->stack == MAP_FAILED);
++ ksft_print_msg("Allocated stack from %p-%p\n", self->stack,
++ (unsigned long)self->stack + self->stack_size);
++}
++
++FIXTURE_TEARDOWN(invalid_mprotect)
++{
++ int ret;
++
++ if (self->stack != MAP_FAILED) {
++ ret = munmap(self->stack, self->stack_size);
++ ASSERT_EQ(ret, 0);
++ }
++}
++
++FIXTURE_VARIANT_ADD(invalid_mprotect, exec)
++{
++ .flags = PROT_EXEC,
++};
++
++FIXTURE_VARIANT_ADD(invalid_mprotect, bti)
++{
++ .flags = PROT_BTI,
++};
++
++FIXTURE_VARIANT_ADD(invalid_mprotect, exec_bti)
++{
++ .flags = PROT_EXEC | PROT_BTI,
++};
++
++TEST_F(invalid_mprotect, do_map)
++{
++ int ret;
++
++ ret = mprotect(self->stack, self->stack_size, variant->flags);
++ ASSERT_EQ(ret, -1);
++}
++
++TEST_F(invalid_mprotect, do_map_read)
++{
++ int ret;
++
++ ret = mprotect(self->stack, self->stack_size,
++ variant->flags | PROT_READ);
++ ASSERT_EQ(ret, -1);
++}
++
++int main(int argc, char **argv)
++{
++ unsigned long gcs_mode;
++ int ret;
++
++ if (!(getauxval(AT_HWCAP2) & HWCAP2_GCS))
++ ksft_exit_skip("SKIP GCS not supported\n");
++
++ /*
++ * Force shadow stacks on, our tests *should* be fine with or
++ * without libc support and with or without this having ended
++ * up tagged for GCS and enabled by the dynamic linker. We
++ * can't use the libc prctl() function since we can't return
++ * from enabling the stack.
++ */
++ ret = my_syscall2(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &gcs_mode);
++ if (ret) {
++ ksft_print_msg("Failed to read GCS state: %d\n", ret);
++ return EXIT_FAILURE;
++ }
++
++ if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) {
++ gcs_mode = PR_SHADOW_STACK_ENABLE;
++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
++ gcs_mode);
++ if (ret) {
++ ksft_print_msg("Failed to configure GCS: %d\n", ret);
++ return EXIT_FAILURE;
++ }
++ }
++
++ /* Avoid returning in case libc doesn't understand GCS */
++ exit(test_harness_run(argc, argv));
++}
+--
+2.34.1
+
+
+From 93014b383e621ede703124d9f26b8d0d4f5a010a Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Fri, 21 Jul 2023 14:21:32 +0100
+Subject: [PATCH 44/47] kselftest/arm64: Add test coverage for GCS mode locking
+
+Verify that we can lock individual GCS mode bits, that other modes
+aren't affected and as a side effect also that every combination of
+modes can be enabled.
+
+Normally the inability to reenable GCS after disabling it would be an
+issue with testing but fortunately the kselftest_harness runs each test
+within a fork()ed child. This can be inconvenient for some kinds of
+testing but here it means that each test is in a separate thread and
+therefore won't be affected by other tests in the suite.
+
+Once we get toolchains with support for enabling GCS by default we will
+need to take care to not do that in the build system but there are no
+such toolchains yet so it is not yet an issue.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/arm64/gcs/.gitignore | 1 +
+ tools/testing/selftests/arm64/gcs/Makefile | 2 +-
+ .../testing/selftests/arm64/gcs/gcs-locking.c | 200 ++++++++++++++++++
+ 3 files changed, 202 insertions(+), 1 deletion(-)
+ create mode 100644 tools/testing/selftests/arm64/gcs/gcs-locking.c
+
+diff --git a/tools/testing/selftests/arm64/gcs/.gitignore b/tools/testing/selftests/arm64/gcs/.gitignore
+index 5810c4a163d4..0c86f53f68ad 100644
+--- a/tools/testing/selftests/arm64/gcs/.gitignore
++++ b/tools/testing/selftests/arm64/gcs/.gitignore
+@@ -1,2 +1,3 @@
+ basic-gcs
+ libc-gcs
++gcs-locking
+diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile
+index a8fdf21e9a47..2173d6275956 100644
+--- a/tools/testing/selftests/arm64/gcs/Makefile
++++ b/tools/testing/selftests/arm64/gcs/Makefile
+@@ -6,7 +6,7 @@
+ # nolibc.
+ #
+
+-TEST_GEN_PROGS := basic-gcs libc-gcs
++TEST_GEN_PROGS := basic-gcs libc-gcs gcs-locking
+
+ LDLIBS+=-lpthread
+
+diff --git a/tools/testing/selftests/arm64/gcs/gcs-locking.c b/tools/testing/selftests/arm64/gcs/gcs-locking.c
+new file mode 100644
+index 000000000000..f6a73254317e
+--- /dev/null
++++ b/tools/testing/selftests/arm64/gcs/gcs-locking.c
+@@ -0,0 +1,200 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Copyright (C) 2023 ARM Limited.
++ *
++ * Tests for GCS mode locking. These tests rely on both having GCS
++ * unconfigured on entry and on the kselftest harness running each
++ * test in a fork()ed process which will have it's own mode.
++ */
++
++#include <limits.h>
++
++#include <sys/auxv.h>
++#include <sys/prctl.h>
++
++#include <asm/hwcap.h>
++
++#include "kselftest_harness.h"
++
++#include "gcs-util.h"
++
++#define my_syscall2(num, arg1, arg2) \
++({ \
++ register long _num __asm__ ("x8") = (num); \
++ register long _arg1 __asm__ ("x0") = (long)(arg1); \
++ register long _arg2 __asm__ ("x1") = (long)(arg2); \
++ register long _arg3 __asm__ ("x2") = 0; \
++ register long _arg4 __asm__ ("x3") = 0; \
++ register long _arg5 __asm__ ("x4") = 0; \
++ \
++ __asm__ volatile ( \
++ "svc #0\n" \
++ : "=r"(_arg1) \
++ : "r"(_arg1), "r"(_arg2), \
++ "r"(_arg3), "r"(_arg4), \
++ "r"(_arg5), "r"(_num) \
++ : "memory", "cc" \
++ ); \
++ _arg1; \
++})
++
++/* No mode bits are rejected for locking */
++TEST(lock_all_modes)
++{
++ int ret;
++
++ ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, ULONG_MAX, 0, 0, 0);
++ ASSERT_EQ(ret, 0);
++}
++
++FIXTURE(valid_modes)
++{
++};
++
++FIXTURE_VARIANT(valid_modes)
++{
++ unsigned long mode;
++};
++
++FIXTURE_VARIANT_ADD(valid_modes, enable)
++{
++ .mode = PR_SHADOW_STACK_ENABLE,
++};
++
++FIXTURE_VARIANT_ADD(valid_modes, enable_write)
++{
++ .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE,
++};
++
++FIXTURE_VARIANT_ADD(valid_modes, enable_push)
++{
++ .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH,
++};
++
++FIXTURE_VARIANT_ADD(valid_modes, enable_write_push)
++{
++ .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE |
++ PR_SHADOW_STACK_PUSH,
++};
++
++FIXTURE_SETUP(valid_modes)
++{
++}
++
++FIXTURE_TEARDOWN(valid_modes)
++{
++}
++
++/* We can set the mode at all */
++TEST_F(valid_modes, set)
++{
++ int ret;
++
++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
++ variant->mode);
++ ASSERT_EQ(ret, 0);
++
++ _exit(0);
++}
++
++/* Enabling, locking then disabling is rejected */
++TEST_F(valid_modes, enable_lock_disable)
++{
++ unsigned long mode;
++ int ret;
++
++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
++ variant->mode);
++ ASSERT_EQ(ret, 0);
++
++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
++ ASSERT_EQ(ret, 0);
++ ASSERT_EQ(mode, variant->mode);
++
++ ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0);
++ ASSERT_EQ(ret, 0);
++
++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0);
++ ASSERT_EQ(ret, -EBUSY);
++
++ _exit(0);
++}
++
++/* Locking then enabling is rejected */
++TEST_F(valid_modes, lock_enable)
++{
++ unsigned long mode;
++ int ret;
++
++ ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0);
++ ASSERT_EQ(ret, 0);
++
++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
++ variant->mode);
++ ASSERT_EQ(ret, -EBUSY);
++
++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
++ ASSERT_EQ(ret, 0);
++ ASSERT_EQ(mode, 0);
++
++ _exit(0);
++}
++
++/* Locking then changing other modes is fine */
++TEST_F(valid_modes, lock_enable_disable_others)
++{
++ unsigned long mode;
++ int ret;
++
++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
++ variant->mode);
++ ASSERT_EQ(ret, 0);
++
++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
++ ASSERT_EQ(ret, 0);
++ ASSERT_EQ(mode, variant->mode);
++
++ ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0);
++ ASSERT_EQ(ret, 0);
++
++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
++ PR_SHADOW_STACK_ALL_MODES);
++ ASSERT_EQ(ret, 0);
++
++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
++ ASSERT_EQ(ret, 0);
++ ASSERT_EQ(mode, PR_SHADOW_STACK_ALL_MODES);
++
++
++ ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
++ variant->mode);
++ ASSERT_EQ(ret, 0);
++
++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
++ ASSERT_EQ(ret, 0);
++ ASSERT_EQ(mode, variant->mode);
++
++ _exit(0);
++}
++
++int main(int argc, char **argv)
++{
++ unsigned long mode;
++ int ret;
++
++ if (!(getauxval(AT_HWCAP2) & HWCAP2_GCS))
++ ksft_exit_skip("SKIP GCS not supported\n");
++
++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0);
++ if (ret) {
++ ksft_print_msg("Failed to read GCS state: %d\n", ret);
++ return EXIT_FAILURE;
++ }
++
++ if (mode & PR_SHADOW_STACK_ENABLE) {
++ ksft_print_msg("GCS was enabled, test unsupported\n");
++ return KSFT_SKIP;
++ }
++
++ return test_harness_run(argc, argv);
++}
+--
+2.34.1
+
+
+From 8bb3f253e14703f8c4213fd45ff120d07847cfb9 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Fri, 16 Jun 2023 22:13:44 +0100
+Subject: [PATCH 45/47] kselftest/arm64: Add GCS signal tests
+
+Do some testing of the signal handling for GCS, checking that a GCS
+frame has the expected information in it and that the expected signals
+are delivered with invalid operations.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ .../testing/selftests/arm64/signal/.gitignore | 1 +
+ .../arm64/signal/test_signals_utils.h | 10 +++
+ .../signal/testcases/gcs_exception_fault.c | 62 +++++++++++++
+ .../arm64/signal/testcases/gcs_frame.c | 88 +++++++++++++++++++
+ .../arm64/signal/testcases/gcs_write_fault.c | 67 ++++++++++++++
+ 5 files changed, 228 insertions(+)
+ create mode 100644 tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c
+ create mode 100644 tools/testing/selftests/arm64/signal/testcases/gcs_frame.c
+ create mode 100644 tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c
+
+diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
+index 839e3a252629..26de12918890 100644
+--- a/tools/testing/selftests/arm64/signal/.gitignore
++++ b/tools/testing/selftests/arm64/signal/.gitignore
+@@ -1,6 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+ mangle_*
+ fake_sigreturn_*
++gcs_*
+ sme_*
+ ssve_*
+ sve_*
+diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
+index 1e80808ee105..36fc12b3cd60 100644
+--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h
++++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
+@@ -6,6 +6,7 @@
+
+ #include <assert.h>
+ #include <stdio.h>
++#include <stdint.h>
+ #include <string.h>
+
+ #include <linux/compiler.h>
+@@ -47,6 +48,15 @@ void test_result(struct tdescr *td);
+ _arg1; \
+ })
+
++static inline __attribute__((always_inline)) uint64_t get_gcspr_el0(void)
++{
++ uint64_t val;
++
++ asm volatile("mrs %0, S3_3_C2_C5_1" : "=r" (val));
++
++ return val;
++}
++
+ static inline bool feats_ok(struct tdescr *td)
+ {
+ if (td->feats_incompatible & td->feats_supported)
+diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c
+new file mode 100644
+index 000000000000..6228448b2ae7
+--- /dev/null
++++ b/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c
+@@ -0,0 +1,62 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2023 ARM Limited
++ */
++
++#include <errno.h>
++#include <signal.h>
++#include <unistd.h>
++
++#include <sys/mman.h>
++#include <sys/prctl.h>
++
++#include "test_signals_utils.h"
++#include "testcases.h"
++
++/*
++ * We should get this from asm/siginfo.h but the testsuite is being
++ * clever with redefining siginfo_t.
++ */
++#ifndef SEGV_CPERR
++#define SEGV_CPERR 10
++#endif
++
++static inline void gcsss1(uint64_t Xt)
++{
++ asm volatile (
++ "sys #3, C7, C7, #2, %0\n"
++ :
++ : "rZ" (Xt)
++ : "memory");
++}
++
++static int gcs_op_fault_trigger(struct tdescr *td)
++{
++ /*
++ * The slot below our current GCS should be in a valid GCS but
++ * must not have a valid cap in it.
++ */
++ gcsss1(get_gcspr_el0() - 8);
++
++ return 0;
++}
++
++static int gcs_op_fault_signal(struct tdescr *td, siginfo_t *si,
++ ucontext_t *uc)
++{
++ ASSERT_GOOD_CONTEXT(uc);
++
++ return 1;
++}
++
++struct tdescr tde = {
++ .name = "Invalid GCS operation",
++ .descr = "An invalid GCS operation generates the expected signal",
++ .feats_required = FEAT_GCS,
++ .timeout = 3,
++ .sig_ok = SIGSEGV,
++ .sig_ok_code = SEGV_CPERR,
++ .sanity_disabled = true,
++ .trigger = gcs_op_fault_trigger,
++ .run = gcs_op_fault_signal,
++};
+diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c b/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c
+new file mode 100644
+index 000000000000..b405d82321da
+--- /dev/null
++++ b/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c
+@@ -0,0 +1,88 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2023 ARM Limited
++ */
++
++#include <signal.h>
++#include <ucontext.h>
++#include <sys/prctl.h>
++
++#include "test_signals_utils.h"
++#include "testcases.h"
++
++static union {
++ ucontext_t uc;
++ char buf[1024 * 64];
++} context;
++
++static int gcs_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
++{
++ size_t offset;
++ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
++ struct gcs_context *gcs;
++ unsigned long expected, gcspr;
++ uint64_t *u64_val;
++ int ret;
++
++ ret = prctl(PR_GET_SHADOW_STACK_STATUS, &expected, 0, 0, 0);
++ if (ret != 0) {
++ fprintf(stderr, "Unable to query GCS status\n");
++ return 1;
++ }
++
++ /* We expect a cap to be added to the GCS in the signal frame */
++ gcspr = get_gcspr_el0();
++ gcspr -= 8;
++ fprintf(stderr, "Expecting GCSPR_EL0 %lx\n", gcspr);
++
++ if (!get_current_context(td, &context.uc, sizeof(context))) {
++ fprintf(stderr, "Failed getting context\n");
++ return 1;
++ }
++
++ /* Ensure that the signal restore token was consumed */
++ u64_val = (uint64_t *)get_gcspr_el0() + 1;
++ if (*u64_val) {
++ fprintf(stderr, "GCS value at %p is %lx not 0\n",
++ u64_val, *u64_val);
++ return 1;
++ }
++
++ fprintf(stderr, "Got context\n");
++
++ head = get_header(head, GCS_MAGIC, GET_BUF_RESV_SIZE(context),
++ &offset);
++ if (!head) {
++ fprintf(stderr, "No GCS context\n");
++ return 1;
++ }
++
++ gcs = (struct gcs_context *)head;
++
++ /* Basic size validation is done in get_current_context() */
++
++ if (gcs->features_enabled != expected) {
++ fprintf(stderr, "Features enabled %llx but expected %lx\n",
++ gcs->features_enabled, expected);
++ return 1;
++ }
++
++ if (gcs->gcspr != gcspr) {
++ fprintf(stderr, "Got GCSPR %llx but expected %lx\n",
++ gcs->gcspr, gcspr);
++ return 1;
++ }
++
++ fprintf(stderr, "GCS context validated\n");
++ td->pass = 1;
++
++ return 0;
++}
++
++struct tdescr tde = {
++ .name = "GCS basics",
++ .descr = "Validate a GCS signal context",
++ .feats_required = FEAT_GCS,
++ .timeout = 3,
++ .run = gcs_regs,
++};
+diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c
+new file mode 100644
+index 000000000000..faeabb18c4b2
+--- /dev/null
++++ b/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c
+@@ -0,0 +1,67 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2023 ARM Limited
++ */
++
++#include <errno.h>
++#include <signal.h>
++#include <unistd.h>
++
++#include <sys/mman.h>
++#include <sys/prctl.h>
++
++#include "test_signals_utils.h"
++#include "testcases.h"
++
++static uint64_t *gcs_page;
++
++#ifndef __NR_map_shadow_stack
++#define __NR_map_shadow_stack 453
++#endif
++
++static bool alloc_gcs(struct tdescr *td)
++{
++ long page_size = sysconf(_SC_PAGE_SIZE);
++
++ gcs_page = (void *)syscall(__NR_map_shadow_stack, 0,
++ page_size, 0);
++ if (gcs_page == MAP_FAILED) {
++ fprintf(stderr, "Failed to map %ld byte GCS: %d\n",
++ page_size, errno);
++ return false;
++ }
++
++ return true;
++}
++
++static int gcs_write_fault_trigger(struct tdescr *td)
++{
++ /* Verify that the page is readable (ie, not completely unmapped) */
++ fprintf(stderr, "Read value 0x%lx\n", gcs_page[0]);
++
++ /* A regular write should trigger a fault */
++ gcs_page[0] = EINVAL;
++
++ return 0;
++}
++
++static int gcs_write_fault_signal(struct tdescr *td, siginfo_t *si,
++ ucontext_t *uc)
++{
++ ASSERT_GOOD_CONTEXT(uc);
++
++ return 1;
++}
++
++
++struct tdescr tde = {
++ .name = "GCS write fault",
++ .descr = "Normal writes to a GCS segfault",
++ .feats_required = FEAT_GCS,
++ .timeout = 3,
++ .sig_ok = SIGSEGV,
++ .sanity_disabled = true,
++ .init = alloc_gcs,
++ .trigger = gcs_write_fault_trigger,
++ .run = gcs_write_fault_signal,
++};
+--
+2.34.1
+
+
+From f378d27d073d96254a972ad48b14f12fa684e9ac Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 26 Jul 2023 22:27:08 +0100
+Subject: [PATCH 46/47] kselftest/arm64: Add a GCS stress test
+
+Add a stress test which runs one more process than we have CPUs spinning
+through a very recursive function with frequent syscalls immediately prior
+to return and signals being injected every 100ms. The goal is to flag up
+any scheduling related issues, for example failure to ensure that barriers
+are inserted when moving a GCS using task to another CPU. The test runs for
+a configurable amount of time, defaulting to 10 seconds.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/arm64/gcs/.gitignore | 2 +
+ tools/testing/selftests/arm64/gcs/Makefile | 6 +-
+ .../testing/selftests/arm64/gcs/asm-offsets.h | 0
+ .../selftests/arm64/gcs/gcs-stress-thread.S | 311 ++++++++++
+ .../testing/selftests/arm64/gcs/gcs-stress.c | 532 ++++++++++++++++++
+ 5 files changed, 850 insertions(+), 1 deletion(-)
+ create mode 100644 tools/testing/selftests/arm64/gcs/asm-offsets.h
+ create mode 100644 tools/testing/selftests/arm64/gcs/gcs-stress-thread.S
+ create mode 100644 tools/testing/selftests/arm64/gcs/gcs-stress.c
+
+diff --git a/tools/testing/selftests/arm64/gcs/.gitignore b/tools/testing/selftests/arm64/gcs/.gitignore
+index 0c86f53f68ad..1e8d1f6b27f2 100644
+--- a/tools/testing/selftests/arm64/gcs/.gitignore
++++ b/tools/testing/selftests/arm64/gcs/.gitignore
+@@ -1,3 +1,5 @@
+ basic-gcs
+ libc-gcs
+ gcs-locking
++gcs-stress
++gcs-stress-thread
+diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile
+index 2173d6275956..d8b06ca51e22 100644
+--- a/tools/testing/selftests/arm64/gcs/Makefile
++++ b/tools/testing/selftests/arm64/gcs/Makefile
+@@ -6,7 +6,8 @@
+ # nolibc.
+ #
+
+-TEST_GEN_PROGS := basic-gcs libc-gcs gcs-locking
++TEST_GEN_PROGS := basic-gcs libc-gcs gcs-locking gcs-stress
++TEST_GEN_PROGS_EXTENDED := gcs-stress-thread
+
+ LDLIBS+=-lpthread
+
+@@ -18,3 +19,6 @@ $(OUTPUT)/basic-gcs: basic-gcs.c
+ -I../../../../../usr/include \
+ -std=gnu99 -I../.. -g \
+ -ffreestanding -Wall $^ -o $@ -lgcc
++
++$(OUTPUT)/gcs-stress-thread: gcs-stress-thread.S
++ $(CC) -nostdlib $^ -o $@
+diff --git a/tools/testing/selftests/arm64/gcs/asm-offsets.h b/tools/testing/selftests/arm64/gcs/asm-offsets.h
+new file mode 100644
+index 000000000000..e69de29bb2d1
+diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S b/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S
+new file mode 100644
+index 000000000000..2a08d6bf1ced
+--- /dev/null
++++ b/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S
+@@ -0,0 +1,311 @@
++// Program that loops for ever doing lots of recursions and system calls,
++// intended to be used as part of a stress test for GCS context switching.
++//
++// Copyright 2015-2023 Arm Ltd
++
++#include <asm/unistd.h>
++
++#define sa_sz 32
++#define sa_flags 8
++#define sa_handler 0
++#define sa_mask_sz 8
++
++#define si_code 8
++
++#define SIGINT 2
++#define SIGABRT 6
++#define SIGUSR1 10
++#define SIGSEGV 11
++#define SIGUSR2 12
++#define SIGTERM 15
++#define SEGV_CPERR 10
++
++#define SA_NODEFER 1073741824
++#define SA_SIGINFO 4
++#define ucontext_regs 184
++
++#define PR_SET_SHADOW_STACK_STATUS 72
++# define PR_SHADOW_STACK_ENABLE (1UL << 0)
++
++#define GCSPR_EL0 S3_3_C2_C5_1
++
++.macro function name
++ .macro endfunction
++ .type \name, @function
++ .purgem endfunction
++ .endm
++\name:
++.endm
++
++// Print a single character x0 to stdout
++// Clobbers x0-x2,x8
++function putc
++ str x0, [sp, #-16]!
++
++ mov x0, #1 // STDOUT_FILENO
++ mov x1, sp
++ mov x2, #1
++ mov x8, #__NR_write
++ svc #0
++
++ add sp, sp, #16
++ ret
++endfunction
++.globl putc
++
++// Print a NUL-terminated string starting at address x0 to stdout
++// Clobbers x0-x3,x8
++function puts
++ mov x1, x0
++
++ mov x2, #0
++0: ldrb w3, [x0], #1
++ cbz w3, 1f
++ add x2, x2, #1
++ b 0b
++
++1: mov w0, #1 // STDOUT_FILENO
++ mov x8, #__NR_write
++ svc #0
++
++ ret
++endfunction
++.globl puts
++
++// Utility macro to print a literal string
++// Clobbers x0-x4,x8
++.macro puts string
++ .pushsection .rodata.str1.1, "aMS", @progbits, 1
++.L__puts_literal\@: .string "\string"
++ .popsection
++
++ ldr x0, =.L__puts_literal\@
++ bl puts
++.endm
++
++// Print an unsigned decimal number x0 to stdout
++// Clobbers x0-x4,x8
++function putdec
++ mov x1, sp
++ str x30, [sp, #-32]! // Result can't be > 20 digits
++
++ mov x2, #0
++ strb w2, [x1, #-1]! // Write the NUL terminator
++
++ mov x2, #10
++0: udiv x3, x0, x2 // div-mod loop to generate the digits
++ msub x0, x3, x2, x0
++ add w0, w0, #'0'
++ strb w0, [x1, #-1]!
++ mov x0, x3
++ cbnz x3, 0b
++
++ ldrb w0, [x1]
++ cbnz w0, 1f
++ mov w0, #'0' // Print "0" for 0, not ""
++ strb w0, [x1, #-1]!
++
++1: mov x0, x1
++ bl puts
++
++ ldr x30, [sp], #32
++ ret
++endfunction
++.globl putdec
++
++// Print an unsigned decimal number x0 to stdout, followed by a newline
++// Clobbers x0-x5,x8
++function putdecn
++ mov x5, x30
++
++ bl putdec
++ mov x0, #'\n'
++ bl putc
++
++ ret x5
++endfunction
++.globl putdecn
++
++// Fill x1 bytes starting at x0 with 0.
++// Clobbers x1, x2.
++function memclr
++ mov w2, #0
++endfunction
++.globl memclr
++ // fall through to memfill
++
++// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
++// Clobbers x1
++function memfill
++ cmp x1, #0
++ b.eq 1f
++
++0: strb w2, [x0], #1
++ subs x1, x1, #1
++ b.ne 0b
++
++1: ret
++endfunction
++.globl memfill
++
++// w0: signal number
++// x1: sa_action
++// w2: sa_flags
++// Clobbers x0-x6,x8
++function setsignal
++ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
++
++ mov w4, w0
++ mov x5, x1
++ mov w6, w2
++
++ add x0, sp, #16
++ mov x1, #sa_sz
++ bl memclr
++
++ mov w0, w4
++ add x1, sp, #16
++ str w6, [x1, #sa_flags]
++ str x5, [x1, #sa_handler]
++ mov x2, #0
++ mov x3, #sa_mask_sz
++ mov x8, #__NR_rt_sigaction
++ svc #0
++
++ cbz w0, 1f
++
++ puts "sigaction failure\n"
++ b abort
++
++1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
++ ret
++endfunction
++
++
++function tickle_handler
++ // Perhaps collect GCSPR_EL0 here in future?
++ ret
++endfunction
++
++function terminate_handler
++ mov w21, w0
++ mov x20, x2
++
++ puts "Terminated by signal "
++ mov w0, w21
++ bl putdec
++ puts ", no error\n"
++
++ mov x0, #0
++ mov x8, #__NR_exit
++ svc #0
++endfunction
++
++function segv_handler
++ // stash the siginfo_t *
++ mov x20, x1
++
++ // Disable GCS, we don't want additional faults logging things
++ mov x0, PR_SET_SHADOW_STACK_STATUS
++ mov x1, xzr
++ mov x2, xzr
++ mov x3, xzr
++ mov x4, xzr
++ mov x5, xzr
++ mov x8, #__NR_prctl
++ svc #0
++
++ puts "Got SIGSEGV code "
++
++ ldr x21, [x20, #si_code]
++ mov x0, x21
++ bl putdec
++
++ // GCS faults should have si_code SEGV_CPERR
++ cmp x21, #SEGV_CPERR
++ bne 1f
++
++ puts " (GCS violation)"
++1:
++ mov x0, '\n'
++ bl putc
++ b abort
++endfunction
++
++// Recurse x20 times
++.macro recurse id
++function recurse\id
++ stp x29, x30, [sp, #-16]!
++ mov x29, sp
++
++ cmp x20, 0
++ beq 1f
++ sub x20, x20, 1
++ bl recurse\id
++
++1:
++ ldp x29, x30, [sp], #16
++
++ // Do a syscall immediately prior to returning to try to provoke
++ // scheduling and migration at a point where coherency issues
++ // might trigger.
++ mov x8, #__NR_getpid
++ svc #0
++
++ ret
++endfunction
++.endm
++
++// Generate and use two copies so we're changing the GCS contents
++recurse 1
++recurse 2
++
++.globl _start
++function _start
++ // Run with GCS
++ mov x0, PR_SET_SHADOW_STACK_STATUS
++ mov x1, PR_SHADOW_STACK_ENABLE
++ mov x2, xzr
++ mov x3, xzr
++ mov x4, xzr
++ mov x5, xzr
++ mov x8, #__NR_prctl
++ svc #0
++ cbz x0, 1f
++ puts "Failed to enable GCS\n"
++ b abort
++1:
++
++ mov w0, #SIGTERM
++ adr x1, terminate_handler
++ mov w2, #SA_SIGINFO
++ bl setsignal
++
++ mov w0, #SIGUSR1
++ adr x1, tickle_handler
++ mov w2, #SA_SIGINFO
++ orr w2, w2, #SA_NODEFER
++ bl setsignal
++
++ mov w0, #SIGSEGV
++ adr x1, segv_handler
++ mov w2, #SA_SIGINFO
++ orr w2, w2, #SA_NODEFER
++ bl setsignal
++
++ puts "Running\n"
++
++loop:
++ // Small recursion depth so we're frequently flipping between
++ // the two recursors and changing what's on the stack
++ mov x20, #5
++ bl recurse1
++ mov x20, #5
++ bl recurse2
++ b loop
++endfunction
++
++abort:
++ mov x0, #255
++ mov x8, #__NR_exit
++ svc #0
+diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress.c b/tools/testing/selftests/arm64/gcs/gcs-stress.c
+new file mode 100644
+index 000000000000..23fd8ec37bdc
+--- /dev/null
++++ b/tools/testing/selftests/arm64/gcs/gcs-stress.c
+@@ -0,0 +1,532 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Copyright (C) 2022-3 ARM Limited.
++ */
++
++#define _GNU_SOURCE
++#define _POSIX_C_SOURCE 199309L
++
++#include <errno.h>
++#include <getopt.h>
++#include <poll.h>
++#include <signal.h>
++#include <stdbool.h>
++#include <stddef.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <unistd.h>
++#include <sys/auxv.h>
++#include <sys/epoll.h>
++#include <sys/prctl.h>
++#include <sys/types.h>
++#include <sys/uio.h>
++#include <sys/wait.h>
++#include <asm/hwcap.h>
++
++#include "../../kselftest.h"
++
++struct child_data {
++ char *name, *output;
++ pid_t pid;
++ int stdout;
++ bool output_seen;
++ bool exited;
++ int exit_status;
++ int exit_signal;
++};
++
++static int epoll_fd;
++static struct child_data *children;
++static struct epoll_event *evs;
++static int tests;
++static int num_children;
++static bool terminate;
++
++static int startup_pipe[2];
++
++static int num_processors(void)
++{
++ long nproc = sysconf(_SC_NPROCESSORS_CONF);
++ if (nproc < 0) {
++ perror("Unable to read number of processors\n");
++ exit(EXIT_FAILURE);
++ }
++
++ return nproc;
++}
++
++static void start_thread(struct child_data *child)
++{
++ int ret, pipefd[2], i;
++ struct epoll_event ev;
++
++ ret = pipe(pipefd);
++ if (ret != 0)
++ ksft_exit_fail_msg("Failed to create stdout pipe: %s (%d)\n",
++ strerror(errno), errno);
++
++ child->pid = fork();
++ if (child->pid == -1)
++ ksft_exit_fail_msg("fork() failed: %s (%d)\n",
++ strerror(errno), errno);
++
++ if (!child->pid) {
++ /*
++ * In child, replace stdout with the pipe, errors to
++ * stderr from here as kselftest prints to stdout.
++ */
++ ret = dup2(pipefd[1], 1);
++ if (ret == -1) {
++ fprintf(stderr, "dup2() %d\n", errno);
++ exit(EXIT_FAILURE);
++ }
++
++ /*
++ * Duplicate the read side of the startup pipe to
++ * FD 3 so we can close everything else.
++ */
++ ret = dup2(startup_pipe[0], 3);
++ if (ret == -1) {
++ fprintf(stderr, "dup2() %d\n", errno);
++ exit(EXIT_FAILURE);
++ }
++
++ /*
++ * Very dumb mechanism to clean open FDs other than
++ * stdio. We don't want O_CLOEXEC for the pipes...
++ */
++ for (i = 4; i < 8192; i++)
++ close(i);
++
++ /*
++ * Read from the startup pipe, there should be no data
++ * and we should block until it is closed. We just
++ * carry on on error since this isn't super critical.
++ */
++ ret = read(3, &i, sizeof(i));
++ if (ret < 0)
++ fprintf(stderr, "read(startp pipe) failed: %s (%d)\n",
++ strerror(errno), errno);
++ if (ret > 0)
++ fprintf(stderr, "%d bytes of data on startup pipe\n",
++ ret);
++ close(3);
++
++ ret = execl("gcs-stress-thread", "gcs-stress-thread", NULL);
++ fprintf(stderr, "execl(gcs-stress-thread) failed: %d (%s)\n",
++ errno, strerror(errno));
++
++ exit(EXIT_FAILURE);
++ } else {
++ /*
++ * In parent, remember the child and close our copy of the
++ * write side of stdout.
++ */
++ close(pipefd[1]);
++ child->stdout = pipefd[0];
++ child->output = NULL;
++ child->exited = false;
++ child->output_seen = false;
++
++ ev.events = EPOLLIN | EPOLLHUP;
++ ev.data.ptr = child;
++
++ ret = asprintf(&child->name, "Thread-%d", child->pid);
++ if (ret == -1)
++ ksft_exit_fail_msg("asprintf() failed\n");
++
++ ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, child->stdout, &ev);
++ if (ret < 0) {
++ ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n",
++ child->name, strerror(errno), errno);
++ }
++ }
++
++ ksft_print_msg("Started %s\n", child->name);
++ num_children++;
++}
++
++static bool child_output_read(struct child_data *child)
++{
++ char read_data[1024];
++ char work[1024];
++ int ret, len, cur_work, cur_read;
++
++ ret = read(child->stdout, read_data, sizeof(read_data));
++ if (ret < 0) {
++ if (errno == EINTR)
++ return true;
++
++ ksft_print_msg("%s: read() failed: %s (%d)\n",
++ child->name, strerror(errno),
++ errno);
++ return false;
++ }
++ len = ret;
++
++ child->output_seen = true;
++
++ /* Pick up any partial read */
++ if (child->output) {
++ strncpy(work, child->output, sizeof(work) - 1);
++ cur_work = strnlen(work, sizeof(work));
++ free(child->output);
++ child->output = NULL;
++ } else {
++ cur_work = 0;
++ }
++
++ cur_read = 0;
++ while (cur_read < len) {
++ work[cur_work] = read_data[cur_read++];
++
++ if (work[cur_work] == '\n') {
++ work[cur_work] = '\0';
++ ksft_print_msg("%s: %s\n", child->name, work);
++ cur_work = 0;
++ } else {
++ cur_work++;
++ }
++ }
++
++ if (cur_work) {
++ work[cur_work] = '\0';
++ ret = asprintf(&child->output, "%s", work);
++ if (ret == -1)
++ ksft_exit_fail_msg("Out of memory\n");
++ }
++
++ return false;
++}
++
++static void child_output(struct child_data *child, uint32_t events,
++ bool flush)
++{
++ bool read_more;
++
++ if (events & EPOLLIN) {
++ do {
++ read_more = child_output_read(child);
++ } while (read_more);
++ }
++
++ if (events & EPOLLHUP) {
++ close(child->stdout);
++ child->stdout = -1;
++ flush = true;
++ }
++
++ if (flush && child->output) {
++ ksft_print_msg("%s: %s<EOF>\n", child->name, child->output);
++ free(child->output);
++ child->output = NULL;
++ }
++}
++
++static void child_tickle(struct child_data *child)
++{
++ if (child->output_seen && !child->exited)
++ kill(child->pid, SIGUSR1);
++}
++
++static void child_stop(struct child_data *child)
++{
++ if (!child->exited)
++ kill(child->pid, SIGTERM);
++}
++
++static void child_cleanup(struct child_data *child)
++{
++ pid_t ret;
++ int status;
++ bool fail = false;
++
++ if (!child->exited) {
++ do {
++ ret = waitpid(child->pid, &status, 0);
++ if (ret == -1 && errno == EINTR)
++ continue;
++
++ if (ret == -1) {
++ ksft_print_msg("waitpid(%d) failed: %s (%d)\n",
++ child->pid, strerror(errno),
++ errno);
++ fail = true;
++ break;
++ }
++
++ if (WIFEXITED(status)) {
++ child->exit_status = WEXITSTATUS(status);
++ child->exited = true;
++ }
++
++ if (WIFSIGNALED(status)) {
++ child->exit_signal = WTERMSIG(status);
++ ksft_print_msg("%s: Exited due to signal %d\n",
++ child->name);
++ fail = true;
++ child->exited = true;
++ }
++ } while (!child->exited);
++ }
++
++ if (!child->output_seen) {
++ ksft_print_msg("%s no output seen\n", child->name);
++ fail = true;
++ }
++
++ if (child->exit_status != 0) {
++ ksft_print_msg("%s exited with error code %d\n",
++ child->name, child->exit_status);
++ fail = true;
++ }
++
++ ksft_test_result(!fail, "%s\n", child->name);
++}
++
++static void handle_child_signal(int sig, siginfo_t *info, void *context)
++{
++ int i;
++ bool found = false;
++
++ for (i = 0; i < num_children; i++) {
++ if (children[i].pid == info->si_pid) {
++ children[i].exited = true;
++ children[i].exit_status = info->si_status;
++ found = true;
++ break;
++ }
++ }
++
++ if (!found)
++ ksft_print_msg("SIGCHLD for unknown PID %d with status %d\n",
++ info->si_pid, info->si_status);
++}
++
++static void handle_exit_signal(int sig, siginfo_t *info, void *context)
++{
++ int i;
++
++ /* If we're already exiting then don't signal again */
++ if (terminate)
++ return;
++
++ ksft_print_msg("Got signal, exiting...\n");
++
++ terminate = true;
++
++ /*
++ * This should be redundant, the main loop should clean up
++ * after us, but for safety stop everything we can here.
++ */
++ for (i = 0; i < num_children; i++)
++ child_stop(&children[i]);
++}
++
++/* Handle any pending output without blocking */
++static void drain_output(bool flush)
++{
++ int ret = 1;
++ int i;
++
++ while (ret > 0) {
++ ret = epoll_wait(epoll_fd, evs, tests, 0);
++ if (ret < 0) {
++ if (errno == EINTR)
++ continue;
++ ksft_print_msg("epoll_wait() failed: %s (%d)\n",
++ strerror(errno), errno);
++ }
++
++ for (i = 0; i < ret; i++)
++ child_output(evs[i].data.ptr, evs[i].events, flush);
++ }
++}
++
++static const struct option options[] = {
++ { "timeout", required_argument, NULL, 't' },
++ { }
++};
++
++int main(int argc, char **argv)
++{
++ int seen_children;
++ bool all_children_started = false;
++ int gcs_threads;
++ int timeout = 10;
++ int ret, cpus, i, c;
++ struct sigaction sa;
++
++ while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) {
++ switch (c) {
++ case 't':
++ ret = sscanf(optarg, "%d", &timeout);
++ if (ret != 1)
++ ksft_exit_fail_msg("Failed to parse timeout %s\n",
++ optarg);
++ break;
++ default:
++ ksft_exit_fail_msg("Unknown argument\n");
++ }
++ }
++
++ cpus = num_processors();
++ tests = 0;
++
++ if (getauxval(AT_HWCAP2) & HWCAP2_GCS) {
++ /* One extra thread, trying to trigger migrations */
++ gcs_threads = cpus + 1;
++ tests += gcs_threads;
++ } else {
++ gcs_threads = 0;
++ }
++
++ ksft_print_header();
++ ksft_set_plan(tests);
++
++ ksft_print_msg("%d CPUs, %d GCS threads\n",
++ cpus, gcs_threads);
++
++ if (!tests)
++ ksft_exit_skip("No tests scheduled\n");
++
++ if (timeout > 0)
++ ksft_print_msg("Will run for %ds\n", timeout);
++ else
++ ksft_print_msg("Will run until terminated\n");
++
++ children = calloc(sizeof(*children), tests);
++ if (!children)
++ ksft_exit_fail_msg("Unable to allocate child data\n");
++
++ ret = epoll_create1(EPOLL_CLOEXEC);
++ if (ret < 0)
++ ksft_exit_fail_msg("epoll_create1() failed: %s (%d)\n",
++ strerror(errno), ret);
++ epoll_fd = ret;
++
++ /* Create a pipe which children will block on before execing */
++ ret = pipe(startup_pipe);
++ if (ret != 0)
++ ksft_exit_fail_msg("Failed to create startup pipe: %s (%d)\n",
++ strerror(errno), errno);
++
++ /* Get signal handlers ready before we start any children */
++ memset(&sa, 0, sizeof(sa));
++ sa.sa_sigaction = handle_exit_signal;
++ sa.sa_flags = SA_RESTART | SA_SIGINFO;
++ sigemptyset(&sa.sa_mask);
++ ret = sigaction(SIGINT, &sa, NULL);
++ if (ret < 0)
++ ksft_print_msg("Failed to install SIGINT handler: %s (%d)\n",
++ strerror(errno), errno);
++ ret = sigaction(SIGTERM, &sa, NULL);
++ if (ret < 0)
++ ksft_print_msg("Failed to install SIGTERM handler: %s (%d)\n",
++ strerror(errno), errno);
++ sa.sa_sigaction = handle_child_signal;
++ ret = sigaction(SIGCHLD, &sa, NULL);
++ if (ret < 0)
++ ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n",
++ strerror(errno), errno);
++
++ evs = calloc(tests, sizeof(*evs));
++ if (!evs)
++ ksft_exit_fail_msg("Failed to allocated %d epoll events\n",
++ tests);
++
++ for (i = 0; i < gcs_threads; i++)
++ start_thread(&children[i]);
++
++ /*
++ * All children started, close the startup pipe and let them
++ * run.
++ */
++ close(startup_pipe[0]);
++ close(startup_pipe[1]);
++
++ timeout *= 10;
++ for (;;) {
++ /* Did we get a signal asking us to exit? */
++ if (terminate)
++ break;
++
++ /*
++ * Timeout is counted in 100ms with no output, the
++ * tests print during startup then are silent when
++ * running so this should ensure they all ran enough
++ * to install the signal handler, this is especially
++ * useful in emulation where we will both be slow and
++ * likely to have a large set of VLs.
++ */
++ ret = epoll_wait(epoll_fd, evs, tests, 100);
++ if (ret < 0) {
++ if (errno == EINTR)
++ continue;
++ ksft_exit_fail_msg("epoll_wait() failed: %s (%d)\n",
++ strerror(errno), errno);
++ }
++
++ /* Output? */
++ if (ret > 0) {
++ for (i = 0; i < ret; i++) {
++ child_output(evs[i].data.ptr, evs[i].events,
++ false);
++ }
++ continue;
++ }
++
++ /* Otherwise epoll_wait() timed out */
++
++ /*
++ * If the child processes have not produced output they
++ * aren't actually running the tests yet.
++ */
++ if (!all_children_started) {
++ seen_children = 0;
++
++ for (i = 0; i < num_children; i++)
++ if (children[i].output_seen ||
++ children[i].exited)
++ seen_children++;
++
++ if (seen_children != num_children) {
++ ksft_print_msg("Waiting for %d children\n",
++ num_children - seen_children);
++ continue;
++ }
++
++ all_children_started = true;
++ }
++
++ ksft_print_msg("Sending signals, timeout remaining: %d00ms\n",
++ timeout);
++
++ for (i = 0; i < num_children; i++)
++ child_tickle(&children[i]);
++
++ /* Negative timeout means run indefinitely */
++ if (timeout < 0)
++ continue;
++ if (--timeout == 0)
++ break;
++ }
++
++ ksft_print_msg("Finishing up...\n");
++ terminate = true;
++
++ for (i = 0; i < tests; i++)
++ child_stop(&children[i]);
++
++ drain_output(false);
++
++ for (i = 0; i < tests; i++)
++ child_cleanup(&children[i]);
++
++ drain_output(true);
++
++ ksft_print_cnts();
++
++ return 0;
++}
+--
+2.34.1
+
+
+From dd225d77971ba0aec130153db16df53014141756 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 21 Jun 2023 17:53:57 +0100
+Subject: [PATCH 47/47] kselftest/arm64: Enable GCS for the FP stress tests
+
+While it's a bit off topic for them the floating point stress tests do give
+us some coverage of context thrashing cases, and also of active signal
+delivery separate to the relatively complicated framework in the actual
+signals tests. Have the tests enable GCS on startup, ignoring failures so
+they continue to work as before on systems without GCS.
+
+Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+---
+ tools/testing/selftests/arm64/fp/assembler.h | 15 +++++++++++++++
+ tools/testing/selftests/arm64/fp/fpsimd-test.S | 2 ++
+ tools/testing/selftests/arm64/fp/sve-test.S | 2 ++
+ tools/testing/selftests/arm64/fp/za-test.S | 2 ++
+ tools/testing/selftests/arm64/fp/zt-test.S | 2 ++
+ 5 files changed, 23 insertions(+)
+
+diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h
+index 9b38a0da407d..7012f9f796de 100644
+--- a/tools/testing/selftests/arm64/fp/assembler.h
++++ b/tools/testing/selftests/arm64/fp/assembler.h
+@@ -65,4 +65,19 @@ endfunction
+ bl puts
+ .endm
+
++#define PR_SET_SHADOW_STACK_STATUS 72
++# define PR_SHADOW_STACK_ENABLE (1UL << 0)
++
++.macro enable_gcs
++ // Run with GCS
++ mov x0, PR_SET_SHADOW_STACK_STATUS
++ mov x1, PR_SHADOW_STACK_ENABLE
++ mov x2, xzr
++ mov x3, xzr
++ mov x4, xzr
++ mov x5, xzr
++ mov x8, #__NR_prctl
++ svc #0
++.endm
++
+ #endif /* ! ASSEMBLER_H */
+diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
+index 8b960d01ed2e..b16fb7f42e3e 100644
+--- a/tools/testing/selftests/arm64/fp/fpsimd-test.S
++++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
+@@ -215,6 +215,8 @@ endfunction
+ // Main program entry point
+ .globl _start
+ function _start
++ enable_gcs
++
+ mov x23, #0 // signal count
+
+ mov w0, #SIGINT
+diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
+index fff60e2a25ad..2fb4f0b84476 100644
+--- a/tools/testing/selftests/arm64/fp/sve-test.S
++++ b/tools/testing/selftests/arm64/fp/sve-test.S
+@@ -378,6 +378,8 @@ endfunction
+ // Main program entry point
+ .globl _start
+ function _start
++ enable_gcs
++
+ mov x23, #0 // Irritation signal count
+
+ mov w0, #SIGINT
+diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S
+index 095b45531640..b2603aba99de 100644
+--- a/tools/testing/selftests/arm64/fp/za-test.S
++++ b/tools/testing/selftests/arm64/fp/za-test.S
+@@ -231,6 +231,8 @@ endfunction
+ // Main program entry point
+ .globl _start
+ function _start
++ enable_gcs
++
+ mov x23, #0 // signal count
+
+ mov w0, #SIGINT
+diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S
+index b5c81e81a379..8d9609a49008 100644
+--- a/tools/testing/selftests/arm64/fp/zt-test.S
++++ b/tools/testing/selftests/arm64/fp/zt-test.S
+@@ -200,6 +200,8 @@ endfunction
+ // Main program entry point
+ .globl _start
+ function _start
++ enable_gcs
++
+ mov x23, #0 // signal count
+
+ mov w0, #SIGINT
+--
+2.34.1
+
diff --git a/meta-arm-gcs/recipes-kernel/linux/linux-yocto-dev.bbappend b/meta-arm-gcs/recipes-kernel/linux/linux-yocto-dev.bbappend
new file mode 100644
index 00000000..0d6d78b1
--- /dev/null
+++ b/meta-arm-gcs/recipes-kernel/linux/linux-yocto-dev.bbappend
@@ -0,0 +1,13 @@
+FILESEXTRAPATHS:prepend := "${THISDIR}/files:"
+
+KBRANCH = "v6.8/base"
+SRCREV_machine = "e8f897f4afef0031fe618a8e94127a0934896aba"
+SRCREV_meta = "69506f439abc9bde9dae104e53c597ed472b5940"
+LINUX_VERSION = "6.8.0"
+
+SRC_URI += "file://gcs.patch"
+
+# TMPDIR references in:
+# /usr/src/debug/linux-yocto-dev/6.8.0+git/drivers/tty/vt/consolemap_deftbl.c
+# /usr/src/debug/linux-yocto-dev/6.8.0+git/lib/oid_registry_data.c
+INSANE_SKIP:${PN}-src += "buildpaths"