summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--meta/recipes-devtools/gcc/gcc-9.5.inc1
-rw-r--r--meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch1506
2 files changed, 1507 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc-9.5.inc b/meta/recipes-devtools/gcc/gcc-9.5.inc
index 23bfb1a9db8..9bb41bbe244 100644
--- a/meta/recipes-devtools/gcc/gcc-9.5.inc
+++ b/meta/recipes-devtools/gcc/gcc-9.5.inc
@@ -70,6 +70,7 @@ SRC_URI = "\
file://0038-gentypes-genmodes-Do-not-use-__LINE__-for-maintainin.patch \
file://0039-process_alt_operands-Don-t-match-user-defined-regs-o.patch \
file://0002-libstdc-Fix-inconsistent-noexcept-specific-for-valar.patch \
+ file://CVE-2023-4039.patch \
"
S = "${TMPDIR}/work-shared/gcc-${PV}-${PR}/gcc-${PV}"
SRC_URI[sha256sum] = "27769f64ef1d4cd5e2be8682c0c93f9887983e6cfd1a927ce5a0a2915a95cf8f"
diff --git a/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch b/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch
new file mode 100644
index 00000000000..56d229066f9
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch
@@ -0,0 +1,1506 @@
+From: Richard Sandiford <richard.sandiford@arm.com>
+Subject: [PATCH 00/19] aarch64: Fix -fstack-protector issue
+Date: Tue, 12 Sep 2023 16:25:10 +0100
+
+This series of patches fixes deficiencies in GCC's -fstack-protector
+implementation for AArch64 when using dynamically allocated stack space.
+This is CVE-2023-4039. See:
+
+https://developer.arm.com/Arm%20Security%20Center/GCC%20Stack%20Protector%20Vulnerability%20AArch64
+https://github.com/metaredteam/external-disclosures/security/advisories/GHSA-x7ch-h5rf-w2mf
+
+for more details.
+
+The fix is to put the saved registers above the locals area when
+-fstack-protector is used.
+
+The series also fixes a stack-clash problem that I found while working
+on the CVE. In unpatched sources, the stack-clash problem would only
+trigger for unrealistic numbers of arguments (8K 64-bit arguments, or an
+equivalent). But it would be a more significant issue with the new
+-fstack-protector frame layout. It's therefore important that both
+problems are fixed together.
+
+Some reorganisation of the code seemed necessary to fix the problems in a
+cleanish way. The series is therefore quite long, but only a handful of
+patches should have any effect on code generation.
+
+See the individual patches for a detailed description.
+
+Tested on aarch64-linux-gnu. Pushed to trunk and to all active branches.
+I've also pushed backports to GCC 7+ to vendors/ARM/heads/CVE-2023-4039.
+
+CVE: CVE-2023-4039
+Upstream-Status: Submitted
+Signed-off-by: Ross Burton <ross.burton@arm.com>
+
+
+From 78ebdb7b12d5e258b9811bab715734454268fd0c Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Fri, 16 Jun 2023 17:00:51 +0100
+Subject: [PATCH 01/10] aarch64: Explicitly handle frames with no saved
+ registers
+
+If a frame has no saved registers, it can be allocated in one go.
+There is no need to treat the areas below and above the saved
+registers as separate.
+
+And if we allocate the frame in one go, it should be allocated
+as the initial_adjust rather than the final_adjust. This allows the
+frame size to grow to guard_size - guard_used_by_caller before a stack
+probe is needed. (A frame with no register saves is necessarily a
+leaf frame.)
+
+This is a no-op as thing stand, since a leaf function will have
+no outgoing arguments, and so all the frame will be above where
+the saved registers normally go.
+
+gcc/
+ * config/aarch64/aarch64.c (aarch64_layout_frame): Explicitly
+ allocate the frame in one go if there are no saved registers.
+---
+ gcc/config/aarch64/aarch64.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index a35dceab9fc..e9dad682738 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -4771,9 +4771,11 @@ aarch64_layout_frame (void)
+ max_push_offset = 256;
+
+ HOST_WIDE_INT const_size, const_fp_offset;
+- if (cfun->machine->frame.frame_size.is_constant (&const_size)
+- && const_size < max_push_offset
+- && known_eq (crtl->outgoing_args_size, 0))
++ if (cfun->machine->frame.saved_regs_size == 0)
++ cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
++ else if (cfun->machine->frame.frame_size.is_constant (&const_size)
++ && const_size < max_push_offset
++ && known_eq (crtl->outgoing_args_size, 0))
+ {
+ /* Simple, small frame with no outgoing arguments:
+ stp reg1, reg2, [sp, -frame_size]!
+--
+2.34.1
+
+
+From 347487fffa0266d43bf18f1f91878410881f596e Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Fri, 16 Jun 2023 16:55:12 +0100
+Subject: [PATCH 02/10] aarch64: Add bytes_below_hard_fp to frame info
+
+The frame layout code currently hard-codes the assumption that
+the number of bytes below the saved registers is equal to the
+size of the outgoing arguments. This patch abstracts that
+value into a new field of aarch64_frame.
+
+gcc/
+ * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New
+ field.
+ * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize it,
+ and use it instead of crtl->outgoing_args_size.
+ (aarch64_get_separate_components): Use bytes_below_hard_fp instead
+ of outgoing_args_size.
+ (aarch64_process_components): Likewise.
+---
+ gcc/config/aarch64/aarch64.c | 50 +++++++++++++++++++-----------------
+ gcc/config/aarch64/aarch64.h | 6 ++++-
+ 2 files changed, 32 insertions(+), 24 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index e9dad682738..25cf10cc4b9 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -4684,6 +4684,8 @@ aarch64_layout_frame (void)
+ last_fp_reg = regno;
+ }
+
++ cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size;
++
+ if (cfun->machine->frame.emit_frame_chain)
+ {
+ /* FP and LR are placed in the linkage record. */
+@@ -4751,11 +4753,11 @@ aarch64_layout_frame (void)
+ STACK_BOUNDARY / BITS_PER_UNIT);
+
+ /* Both these values are already aligned. */
+- gcc_assert (multiple_p (crtl->outgoing_args_size,
++ gcc_assert (multiple_p (cfun->machine->frame.bytes_below_hard_fp,
+ STACK_BOUNDARY / BITS_PER_UNIT));
+ cfun->machine->frame.frame_size
+ = (cfun->machine->frame.hard_fp_offset
+- + crtl->outgoing_args_size);
++ + cfun->machine->frame.bytes_below_hard_fp);
+
+ cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
+
+@@ -4775,23 +4777,23 @@ aarch64_layout_frame (void)
+ cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
+ else if (cfun->machine->frame.frame_size.is_constant (&const_size)
+ && const_size < max_push_offset
+- && known_eq (crtl->outgoing_args_size, 0))
++ && known_eq (cfun->machine->frame.bytes_below_hard_fp, 0))
+ {
+- /* Simple, small frame with no outgoing arguments:
++ /* Simple, small frame with no data below the saved registers.
+ stp reg1, reg2, [sp, -frame_size]!
+ stp reg3, reg4, [sp, 16] */
+ cfun->machine->frame.callee_adjust = const_size;
+ }
+- else if (known_lt (crtl->outgoing_args_size
++ else if (known_lt (cfun->machine->frame.bytes_below_hard_fp
+ + cfun->machine->frame.saved_regs_size, 512)
+ && !(cfun->calls_alloca
+ && known_lt (cfun->machine->frame.hard_fp_offset,
+ max_push_offset)))
+ {
+- /* Frame with small outgoing arguments:
++ /* Frame with small area below the saved registers:
+ sub sp, sp, frame_size
+- stp reg1, reg2, [sp, outgoing_args_size]
+- stp reg3, reg4, [sp, outgoing_args_size + 16] */
++ stp reg1, reg2, [sp, bytes_below_hard_fp]
++ stp reg3, reg4, [sp, bytes_below_hard_fp + 16] */
+ cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
+ cfun->machine->frame.callee_offset
+ = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
+@@ -4799,22 +4801,23 @@ aarch64_layout_frame (void)
+ else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
+ && const_fp_offset < max_push_offset)
+ {
+- /* Frame with large outgoing arguments but a small local area:
++ /* Frame with large area below the saved registers, but with a
++ small area above:
+ stp reg1, reg2, [sp, -hard_fp_offset]!
+ stp reg3, reg4, [sp, 16]
+- sub sp, sp, outgoing_args_size */
++ sub sp, sp, bytes_below_hard_fp */
+ cfun->machine->frame.callee_adjust = const_fp_offset;
+ cfun->machine->frame.final_adjust
+ = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
+ }
+ else
+ {
+- /* Frame with large local area and outgoing arguments using frame pointer:
++ /* General case:
+ sub sp, sp, hard_fp_offset
+ stp x29, x30, [sp, 0]
+ add x29, sp, 0
+ stp reg3, reg4, [sp, 16]
+- sub sp, sp, outgoing_args_size */
++ sub sp, sp, bytes_below_hard_fp */
+ cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
+ cfun->machine->frame.final_adjust
+ = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
+@@ -5243,9 +5246,11 @@ aarch64_get_separate_components (void)
+ if (aarch64_register_saved_on_entry (regno))
+ {
+ poly_int64 offset = cfun->machine->frame.reg_offset[regno];
++
++ /* Get the offset relative to the register we'll use. */
+ if (!frame_pointer_needed)
+- offset += cfun->machine->frame.frame_size
+- - cfun->machine->frame.hard_fp_offset;
++ offset += cfun->machine->frame.bytes_below_hard_fp;
++
+ /* Check that we can access the stack slot of the register with one
+ direct load with no adjustments needed. */
+ if (offset_12bit_unsigned_scaled_p (DImode, offset))
+@@ -5367,8 +5372,8 @@ aarch64_process_components (sbitmap components, bool prologue_p)
+ rtx reg = gen_rtx_REG (mode, regno);
+ poly_int64 offset = cfun->machine->frame.reg_offset[regno];
+ if (!frame_pointer_needed)
+- offset += cfun->machine->frame.frame_size
+- - cfun->machine->frame.hard_fp_offset;
++ offset += cfun->machine->frame.bytes_below_hard_fp;
++
+ rtx addr = plus_constant (Pmode, ptr_reg, offset);
+ rtx mem = gen_frame_mem (mode, addr);
+
+@@ -5410,8 +5415,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
+ /* REGNO2 can be saved/restored in a pair with REGNO. */
+ rtx reg2 = gen_rtx_REG (mode, regno2);
+ if (!frame_pointer_needed)
+- offset2 += cfun->machine->frame.frame_size
+- - cfun->machine->frame.hard_fp_offset;
++ offset2 += cfun->machine->frame.bytes_below_hard_fp;
+ rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
+ rtx mem2 = gen_frame_mem (mode, addr2);
+ rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
+@@ -5478,10 +5482,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
+ registers. If POLY_SIZE is not large enough to require a probe this function
+ will only adjust the stack. When allocating the stack space
+ FRAME_RELATED_P is then used to indicate if the allocation is frame related.
+- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
+- arguments. If we are then we ensure that any allocation larger than the ABI
+- defined buffer needs a probe so that the invariant of having a 1KB buffer is
+- maintained.
++ FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
++ the saved registers. If we are then we ensure that any allocation
++ larger than the ABI defined buffer needs a probe so that the
++ invariant of having a 1KB buffer is maintained.
+
+ We emit barriers after each stack adjustment to prevent optimizations from
+ breaking the invariant that we never drop the stack more than a page. This
+@@ -5671,7 +5675,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
+ be probed. This maintains the requirement that each page is probed at
+ least once. For initial probing we probe only if the allocation is
+- more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
++ more than GUARD_SIZE - buffer, and below the saved registers we probe
+ if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
+ GUARD_SIZE. This works that for any allocation that is large enough to
+ trigger a probe here, we'll have at least one, and if they're not large
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index af0bc3f1881..95831637ba7 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -712,9 +712,13 @@ struct GTY (()) aarch64_frame
+ HOST_WIDE_INT saved_varargs_size;
+
+ /* The size of the saved callee-save int/FP registers. */
+-
+ HOST_WIDE_INT saved_regs_size;
+
++ /* The number of bytes between the bottom of the static frame (the bottom
++ of the outgoing arguments) and the hard frame pointer. This value is
++ always a multiple of STACK_BOUNDARY. */
++ poly_int64 bytes_below_hard_fp;
++
+ /* Offset from the base of the frame (incomming SP) to the
+ top of the locals area. This value is always a multiple of
+ STACK_BOUNDARY. */
+--
+2.34.1
+
+
+From 4604c4cd0a6c4c26d6594ec9a0383b4d9197d9df Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 27 Jun 2023 11:25:40 +0100
+Subject: [PATCH 03/10] aarch64: Rename locals_offset to bytes_above_locals
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+locals_offset was described as:
+
+ /* Offset from the base of the frame (incomming SP) to the
+ top of the locals area. This value is always a multiple of
+ STACK_BOUNDARY. */
+
+This is implicitly an “upside down” view of the frame: the incoming
+SP is at offset 0, and anything N bytes below the incoming SP is at
+offset N (rather than -N).
+
+However, reg_offset instead uses a “right way up” view; that is,
+it views offsets in address terms. Something above X is at a
+positive offset from X and something below X is at a negative
+offset from X.
+
+Also, even on FRAME_GROWS_DOWNWARD targets like AArch64,
+target-independent code views offsets in address terms too:
+locals are allocated at negative offsets to virtual_stack_vars.
+
+It seems confusing to have *_offset fields of the same structure
+using different polarities like this. This patch tries to avoid
+that by renaming locals_offset to bytes_above_locals.
+
+gcc/
+ * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to...
+ (aarch64_frame::bytes_above_locals): ...this.
+ * config/aarch64/aarch64.c (aarch64_layout_frame)
+ (aarch64_initial_elimination_offset): Update accordingly.
+---
+ gcc/config/aarch64/aarch64.c | 9 +++++----
+ gcc/config/aarch64/aarch64.h | 6 +++---
+ 2 files changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index 25cf10cc4b9..dcaf491af42 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -4759,7 +4759,8 @@ aarch64_layout_frame (void)
+ = (cfun->machine->frame.hard_fp_offset
+ + cfun->machine->frame.bytes_below_hard_fp);
+
+- cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
++ cfun->machine->frame.bytes_above_locals
++ = cfun->machine->frame.saved_varargs_size;
+
+ cfun->machine->frame.initial_adjust = 0;
+ cfun->machine->frame.final_adjust = 0;
+@@ -8566,14 +8567,14 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
+
+ if (from == FRAME_POINTER_REGNUM)
+ return cfun->machine->frame.hard_fp_offset
+- - cfun->machine->frame.locals_offset;
++ - cfun->machine->frame.bytes_above_locals;
+ }
+
+ if (to == STACK_POINTER_REGNUM)
+ {
+ if (from == FRAME_POINTER_REGNUM)
+- return cfun->machine->frame.frame_size
+- - cfun->machine->frame.locals_offset;
++ return cfun->machine->frame.frame_size
++ - cfun->machine->frame.bytes_above_locals;
+ }
+
+ return cfun->machine->frame.frame_size;
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 95831637ba7..a079a88b4f4 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -719,10 +719,10 @@ struct GTY (()) aarch64_frame
+ always a multiple of STACK_BOUNDARY. */
+ poly_int64 bytes_below_hard_fp;
+
+- /* Offset from the base of the frame (incomming SP) to the
+- top of the locals area. This value is always a multiple of
++ /* The number of bytes between the top of the locals area and the top
++ of the frame (the incomming SP). This value is always a multiple of
+ STACK_BOUNDARY. */
+- poly_int64 locals_offset;
++ poly_int64 bytes_above_locals;
+
+ /* Offset from the base of the frame (incomming SP) to the
+ hard_frame_pointer. This value is always a multiple of
+--
+2.34.1
+
+
+From 16016465ff28a75f5e0540cbaeb4eb102fdc3230 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 27 Jun 2023 11:28:11 +0100
+Subject: [PATCH 04/10] aarch64: Rename hard_fp_offset to bytes_above_hard_fp
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Similarly to the previous locals_offset patch, hard_fp_offset
+was described as:
+
+ /* Offset from the base of the frame (incomming SP) to the
+ hard_frame_pointer. This value is always a multiple of
+ STACK_BOUNDARY. */
+ poly_int64 hard_fp_offset;
+
+which again took an “upside-down” view: higher offsets meant lower
+addresses. This patch renames the field to bytes_above_hard_fp instead.
+
+gcc/
+ * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename
+ to...
+ (aarch64_frame::bytes_above_hard_fp): ...this.
+ * config/aarch64/aarch64.c (aarch64_layout_frame)
+ (aarch64_expand_prologue): Update accordingly.
+ (aarch64_initial_elimination_offset): Likewise.
+---
+ gcc/config/aarch64/aarch64.c | 21 +++++++++++----------
+ gcc/config/aarch64/aarch64.h | 6 +++---
+ 2 files changed, 14 insertions(+), 13 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index dcaf491af42..2681e0c2bb9 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -4747,7 +4747,7 @@ aarch64_layout_frame (void)
+ HOST_WIDE_INT varargs_and_saved_regs_size
+ = offset + cfun->machine->frame.saved_varargs_size;
+
+- cfun->machine->frame.hard_fp_offset
++ cfun->machine->frame.bytes_above_hard_fp
+ = aligned_upper_bound (varargs_and_saved_regs_size
+ + get_frame_size (),
+ STACK_BOUNDARY / BITS_PER_UNIT);
+@@ -4756,7 +4756,7 @@ aarch64_layout_frame (void)
+ gcc_assert (multiple_p (cfun->machine->frame.bytes_below_hard_fp,
+ STACK_BOUNDARY / BITS_PER_UNIT));
+ cfun->machine->frame.frame_size
+- = (cfun->machine->frame.hard_fp_offset
++ = (cfun->machine->frame.bytes_above_hard_fp
+ + cfun->machine->frame.bytes_below_hard_fp);
+
+ cfun->machine->frame.bytes_above_locals
+@@ -4788,7 +4788,7 @@ aarch64_layout_frame (void)
+ else if (known_lt (cfun->machine->frame.bytes_below_hard_fp
+ + cfun->machine->frame.saved_regs_size, 512)
+ && !(cfun->calls_alloca
+- && known_lt (cfun->machine->frame.hard_fp_offset,
++ && known_lt (cfun->machine->frame.bytes_above_hard_fp,
+ max_push_offset)))
+ {
+ /* Frame with small area below the saved registers:
+@@ -4797,14 +4797,14 @@ aarch64_layout_frame (void)
+ stp reg3, reg4, [sp, bytes_below_hard_fp + 16] */
+ cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
+ cfun->machine->frame.callee_offset
+- = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
++ = cfun->machine->frame.frame_size - cfun->machine->frame.bytes_above_hard_fp;
+ }
+- else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
++ else if (cfun->machine->frame.bytes_above_hard_fp.is_constant (&const_fp_offset)
+ && const_fp_offset < max_push_offset)
+ {
+ /* Frame with large area below the saved registers, but with a
+ small area above:
+- stp reg1, reg2, [sp, -hard_fp_offset]!
++ stp reg1, reg2, [sp, -bytes_above_hard_fp]!
+ stp reg3, reg4, [sp, 16]
+ sub sp, sp, bytes_below_hard_fp */
+ cfun->machine->frame.callee_adjust = const_fp_offset;
+@@ -4814,12 +4814,13 @@ aarch64_layout_frame (void)
+ else
+ {
+ /* General case:
+- sub sp, sp, hard_fp_offset
++ sub sp, sp, bytes_above_hard_fp
+ stp x29, x30, [sp, 0]
+ add x29, sp, 0
+ stp reg3, reg4, [sp, 16]
+ sub sp, sp, bytes_below_hard_fp */
+- cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
++ cfun->machine->frame.initial_adjust
++ = cfun->machine->frame.bytes_above_hard_fp;
+ cfun->machine->frame.final_adjust
+ = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
+ }
+@@ -8563,10 +8564,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
+ if (to == HARD_FRAME_POINTER_REGNUM)
+ {
+ if (from == ARG_POINTER_REGNUM)
+- return cfun->machine->frame.hard_fp_offset;
++ return cfun->machine->frame.bytes_above_hard_fp;
+
+ if (from == FRAME_POINTER_REGNUM)
+- return cfun->machine->frame.hard_fp_offset
++ return cfun->machine->frame.bytes_above_hard_fp
+ - cfun->machine->frame.bytes_above_locals;
+ }
+
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index a079a88b4f4..eab6da84a02 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -724,10 +724,10 @@ struct GTY (()) aarch64_frame
+ STACK_BOUNDARY. */
+ poly_int64 bytes_above_locals;
+
+- /* Offset from the base of the frame (incomming SP) to the
+- hard_frame_pointer. This value is always a multiple of
++ /* The number of bytes between the hard_frame_pointer and the top of
++ the frame (the incomming SP). This value is always a multiple of
+ STACK_BOUNDARY. */
+- poly_int64 hard_fp_offset;
++ poly_int64 bytes_above_hard_fp;
+
+ /* The size of the frame. This value is the offset from base of the
+ frame (incomming SP) to the stack_pointer. This value is always
+--
+2.34.1
+
+
+From eb2271eb6bb68ec3c9aa9ae4746ea1ee5f18874a Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 22 Jun 2023 22:26:30 +0100
+Subject: [PATCH 05/10] aarch64: Tweak frame_size comment
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This patch fixes another case in which a value was described with
+an “upside-down” view.
+
+gcc/
+ * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment.
+---
+ gcc/config/aarch64/aarch64.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index eab6da84a02..7c4b65ec55b 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -729,8 +729,8 @@ struct GTY (()) aarch64_frame
+ STACK_BOUNDARY. */
+ poly_int64 bytes_above_hard_fp;
+
+- /* The size of the frame. This value is the offset from base of the
+- frame (incomming SP) to the stack_pointer. This value is always
++ /* The size of the frame, i.e. the number of bytes between the bottom
++ of the outgoing arguments and the incoming SP. This value is always
+ a multiple of STACK_BOUNDARY. */
+ poly_int64 frame_size;
+
+--
+2.34.1
+
+
+From cfed3b87e9351edff1568ade4ef666edc9887639 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 15 Aug 2023 19:05:30 +0100
+Subject: [PATCH 06/10] Backport check-function-bodies support
+
+---
+ gcc/testsuite/lib/scanasm.exp | 191 ++++++++++++++++++++++++++++++++++
+ 1 file changed, 191 insertions(+)
+
+diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
+index 35ccbc86fc0..c9af27bf47a 100644
+--- a/gcc/testsuite/lib/scanasm.exp
++++ b/gcc/testsuite/lib/scanasm.exp
+@@ -546,3 +546,194 @@ proc scan-lto-assembler { args } {
+ verbose "output_file: $output_file"
+ dg-scan "scan-lto-assembler" 1 $testcase $output_file $args
+ }
++
++# Read assembly file FILENAME and store a mapping from function names
++# to function bodies in array RESULT. FILENAME has already been uploaded
++# locally where necessary and is known to exist.
++
++proc parse_function_bodies { filename result } {
++ upvar $result up_result
++
++ # Regexp for the start of a function definition (name in \1).
++ set label {^([a-zA-Z_]\S+):$}
++
++ # Regexp for the end of a function definition.
++ set terminator {^\s*\.size}
++
++ # Regexp for lines that aren't interesting.
++ set fluff {^\s*(?:\.|//|@|$)}
++
++ set fd [open $filename r]
++ set in_function 0
++ while { [gets $fd line] >= 0 } {
++ if { [regexp $label $line dummy function_name] } {
++ set in_function 1
++ set function_body ""
++ } elseif { $in_function } {
++ if { [regexp $terminator $line] } {
++ set up_result($function_name) $function_body
++ set in_function 0
++ } elseif { ![regexp $fluff $line] } {
++ append function_body $line "\n"
++ }
++ }
++ }
++ close $fd
++}
++
++# FUNCTIONS is an array that maps function names to function bodies.
++# Return true if it contains a definition of function NAME and if
++# that definition matches BODY_REGEXP.
++
++proc check_function_body { functions name body_regexp } {
++ upvar $functions up_functions
++
++ if { ![info exists up_functions($name)] } {
++ return 0
++ }
++ set fn_res [regexp "^$body_regexp\$" $up_functions($name)]
++ if { !$fn_res } {
++ verbose -log "body: $body_regexp"
++ verbose -log "against: $up_functions($name)"
++ }
++ return $fn_res
++}
++
++# Check the implementations of functions against expected output. Used as:
++#
++# { dg-do { check-function-bodies PREFIX TERMINATOR[ OPTION[ SELECTOR]] } }
++#
++# See sourcebuild.texi for details.
++
++proc check-function-bodies { args } {
++ if { [llength $args] < 2 } {
++ error "too few arguments to check-function-bodies"
++ }
++ if { [llength $args] > 4 } {
++ error "too many arguments to check-function-bodies"
++ }
++
++ if { [llength $args] >= 3 } {
++ set required_flags [lindex $args 2]
++
++ upvar 2 dg-extra-tool-flags extra_tool_flags
++ set flags $extra_tool_flags
++
++ global torture_current_flags
++ if { [info exists torture_current_flags] } {
++ append flags " " $torture_current_flags
++ }
++ foreach required_flag $required_flags {
++ switch -- $required_flag {
++ target -
++ xfail {
++ error "misplaced $required_flag in check-function-bodies"
++ }
++ }
++ }
++ foreach required_flag $required_flags {
++ if { ![regexp " $required_flag " $flags] } {
++ return
++ }
++ }
++ }
++
++ set xfail_all 0
++ if { [llength $args] >= 4 } {
++ switch [dg-process-target [lindex $args 3]] {
++ "S" { }
++ "N" { return }
++ "F" { set xfail_all 1 }
++ "P" { }
++ }
++ }
++
++ set testcase [testname-for-summary]
++ # The name might include a list of options; extract the file name.
++ set filename [lindex $testcase 0]
++
++ global srcdir
++ set input_filename "$srcdir/$filename"
++ set output_filename "[file rootname [file tail $filename]].s"
++
++ set prefix [lindex $args 0]
++ set prefix_len [string length $prefix]
++ set terminator [lindex $args 1]
++ if { [string equal $terminator ""] } {
++ set terminator "*/"
++ }
++ set terminator_len [string length $terminator]
++
++ set have_bodies 0
++ if { [is_remote host] } {
++ remote_upload host "$filename"
++ }
++ if { [file exists $output_filename] } {
++ parse_function_bodies $output_filename functions
++ set have_bodies 1
++ } else {
++ verbose -log "$testcase: output file does not exist"
++ }
++
++ set count 0
++ set function_regexp ""
++ set label {^(\S+):$}
++
++ set lineno 1
++ set fd [open $input_filename r]
++ set in_function 0
++ while { [gets $fd line] >= 0 } {
++ if { [string equal -length $prefix_len $line $prefix] } {
++ set line [string trim [string range $line $prefix_len end]]
++ if { !$in_function } {
++ if { [regexp "^(.*?\\S)\\s+{(.*)}\$" $line dummy \
++ line selector] } {
++ set selector [dg-process-target $selector]
++ } else {
++ set selector "P"
++ }
++ if { ![regexp $label $line dummy function_name] } {
++ close $fd
++ error "check-function-bodies: line $lineno does not have a function label"
++ }
++ set in_function 1
++ set function_regexp ""
++ } elseif { [string equal $line "("] } {
++ append function_regexp "(?:"
++ } elseif { [string equal $line "|"] } {
++ append function_regexp "|"
++ } elseif { [string equal $line ")"] } {
++ append function_regexp ")"
++ } elseif { [string equal $line "..."] } {
++ append function_regexp ".*"
++ } else {
++ append function_regexp "\t" $line "\n"
++ }
++ } elseif { [string equal -length $terminator_len $line $terminator] } {
++ if { ![string equal $selector "N"] } {
++ if { $xfail_all || [string equal $selector "F"] } {
++ setup_xfail "*-*-*"
++ }
++ set testname "$testcase check-function-bodies $function_name"
++ if { !$have_bodies } {
++ unresolved $testname
++ } elseif { [check_function_body functions $function_name \
++ $function_regexp] } {
++ pass $testname
++ } else {
++ fail $testname
++ }
++ }
++ set in_function 0
++ incr count
++ }
++ incr lineno
++ }
++ close $fd
++ if { $in_function } {
++ error "check-function-bodies: missing \"$terminator\""
++ }
++ if { $count == 0 } {
++ error "check-function-bodies: no matches found"
++ }
++}
+--
+2.34.1
+
+
+From 4dd8925d95d3d6d89779b494b5f4cfadcf9fa96e Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 27 Jun 2023 15:11:44 +0100
+Subject: [PATCH 07/10] aarch64: Tweak stack clash boundary condition
+
+The AArch64 ABI says that, when stack clash protection is used,
+there can be a maximum of 1KiB of unprobed space at sp on entry
+to a function. Therefore, we need to probe when allocating
+>= guard_size - 1KiB of data (>= rather than >). This is what
+GCC does.
+
+If an allocation is exactly guard_size bytes, it is enough to allocate
+those bytes and probe once at offset 1024. It isn't possible to use a
+single probe at any other offset: higher would conmplicate later code,
+by leaving more unprobed space than usual, while lower would risk
+leaving an entire page unprobed. For simplicity, the code probes all
+allocations at offset 1024.
+
+Some register saves also act as probes. If we need to allocate
+more space below the last such register save probe, we need to
+probe the allocation if it is > 1KiB. Again, this allocation is
+then sometimes (but not always) probed at offset 1024. This sort of
+allocation is currently only used for outgoing arguments, which are
+rarely this big.
+
+However, the code also probed if this final outgoing-arguments
+allocation was == 1KiB, rather than just > 1KiB. This isn't
+necessary, since the register save then probes at offset 1024
+as required. Continuing to probe allocations of exactly 1KiB
+would complicate later patches.
+
+gcc/
+ * config/aarch64/aarch64.c (aarch64_allocate_and_probe_stack_space):
+ Don't probe final allocations that are exactly 1KiB in size (after
+ unprobed space above the final allocation has been deducted).
+
+gcc/testsuite/
+ * gcc.target/aarch64/stack-check-prologue-17.c: New test.
+---
+ gcc/config/aarch64/aarch64.c | 6 +-
+ .../aarch64/stack-check-prologue-17.c | 55 +++++++++++++++++++
+ 2 files changed, 60 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index 2681e0c2bb9..4c9e11cd7cf 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -5506,6 +5506,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ HOST_WIDE_INT guard_size
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
+ HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
++ HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
++ gcc_assert (multiple_p (poly_size, byte_sp_alignment));
+ /* When doing the final adjustment for the outgoing argument size we can't
+ assume that LR was saved at position 0. So subtract it's offset from the
+ ABI safe buffer so that we don't accidentally allow an adjustment that
+@@ -5513,7 +5515,9 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ probing. */
+ HOST_WIDE_INT min_probe_threshold
+ = final_adjustment_p
+- ? guard_used_by_caller - cfun->machine->frame.reg_offset[LR_REGNUM]
++ ? (guard_used_by_caller
++ + byte_sp_alignment
++ - cfun->machine->frame.reg_offset[LR_REGNUM])
+ : guard_size - guard_used_by_caller;
+
+ poly_int64 frame_size = cfun->machine->frame.frame_size;
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+new file mode 100644
+index 00000000000..0d8a25d73a2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+@@ -0,0 +1,55 @@
++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++void f(int, ...);
++void g();
++
++/*
++** test1:
++** ...
++** str x30, \[sp\]
++** sub sp, sp, #1024
++** cbnz w0, .*
++** bl g
++** ...
++*/
++int test1(int z) {
++ __uint128_t x = 0;
++ int y[0x400];
++ if (z)
++ {
++ f(0, 0, 0, 0, 0, 0, 0, &y,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
++ }
++ g();
++ return 1;
++}
++
++/*
++** test2:
++** ...
++** str x30, \[sp\]
++** sub sp, sp, #1040
++** str xzr, \[sp\]
++** cbnz w0, .*
++** bl g
++** ...
++*/
++int test2(int z) {
++ __uint128_t x = 0;
++ int y[0x400];
++ if (z)
++ {
++ f(0, 0, 0, 0, 0, 0, 0, &y,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x);
++ }
++ g();
++ return 1;
++}
+--
+2.34.1
+
+
+From 12517baf6c88447e3bda3a459ac4c29d61f84e6c Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 27 Jun 2023 15:12:55 +0100
+Subject: [PATCH 08/10] aarch64: Put LR save probe in first 16 bytes
+
+-fstack-clash-protection uses the save of LR as a probe for the next
+allocation. The next allocation could be:
+
+* another part of the static frame, e.g. when allocating SVE save slots
+ or outgoing arguments
+
+* an alloca in the same function
+
+* an allocation made by a callee function
+
+However, when -fomit-frame-pointer is used, the LR save slot is placed
+above the other GPR save slots. It could therefore be up to 80 bytes
+above the base of the GPR save area (which is also the hard fp address).
+
+aarch64_allocate_and_probe_stack_space took this into account when
+deciding how much subsequent space could be allocated without needing
+a probe. However, it interacted badly with:
+
+ /* If doing a small final adjustment, we always probe at offset 0.
+ This is done to avoid issues when LR is not at position 0 or when
+ the final adjustment is smaller than the probing offset. */
+ else if (final_adjustment_p && rounded_size == 0)
+ residual_probe_offset = 0;
+
+which forces any allocation that is smaller than the guard page size
+to be probed at offset 0 rather than the usual offset 1024. It was
+therefore possible to construct cases in which we had:
+
+* a probe using LR at SP + 80 bytes (or some other value >= 16)
+* an allocation of the guard page size - 16 bytes
+* a probe at SP + 0
+
+which allocates guard page size + 64 consecutive unprobed bytes.
+
+This patch requires the LR probe to be in the first 16 bytes of the
+save area when stack clash protection is active. Doing it
+unconditionally would cause code-quality regressions.
+
+gcc/
+ * config/aarch64/aarch64.c (aarch64_layout_frame): Ensure that
+ the LR save slot is in the first 16 bytes of the register save area.
+ (aarch64_allocate_and_probe_stack_space): Remove workaround for
+ when LR was not in the first 16 bytes.
+
+gcc/testsuite/
+ * gcc.target/aarch64/stack-check-prologue-18.c: New test.
+---
+ gcc/config/aarch64/aarch64.c | 50 +++++----
+ .../aarch64/stack-check-prologue-18.c | 100 ++++++++++++++++++
+ 2 files changed, 127 insertions(+), 23 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index 4c9e11cd7cf..1e8467fdd03 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -4686,15 +4686,31 @@ aarch64_layout_frame (void)
+
+ cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size;
+
++#define ALLOCATE_GPR_SLOT(REGNO) \
++ do \
++ { \
++ cfun->machine->frame.reg_offset[REGNO] = offset; \
++ if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM) \
++ cfun->machine->frame.wb_candidate1 = (REGNO); \
++ else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM) \
++ cfun->machine->frame.wb_candidate2 = (REGNO); \
++ offset += UNITS_PER_WORD; \
++ } \
++ while (0)
++
+ if (cfun->machine->frame.emit_frame_chain)
+ {
+ /* FP and LR are placed in the linkage record. */
+- cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
+- cfun->machine->frame.wb_candidate1 = R29_REGNUM;
+- cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
+- cfun->machine->frame.wb_candidate2 = R30_REGNUM;
+- offset = 2 * UNITS_PER_WORD;
++ ALLOCATE_GPR_SLOT (R29_REGNUM);
++ ALLOCATE_GPR_SLOT (R30_REGNUM);
+ }
++ else if (flag_stack_clash_protection
++ && cfun->machine->frame.reg_offset[R30_REGNUM] == SLOT_REQUIRED)
++ /* Put the LR save slot first, since it makes a good choice of probe
++ for stack clash purposes. The idea is that the link register usually
++ has to be saved before a call anyway, and so we lose little by
++ stopping it from being individually shrink-wrapped. */
++ ALLOCATE_GPR_SLOT (R30_REGNUM);
+
+ /* With stack-clash, LR must be saved in non-leaf functions. */
+ gcc_assert (crtl->is_leaf
+@@ -4704,14 +4720,9 @@ aarch64_layout_frame (void)
+ /* Now assign stack slots for them. */
+ for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
+ if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
+- {
+- cfun->machine->frame.reg_offset[regno] = offset;
+- if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
+- cfun->machine->frame.wb_candidate1 = regno;
+- else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
+- cfun->machine->frame.wb_candidate2 = regno;
+- offset += UNITS_PER_WORD;
+- }
++ ALLOCATE_GPR_SLOT (regno);
++
++#undef ALLOCATE_GPR_SLOT
+
+ HOST_WIDE_INT max_int_offset = offset;
+ offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+@@ -5508,16 +5519,9 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
+ HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
+ gcc_assert (multiple_p (poly_size, byte_sp_alignment));
+- /* When doing the final adjustment for the outgoing argument size we can't
+- assume that LR was saved at position 0. So subtract it's offset from the
+- ABI safe buffer so that we don't accidentally allow an adjustment that
+- would result in an allocation larger than the ABI buffer without
+- probing. */
+ HOST_WIDE_INT min_probe_threshold
+ = final_adjustment_p
+- ? (guard_used_by_caller
+- + byte_sp_alignment
+- - cfun->machine->frame.reg_offset[LR_REGNUM])
++ ? guard_used_by_caller + byte_sp_alignment
+ : guard_size - guard_used_by_caller;
+
+ poly_int64 frame_size = cfun->machine->frame.frame_size;
+@@ -5697,8 +5701,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ if (final_adjustment_p && rounded_size != 0)
+ min_probe_threshold = 0;
+ /* If doing a small final adjustment, we always probe at offset 0.
+- This is done to avoid issues when LR is not at position 0 or when
+- the final adjustment is smaller than the probing offset. */
++ This is done to avoid issues when the final adjustment is smaller
++ than the probing offset. */
+ else if (final_adjustment_p && rounded_size == 0)
+ residual_probe_offset = 0;
+
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+new file mode 100644
+index 00000000000..82447d20fff
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+@@ -0,0 +1,100 @@
++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++void f(int, ...);
++void g();
++
++/*
++** test1:
++** ...
++** str x30, \[sp\]
++** sub sp, sp, #4064
++** str xzr, \[sp\]
++** cbnz w0, .*
++** bl g
++** ...
++** str x26, \[sp, #?4128\]
++** ...
++*/
++int test1(int z) {
++ __uint128_t x = 0;
++ int y[0x400];
++ if (z)
++ {
++ asm volatile ("" :::
++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
++ f(0, 0, 0, 0, 0, 0, 0, &y,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x);
++ }
++ g();
++ return 1;
++}
++
++/*
++** test2:
++** ...
++** str x30, \[sp\]
++** sub sp, sp, #1040
++** str xzr, \[sp\]
++** cbnz w0, .*
++** bl g
++** ...
++*/
++int test2(int z) {
++ __uint128_t x = 0;
++ int y[0x400];
++ if (z)
++ {
++ asm volatile ("" :::
++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
++ f(0, 0, 0, 0, 0, 0, 0, &y,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x);
++ }
++ g();
++ return 1;
++}
++
++/*
++** test3:
++** ...
++** str x30, \[sp\]
++** sub sp, sp, #1024
++** cbnz w0, .*
++** bl g
++** ...
++*/
++int test3(int z) {
++ __uint128_t x = 0;
++ int y[0x400];
++ if (z)
++ {
++ asm volatile ("" :::
++ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
++ f(0, 0, 0, 0, 0, 0, 0, &y,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
++ }
++ g();
++ return 1;
++}
+--
+2.34.1
+
+
+From f2684e63652bb251d22c79e40081c646df1f36b6 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Tue, 8 Aug 2023 01:57:26 +0100
+Subject: [PATCH 09/10] aarch64: Simplify probe of final frame allocation
+
+Previous patches ensured that the final frame allocation only needs
+a probe when the size is strictly greater than 1KiB. It's therefore
+safe to use the normal 1024 probe offset in all cases.
+
+The main motivation for doing this is to simplify the code and
+remove the number of special cases.
+
+gcc/
+ * config/aarch64/aarch64.c (aarch64_allocate_and_probe_stack_space):
+ Always probe the residual allocation at offset 1024, asserting
+ that that is in range.
+
+gcc/testsuite/
+ * gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
+ to be at offset 1024 rather than offset 0.
+ * gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.c | 12 ++++--------
+ .../gcc.target/aarch64/stack-check-prologue-17.c | 2 +-
+ .../gcc.target/aarch64/stack-check-prologue-18.c | 7 +++++--
+ 3 files changed, 10 insertions(+), 11 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index 1e8467fdd03..705f719a2ea 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -5695,16 +5695,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ are still safe. */
+ if (residual)
+ {
+- HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
++ gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
++
+ /* If we're doing final adjustments, and we've done any full page
+ allocations then any residual needs to be probed. */
+ if (final_adjustment_p && rounded_size != 0)
+ min_probe_threshold = 0;
+- /* If doing a small final adjustment, we always probe at offset 0.
+- This is done to avoid issues when the final adjustment is smaller
+- than the probing offset. */
+- else if (final_adjustment_p && rounded_size == 0)
+- residual_probe_offset = 0;
+
+ aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
+ if (residual >= min_probe_threshold)
+@@ -5715,8 +5711,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
+ HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
+ "\n", residual);
+
+- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+- residual_probe_offset));
++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
++ guard_used_by_caller));
+ emit_insn (gen_blockage ());
+ }
+ }
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+index 0d8a25d73a2..f0ec1389771 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+@@ -33,7 +33,7 @@ int test1(int z) {
+ ** ...
+ ** str x30, \[sp\]
+ ** sub sp, sp, #1040
+-** str xzr, \[sp\]
++** str xzr, \[sp, #?1024\]
+ ** cbnz w0, .*
+ ** bl g
+ ** ...
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+index 82447d20fff..71d33ba34e9 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+@@ -8,8 +8,9 @@ void g();
+ ** test1:
+ ** ...
+ ** str x30, \[sp\]
++** ...
+ ** sub sp, sp, #4064
+-** str xzr, \[sp\]
++** str xzr, \[sp, #?1024\]
+ ** cbnz w0, .*
+ ** bl g
+ ** ...
+@@ -49,8 +50,9 @@ int test1(int z) {
+ ** test2:
+ ** ...
+ ** str x30, \[sp\]
++** ...
+ ** sub sp, sp, #1040
+-** str xzr, \[sp\]
++** str xzr, \[sp, #?1024\]
+ ** cbnz w0, .*
+ ** bl g
+ ** ...
+@@ -77,6 +79,7 @@ int test2(int z) {
+ ** test3:
+ ** ...
+ ** str x30, \[sp\]
++** ...
+ ** sub sp, sp, #1024
+ ** cbnz w0, .*
+ ** bl g
+--
+2.34.1
+
+
+From bf3eeaa0182a92987570d9c787bd45079eebf528 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandiford@arm.com>
+Date: Thu, 15 Jun 2023 19:16:52 +0100
+Subject: [PATCH 10/10] aarch64: Make stack smash canary protect saved
+ registers
+
+AArch64 normally puts the saved registers near the bottom of the frame,
+immediately above any dynamic allocations. But this means that a
+stack-smash attack on those dynamic allocations could overwrite the
+saved registers without needing to reach as far as the stack smash
+canary.
+
+The same thing could also happen for variable-sized arguments that are
+passed by value, since those are allocated before a call and popped on
+return.
+
+This patch avoids that by putting the locals (and thus the canary) below
+the saved registers when stack smash protection is active.
+
+The patch fixes CVE-2023-4039.
+
+gcc/
+ * config/aarch64/aarch64.c (aarch64_save_regs_above_locals_p):
+ New function.
+ (aarch64_layout_frame): Use it to decide whether locals should
+ go above or below the saved registers.
+ (aarch64_expand_prologue): Update stack layout comment.
+ Emit a stack tie after the final adjustment.
+
+gcc/testsuite/
+ * gcc.target/aarch64/stack-protector-8.c: New test.
+ * gcc.target/aarch64/stack-protector-9.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.c | 46 +++++++++++++--
+ .../gcc.target/aarch64/stack-protector-8.c | 58 +++++++++++++++++++
+ .../gcc.target/aarch64/stack-protector-9.c | 33 +++++++++++
+ 3 files changed, 133 insertions(+), 4 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index 705f719a2ea..3d094214fac 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -4622,6 +4622,20 @@ aarch64_needs_frame_chain (void)
+ return aarch64_use_frame_pointer;
+ }
+
++/* Return true if the current function should save registers above
++ the locals area, rather than below it. */
++
++static bool
++aarch64_save_regs_above_locals_p ()
++{
++ /* When using stack smash protection, make sure that the canary slot
++ comes between the locals and the saved registers. Otherwise,
++ it would be possible for a carefully sized smash attack to change
++ the saved registers (particularly LR and FP) without reaching the
++ canary. */
++ return crtl->stack_protect_guard;
++}
++
+ /* Mark the registers that need to be saved by the callee and calculate
+ the size of the callee-saved registers area and frame record (both FP
+ and LR may be omitted). */
+@@ -4686,6 +4700,16 @@ aarch64_layout_frame (void)
+
+ cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size;
+
++ bool regs_at_top_p = aarch64_save_regs_above_locals_p ();
++
++ if (regs_at_top_p)
++ {
++ cfun->machine->frame.bytes_below_hard_fp += get_frame_size ();
++ cfun->machine->frame.bytes_below_hard_fp
++ = aligned_upper_bound (cfun->machine->frame.bytes_below_hard_fp,
++ STACK_BOUNDARY / BITS_PER_UNIT);
++ }
++
+ #define ALLOCATE_GPR_SLOT(REGNO) \
+ do \
+ { \
+@@ -4758,9 +4782,11 @@ aarch64_layout_frame (void)
+ HOST_WIDE_INT varargs_and_saved_regs_size
+ = offset + cfun->machine->frame.saved_varargs_size;
+
++ cfun->machine->frame.bytes_above_hard_fp = varargs_and_saved_regs_size;
++ if (!regs_at_top_p)
++ cfun->machine->frame.bytes_above_hard_fp += get_frame_size ();
+ cfun->machine->frame.bytes_above_hard_fp
+- = aligned_upper_bound (varargs_and_saved_regs_size
+- + get_frame_size (),
++ = aligned_upper_bound (cfun->machine->frame.bytes_above_hard_fp,
+ STACK_BOUNDARY / BITS_PER_UNIT);
+
+ /* Both these values are already aligned. */
+@@ -4772,6 +4798,9 @@ aarch64_layout_frame (void)
+
+ cfun->machine->frame.bytes_above_locals
+ = cfun->machine->frame.saved_varargs_size;
++ if (regs_at_top_p)
++ cfun->machine->frame.bytes_above_locals
++ += cfun->machine->frame.saved_regs_size;
+
+ cfun->machine->frame.initial_adjust = 0;
+ cfun->machine->frame.final_adjust = 0;
+@@ -5764,10 +5793,10 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
+ | for register varargs |
+ | |
+ +-------------------------------+
+- | local variables | <-- frame_pointer_rtx
++ | local variables (1) | <-- frame_pointer_rtx
+ | |
+ +-------------------------------+
+- | padding | \
++ | padding (1) | \
+ +-------------------------------+ |
+ | callee-saved registers | | frame.saved_regs_size
+ +-------------------------------+ |
+@@ -5775,6 +5804,10 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
+ +-------------------------------+ |
+ | FP' | / <- hard_frame_pointer_rtx (aligned)
+ +-------------------------------+
++ | local variables (2) |
++ +-------------------------------+
++ | padding (2) |
++ +-------------------------------+
+ | dynamic allocation |
+ +-------------------------------+
+ | padding |
+@@ -5784,6 +5817,9 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
+ +-------------------------------+
+ | | <-- stack_pointer_rtx (aligned)
+
++ The regions marked (1) and (2) are mutually exclusive. (2) is used
++ when aarch64_save_regs_above_locals_p is true.
++
+ Dynamic stack allocations via alloca() decrease stack_pointer_rtx
+ but leave frame_pointer_rtx and hard_frame_pointer_rtx
+ unchanged.
+@@ -5937,6 +5973,8 @@ aarch64_expand_prologue (void)
+ that is assumed by the called. */
+ aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
+ !frame_pointer_needed, true);
++ if (emit_frame_chain && maybe_ne (final_adjust, 0))
++ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
+ }
+
+ /* Return TRUE if we can use a simple_return insn.
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
+new file mode 100644
+index 00000000000..c5e7deef6c1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
+@@ -0,0 +1,58 @@
++/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++void g(void *);
++
++/*
++** test1:
++** sub sp, sp, #288
++** stp x29, x30, \[sp, #?272\]
++** add x29, sp, #?272
++** mrs (x[0-9]+), tpidr2_el0
++** ldr (x[0-9]+), \[\1, #?16\]
++** str \2, \[sp, #?264\]
++** mov \2, *0
++** add x0, sp, #?8
++** bl g
++** ...
++** mrs .*
++** ...
++** bne .*
++** ...
++** ldp x29, x30, \[sp, #?272\]
++** add sp, sp, #?288
++** ret
++** bl __stack_chk_fail
++*/
++int test1() {
++ int y[0x40];
++ g(y);
++ return 1;
++}
++
++/*
++** test2:
++** stp x29, x30, \[sp, #?-16\]!
++** mov x29, sp
++** sub sp, sp, #1040
++** mrs (x[0-9]+), tpidr2_el0
++** ldr (x[0-9]+), \[\1, #?16\]
++** str \2, \[sp, #?1032\]
++** mov \2, *0
++** add x0, sp, #?8
++** bl g
++** ...
++** mrs .*
++** ...
++** bne .*
++** ...
++** add sp, sp, #?1040
++** ldp x29, x30, \[sp\], #?16
++** ret
++** bl __stack_chk_fail
++*/
++int test2() {
++ int y[0x100];
++ g(y);
++ return 1;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
+new file mode 100644
+index 00000000000..58f322aa480
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
+@@ -0,0 +1,33 @@
++/* { dg-options "-O2 -mcpu=neoverse-v1 -fstack-protector-all" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++/*
++** main:
++** ...
++** stp x29, x30, \[sp, #?-[0-9]+\]!
++** ...
++** sub sp, sp, #[0-9]+
++** ...
++** str x[0-9]+, \[x29, #?-8\]
++** ...
++*/
++int f(const char *);
++void g(void *);
++int main(int argc, char* argv[])
++{
++ int a;
++ int b;
++ char c[2+f(argv[1])];
++ int d[0x100];
++ char y;
++
++ y=42; a=4; b=10;
++ c[0] = 'h'; c[1] = '\0';
++
++ c[f(argv[2])] = '\0';
++
++ __builtin_printf("%d %d\n%s\n", a, b, c);
++ g(d);
++
++ return 0;
++}
+--
+2.34.1
+