author Alan Wu <[email protected]> 2025-07-07 18:25:20 +0900
committer Alan Wu <[email protected]> 2025-07-15 14:47:32 -0400
commit b2ef33b3c5b2aa1ff4742911c84451ec7afd2fde (patch)
tree c7a4d8dff723078a41fa4b4c3725ae5eaa7e200c
parent 50e2d58af89a661d7c90db001d966a1dbff48332 (diff)
ZJIT: Redo JIT function native stack frame layout
Previously, gen_param() accessed slots at `SP-x` for `x≥0` after subtracting from SP, so it was accessing slots above the top of the stack. Also, the slots gen_entry_params() wrote to at the entry point did not correspond to the slots accessed inside the JIT function. Redo the stack frame layout so that slots inside the function are at `SP+x`, and write to those slots in the entry point by anticipating the size of the frame. Fixes test_spilled_method_args().
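To make the bug concrete, here is a minimal arithmetic sketch (hypothetical addresses, not zjit code): on a downward-growing stack, `SP-x` after the prologue has subtracted from SP points outside the live frame, while `SP+x` points into the freshly allocated slots.

    // Minimal sketch with made-up addresses; after `SP -= c_stack_bytes`,
    // the live frame is the range [sp, old_sp).
    fn main() {
        let old_sp: u64 = 0x1000;
        let c_stack_bytes = 16;
        let sp = old_sp - c_stack_bytes;  // SP after the prologue subtraction
        let old_slot = sp - 8;            // old layout: SP-8, above the top of the stack
        let new_slot = sp + 8;            // new layout: SP+8, inside the frame
        assert!(!(sp..old_sp).contains(&old_slot)); // outside the frame
        assert!((sp..old_sp).contains(&new_slot));  // inside the frame
    }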
-rw-r--r--zjit/src/backend/arm64/mod.rs5
-rw-r--r--zjit/src/backend/lir.rs6
-rw-r--r--zjit/src/backend/x86_64/mod.rs6
-rw-r--r--zjit/src/codegen.rs51
4 files changed, 50 insertions, 18 deletions
diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs
index 4f804a3406..ac5c9908f5 100644
--- a/zjit/src/backend/arm64/mod.rs
+++ b/zjit/src/backend/arm64/mod.rs
@@ -206,6 +206,11 @@ impl Assembler
vec![X1_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG]
}
+ /// How many bytes a call and a [Self::frame_setup] would change native SP
+ pub fn frame_size() -> i32 {
+ 0x10
+ }
+
/// Split platform-specific instructions
/// The transformations done here are meant to make our lives simpler in later
/// stages of the compilation pipeline.
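For context, a rough account of where the arm64 value 0x10 comes from, assuming frame_setup saves FP and LR with a pre-indexed stp while bl writes the return address to LR without touching SP (a sketch, not the actual lowering):

    // bl  <jit_func>               // SP unchanged; return address goes to LR
    // stp x29, x30, [sp, #-0x10]!  // frame_setup: save FP and LR, SP -= 0x10
    fn main() {
        let bytes_from_call = 0;            // bl does not push to the stack
        let bytes_from_frame_setup = 2 * 8; // FP + LR saved by frame_setup
        assert_eq!(bytes_from_call + bytes_from_frame_setup, 0x10);
    }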
diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs
index dbf083f1a2..e5453e4d55 100644
--- a/zjit/src/backend/lir.rs
+++ b/zjit/src/backend/lir.rs
@@ -32,13 +32,13 @@ pub enum MemBase
pub struct Mem
{
// Base register number or instruction index
- pub(super) base: MemBase,
+ pub base: MemBase,
// Offset relative to the base pointer
- pub(super) disp: i32,
+ pub disp: i32,
// Size in bits
- pub(super) num_bits: u8,
+ pub num_bits: u8,
}
impl fmt::Debug for Mem {
diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs
index 85a6a0f5b4..1c905a475c 100644
--- a/zjit/src/backend/x86_64/mod.rs
+++ b/zjit/src/backend/x86_64/mod.rs
@@ -110,6 +110,11 @@ impl Assembler
vec![RAX_REG, RCX_REG, RDX_REG, RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG, R11_REG]
}
+ /// How many bytes a call and a [Self::frame_setup] would change native SP
+ pub fn frame_size() -> i32 {
+ 0x8
+ }
+
// These are the callee-saved registers in the x86-64 SysV ABI
// RBX, RSP, RBP, and R12–R15
@@ -475,6 +480,7 @@ impl Assembler
// (e.g. with Linux `perf record --call-graph fp`)
Insn::FrameSetup => {
if false { // We don't support --zjit-perf yet
+ // TODO(alan): Change Assembler::frame_size() when adding --zjit-perf support
push(cb, RBP);
mov(cb, RBP, RSP);
push(cb, RBP);
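Correspondingly, a rough account of the x86_64 value 0x8, assuming Insn::FrameSetup stays a no-op while --zjit-perf is unsupported; the call instruction alone pushes an 8-byte return address, and the pushes above would enlarge the frame once perf support lands (hence the TODO):

    fn main() {
        let bytes_from_call = 8;         // `call` pushes the return address
        let bytes_from_frame_setup = 0;  // FrameSetup currently emits nothing
        assert_eq!(bytes_from_call + bytes_from_frame_setup, 0x8);
    }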
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index c8029116a1..ce3decd56f 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -107,14 +107,14 @@ fn gen_iseq_entry_point(iseq: IseqPtr) -> *const u8 {
// Compile the High-level IR
let cb = ZJITState::get_code_block();
let (start_ptr, mut branch_iseqs) = match gen_function(cb, iseq, &function) {
- Some((start_ptr, gc_offsets, branch_iseqs)) => {
+ Some((start_ptr, gc_offsets, jit)) => {
// Remember the block address to reuse it later
let payload = get_or_create_iseq_payload(iseq);
payload.start_ptr = Some(start_ptr);
payload.gc_offsets.extend(gc_offsets);
// Compile an entry point to the JIT code
- (gen_entry(cb, iseq, &function, start_ptr), branch_iseqs)
+ (gen_entry(cb, iseq, &function, start_ptr, jit.c_stack_bytes), jit.branch_iseqs)
},
None => (None, vec![]),
};
@@ -145,11 +145,11 @@ fn gen_iseq_entry_point(iseq: IseqPtr) -> *const u8 {
}
/// Compile a JIT entry
-fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function, function_ptr: CodePtr) -> Option<CodePtr> {
+fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function, function_ptr: CodePtr, c_stack_bytes: usize) -> Option<CodePtr> {
// Set up registers for CFP, EC, SP, and basic block arguments
let mut asm = Assembler::new();
gen_entry_prologue(&mut asm, iseq);
- gen_entry_params(&mut asm, iseq, function.block(BlockId(0)));
+ gen_entry_params(&mut asm, iseq, function.block(BlockId(0)), c_stack_bytes);
// Jump to the first block using a call instruction
asm.ccall(function_ptr.raw_ptr(cb) as *const u8, vec![]);
@@ -185,17 +185,17 @@ fn gen_iseq(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<(CodePtr, Vec<(Rc<Branc
// Compile the High-level IR
let result = gen_function(cb, iseq, &function);
- if let Some((start_ptr, gc_offsets, branch_iseqs)) = result {
+ if let Some((start_ptr, gc_offsets, jit)) = result {
payload.start_ptr = Some(start_ptr);
payload.gc_offsets.extend(gc_offsets);
- Some((start_ptr, branch_iseqs))
+ Some((start_ptr, jit.branch_iseqs))
} else {
None
}
}
/// Compile a function
-fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Option<(CodePtr, Vec<CodePtr>, Vec<(Rc<Branch>, IseqPtr)>)> {
+fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Option<(CodePtr, Vec<CodePtr>, JITState)> {
let c_stack_bytes = aligned_stack_bytes(max_num_params(function).saturating_sub(ALLOC_REGS.len()));
let mut jit = JITState::new(iseq, function.num_insns(), function.num_blocks(), c_stack_bytes);
let mut asm = Assembler::new();
@@ -249,7 +249,7 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Optio
}
// Generate code if everything can be compiled
- asm.compile(cb).map(|(start_ptr, gc_offsets)| (start_ptr, gc_offsets, jit.branch_iseqs))
+ asm.compile(cb).map(|(start_ptr, gc_offsets)| (start_ptr, gc_offsets, jit))
}
/// Compile an instruction
@@ -527,7 +527,7 @@ fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) {
}
/// Assign method arguments to basic block arguments at JIT entry
-fn gen_entry_params(asm: &mut Assembler, iseq: IseqPtr, entry_block: &Block) {
+fn gen_entry_params(asm: &mut Assembler, iseq: IseqPtr, entry_block: &Block, c_stack_bytes: usize) {
let self_param = gen_param(asm, SELF_PARAM_IDX);
asm.mov(self_param, Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_SELF));
@@ -536,12 +536,32 @@ fn gen_entry_params(asm: &mut Assembler, iseq: IseqPtr, entry_block: &Block) {
asm_comment!(asm, "set method params: {num_params}");
// Allocate registers for basic block arguments
- let params: Vec<Opnd> = (0..num_params).map(|idx|
- gen_param(asm, idx + 1) // +1 for self
- ).collect();
+ for idx in 0..num_params {
+ let param = gen_param(asm, idx + 1); // +1 for self
+
+ // Funky offset adjustment to write into the native stack frame of the
+ // HIR function we'll be calling into. This only makes sense in the context
+ // of the instruction schedule in gen_entry() for the JIT entry point.
+ //
+ // The entry point needs to load VALUEs into native stack slots _before_ the
+ // frame containing the slots exists. So, we anticipate the stack frame size
+ // of the Function and subtract offsets based on that.
+ //
+ // native SP at entry point ─────►┌────────────┐ Native SP grows downwards
+ // │ │ ↓ on all arches we support.
+ // SP-0x8 ├────────────┤
+ // │ │
+ // where native SP SP-0x10├────────────┤
+ // would be while │ │
+ // the HIR function ────────────► └────────────┘
+ // is running
+ let param = if let Opnd::Mem(lir::Mem { base, disp, num_bits }) = param {
+ Opnd::Mem(lir::Mem { num_bits, base, disp: disp - c_stack_bytes as i32 - Assembler::frame_size() })
+ } else {
+ param
+ };
- // Assign local variables to the basic block arguments
- for (idx, &param) in params.iter().enumerate() {
+ // Assign local variables to the basic block arguments
let local = gen_entry_param(asm, iseq, idx);
asm.mov(param, local);
}
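A worked example of the displacement adjustment above, assuming the x86_64 values from this patch (Assembler::frame_size() == 8, ALLOC_REGS.len() == 9) and a 16-byte spill area holding one spilled parameter; the helper names here are stand-ins, not zjit APIs:

    const SIZEOF_VALUE: i32 = 8;
    const ALLOC_REGS_LEN: usize = 9; // the x86_64 register list above
    const FRAME_SIZE: i32 = 8;       // Assembler::frame_size() on x86_64

    // Slot displacement relative to native SP inside the JIT function.
    fn param_disp(idx: usize) -> i32 {
        (idx - ALLOC_REGS_LEN) as i32 * SIZEOF_VALUE
    }

    // The entry point writes before the frame exists, so it anticipates the
    // frame: shift down by the spill area plus the call/frame_setup bytes.
    fn entry_disp(idx: usize, c_stack_bytes: i32) -> i32 {
        param_disp(idx) - c_stack_bytes - FRAME_SIZE
    }

    fn main() {
        assert_eq!(param_disp(9), 0);       // first spilled param: SP+0 in the function
        assert_eq!(entry_disp(9, 16), -24); // same slot seen from the entry point's SP
    }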
@@ -1045,11 +1065,12 @@ fn gen_push_frame(asm: &mut Assembler, argc: usize, state: &FrameState, frame: C
/// Return an operand we use for the basic block argument at a given index
fn param_opnd(idx: usize) -> Opnd {
// To simplify the implementation, allocate a fixed register or a stack slot for each basic block argument for now.
+ // Note that this is implemented here as opposed to automatically inside the LIR machinery.
// TODO: Allow allocating arbitrary registers for basic block arguments
if idx < ALLOC_REGS.len() {
Opnd::Reg(ALLOC_REGS[idx])
} else {
- Opnd::mem(64, NATIVE_STACK_PTR, -((idx - ALLOC_REGS.len() + 1) as i32) * SIZEOF_VALUE_I32)
+ Opnd::mem(64, NATIVE_STACK_PTR, (idx - ALLOC_REGS.len()) as i32 * SIZEOF_VALUE_I32)
}
}
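A before/after sketch of the spilled-slot displacement computed by param_opnd(), again assuming ALLOC_REGS.len() == 9 (the x86_64 list) and 8-byte VALUEs:

    const SIZEOF_VALUE_I32: i32 = 8;
    const ALLOC_REGS_LEN: usize = 9; // assumption for illustration

    // Old layout: first spilled slot at SP-8, above the top of the stack.
    fn old_disp(idx: usize) -> i32 {
        -(((idx - ALLOC_REGS_LEN + 1) as i32) * SIZEOF_VALUE_I32)
    }

    // New layout: first spilled slot at SP+0, inside the frame.
    fn new_disp(idx: usize) -> i32 {
        (idx - ALLOC_REGS_LEN) as i32 * SIZEOF_VALUE_I32
    }

    fn main() {
        assert_eq!(old_disp(9), -8); // used to land outside the frame
        assert_eq!(new_disp(9), 0);  // now the frame's lowest slot
        assert_eq!(new_disp(10), 8);
    }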