Skip to content

Commit 860589c

Browse files
maximecbk0kubun
authored and committed
Use builtin_inline_p to avoid pushing a frame for primitive C methods (#63)
* Use builtin_inline_p to skip a frame of C methods
* Fix bugs in primitive cfunc call code
* Remove if (push_frame) {}
* Remove if (push_frame) {}
* Push Aaron's fix to avoid hardcoding insn lengths

Co-authored-by: Takashi Kokubun <[email protected]>
1 parent 9f46e6e commit 860589c

File tree

3 files changed

+128
-95
lines changed

3 files changed

+128
-95
lines changed

yjit_codegen.c

Lines changed: 128 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -2253,95 +2253,88 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
22532253
//print_str(cb, "recv");
22542254
//print_ptr(cb, recv);
22552255

2256-
// If this function needs a Ruby stack frame
2257-
const bool push_frame = cfunc_needs_frame(cfunc);
2258-
22592256
// Create a side-exit to fall back to the interpreter
22602257
uint8_t *side_exit = yjit_side_exit(jit, ctx);
22612258

22622259
// Check for interrupts
22632260
yjit_check_ints(cb, side_exit);
22642261

2265-
if (push_frame) {
2266-
// Stack overflow check
2267-
// #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
2268-
// REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
2269-
lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + sizeof(rb_control_frame_t)));
2270-
cmp(cb, REG_CFP, REG0);
2271-
jle_ptr(cb, COUNTED_EXIT(side_exit, send_se_cf_overflow));
2272-
}
2262+
// Stack overflow check
2263+
// #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
2264+
// REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
2265+
lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + sizeof(rb_control_frame_t)));
2266+
cmp(cb, REG_CFP, REG0);
2267+
jle_ptr(cb, COUNTED_EXIT(side_exit, send_se_cf_overflow));
22732268

22742269
// Points to the receiver operand on the stack
22752270
x86opnd_t recv = ctx_stack_opnd(ctx, argc);
22762271

22772272
// Store incremented PC into current control frame in case callee raises.
22782273
jit_save_pc(jit, REG0);
22792274

2280-
if (push_frame) {
2281-
if (block) {
2282-
// Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
2283-
// VM_CFP_TO_CAPTURED_BLCOK does &cfp->self, rb_captured_block->code.iseq aliases
2284-
// with cfp->block_code.
2285-
jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
2286-
mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
2287-
}
2288-
2289-
// Increment the stack pointer by 3 (in the callee)
2290-
// sp += 3
2291-
lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
2292-
2293-
// Write method entry at sp[-3]
2294-
// sp[-3] = me;
2295-
// Put compile time cme into REG1. It's assumed to be valid because we are notified when
2296-
// any cme we depend on become outdated. See rb_yjit_method_lookup_change().
2297-
jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
2298-
mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
2299-
2300-
// Write block handler at sp[-2]
2301-
// sp[-2] = block_handler;
2302-
if (block) {
2303-
// reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
2304-
lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
2305-
or(cb, REG1, imm_opnd(1));
2306-
mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
2307-
}
2308-
else {
2309-
mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
2310-
}
2275+
if (block) {
2276+
// Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
2277+
// VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
2278+
// with cfp->block_code.
2279+
jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
2280+
mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
2281+
}
23112282

2312-
// Write env flags at sp[-1]
2313-
// sp[-1] = frame_type;
2314-
uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
2315-
mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
2283+
// Increment the stack pointer by 3 (in the callee)
2284+
// sp += 3
2285+
lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
23162286

2317-
// Allocate a new CFP (ec->cfp--)
2318-
sub(
2319-
cb,
2320-
member_opnd(REG_EC, rb_execution_context_t, cfp),
2321-
imm_opnd(sizeof(rb_control_frame_t))
2322-
);
2287+
// Write method entry at sp[-3]
2288+
// sp[-3] = me;
2289+
// Put compile time cme into REG1. It's assumed to be valid because we are notified when
2290+
// any cme we depend on become outdated. See rb_yjit_method_lookup_change().
2291+
jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
2292+
mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
23232293

2324-
// Setup the new frame
2325-
// *cfp = (const struct rb_control_frame_struct) {
2326-
// .pc = 0,
2327-
// .sp = sp,
2328-
// .iseq = 0,
2329-
// .self = recv,
2330-
// .ep = sp - 1,
2331-
// .block_code = 0,
2332-
// .__bp__ = sp,
2333-
// };
2334-
mov(cb, REG1, member_opnd(REG_EC, rb_execution_context_t, cfp));
2335-
mov(cb, member_opnd(REG1, rb_control_frame_t, pc), imm_opnd(0));
2336-
mov(cb, member_opnd(REG1, rb_control_frame_t, sp), REG0);
2337-
mov(cb, member_opnd(REG1, rb_control_frame_t, iseq), imm_opnd(0));
2338-
mov(cb, member_opnd(REG1, rb_control_frame_t, block_code), imm_opnd(0));
2339-
mov(cb, member_opnd(REG1, rb_control_frame_t, __bp__), REG0);
2340-
sub(cb, REG0, imm_opnd(sizeof(VALUE)));
2341-
mov(cb, member_opnd(REG1, rb_control_frame_t, ep), REG0);
2342-
mov(cb, REG0, recv);
2343-
mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
2294+
// Write block handler at sp[-2]
2295+
// sp[-2] = block_handler;
2296+
if (block) {
2297+
// reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
2298+
lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
2299+
or(cb, REG1, imm_opnd(1));
2300+
mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
23442301
}
2302+
else {
2303+
mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
2304+
}
2305+
2306+
// Write env flags at sp[-1]
2307+
// sp[-1] = frame_type;
2308+
uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
2309+
mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
2310+
2311+
// Allocate a new CFP (ec->cfp--)
2312+
sub(
2313+
cb,
2314+
member_opnd(REG_EC, rb_execution_context_t, cfp),
2315+
imm_opnd(sizeof(rb_control_frame_t))
2316+
);
2317+
2318+
// Setup the new frame
2319+
// *cfp = (const struct rb_control_frame_struct) {
2320+
// .pc = 0,
2321+
// .sp = sp,
2322+
// .iseq = 0,
2323+
// .self = recv,
2324+
// .ep = sp - 1,
2325+
// .block_code = 0,
2326+
// .__bp__ = sp,
2327+
// };
2328+
mov(cb, REG1, member_opnd(REG_EC, rb_execution_context_t, cfp));
2329+
mov(cb, member_opnd(REG1, rb_control_frame_t, pc), imm_opnd(0));
2330+
mov(cb, member_opnd(REG1, rb_control_frame_t, sp), REG0);
2331+
mov(cb, member_opnd(REG1, rb_control_frame_t, iseq), imm_opnd(0));
2332+
mov(cb, member_opnd(REG1, rb_control_frame_t, block_code), imm_opnd(0));
2333+
mov(cb, member_opnd(REG1, rb_control_frame_t, __bp__), REG0);
2334+
sub(cb, REG0, imm_opnd(sizeof(VALUE)));
2335+
mov(cb, member_opnd(REG1, rb_control_frame_t, ep), REG0);
2336+
mov(cb, REG0, recv);
2337+
mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
23452338

23462339
// Verify that we are calling the right function
23472340
if (YJIT_CHECK_MODE > 0) {
@@ -2407,15 +2400,12 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
24072400
x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
24082401
mov(cb, stack_ret, RAX);
24092402

2410-
// If this function needs a Ruby stack frame
2411-
if (push_frame) {
2412-
// Pop the stack frame (ec->cfp++)
2413-
add(
2414-
cb,
2415-
member_opnd(REG_EC, rb_execution_context_t, cfp),
2416-
imm_opnd(sizeof(rb_control_frame_t))
2417-
);
2418-
}
2403+
// Pop the stack frame (ec->cfp++)
2404+
add(
2405+
cb,
2406+
member_opnd(REG_EC, rb_execution_context_t, cfp),
2407+
imm_opnd(sizeof(rb_control_frame_t))
2408+
);
24192409

24202410
// Note: gen_oswb_iseq() jumps to the next instruction with ctx->sp_offset == 0
24212411
// after the call, while this does not. This difference prevents
@@ -2463,6 +2453,30 @@ iseq_lead_only_arg_setup_p(const rb_iseq_t *iseq)
24632453
bool rb_iseq_only_optparam_p(const rb_iseq_t *iseq);
24642454
bool rb_iseq_only_kwparam_p(const rb_iseq_t *iseq);
24652455

2456+
// If true, the iseq is leaf and it can be replaced by a single C call.
2457+
static bool
2458+
rb_leaf_invokebuiltin_iseq_p(const rb_iseq_t *iseq)
2459+
{
2460+
unsigned int invokebuiltin_len = insn_len(BIN(opt_invokebuiltin_delegate_leave));
2461+
unsigned int leave_len = insn_len(BIN(leave));
2462+
2463+
return iseq->body->iseq_size == (
2464+
(invokebuiltin_len + leave_len) &&
2465+
rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]) == BIN(opt_invokebuiltin_delegate_leave) &&
2466+
rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[invokebuiltin_len]) == BIN(leave) &&
2467+
iseq->body->builtin_inline_p
2468+
);
2469+
}
2470+
2471+
// Return an rb_builtin_function if the iseq contains only that leaf builtin function.
2472+
static const struct rb_builtin_function*
2473+
rb_leaf_builtin_function(const rb_iseq_t *iseq)
2474+
{
2475+
if (!rb_leaf_invokebuiltin_iseq_p(iseq))
2476+
return NULL;
2477+
return (const struct rb_builtin_function *)iseq->body->iseq_encoded[1];
2478+
}
2479+
24662480
static codegen_status_t
24672481
gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc)
24682482
{
@@ -2529,6 +2543,39 @@ gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const r
25292543
// Check for interrupts
25302544
yjit_check_ints(cb, side_exit);
25312545

2546+
const struct rb_builtin_function *leaf_builtin = rb_leaf_builtin_function(iseq);
2547+
2548+
if (leaf_builtin && !block && leaf_builtin->argc + 1 <= NUM_C_ARG_REGS) {
2549+
// TODO: figure out if this is necessary
2550+
// If the calls don't allocate, do they need up to date PC, SP?
2551+
// Save YJIT registers
2552+
yjit_save_regs(cb);
2553+
2554+
// Get a pointer to the top of the stack
2555+
lea(cb, REG0, ctx_stack_opnd(ctx, 0));
2556+
2557+
// Call the builtin func (ec, recv, arg1, arg2, ...)
2558+
mov(cb, C_ARG_REGS[0], REG_EC);
2559+
2560+
// Copy self and arguments
2561+
for (int32_t i = 0; i < leaf_builtin->argc + 1; i++) {
2562+
x86opnd_t stack_opnd = mem_opnd(64, REG0, -(leaf_builtin->argc - i) * SIZEOF_VALUE);
2563+
x86opnd_t c_arg_reg = C_ARG_REGS[i + 1];
2564+
mov(cb, c_arg_reg, stack_opnd);
2565+
}
2566+
ctx_stack_pop(ctx, leaf_builtin->argc + 1);
2567+
call_ptr(cb, REG0, (void *)leaf_builtin->func_ptr);
2568+
2569+
// Load YJIT registers
2570+
yjit_load_regs(cb);
2571+
2572+
// Push the return value
2573+
x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2574+
mov(cb, stack_ret, RAX);
2575+
2576+
return YJIT_KEEP_COMPILING;
2577+
}
2578+
25322579
// Stack overflow check
25332580
// #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
25342581
ADD_COMMENT(cb, "stack overflow check");

yjit_iface.c

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -126,19 +126,6 @@ check_cfunc_dispatch(VALUE receiver, struct rb_callinfo *ci, void *callee, rb_ca
126126

127127
MJIT_FUNC_EXPORTED VALUE rb_hash_has_key(VALUE hash, VALUE key);
128128

129-
bool
130-
cfunc_needs_frame(const rb_method_cfunc_t *cfunc)
131-
{
132-
void* fptr = (void*)cfunc->func;
133-
134-
// Leaf C functions do not need a stack frame
135-
// or a stack overflow check
136-
return !(
137-
// Hash#key?
138-
fptr == (void*)rb_hash_has_key
139-
);
140-
}
141-
142129
// GC root for interacting with the GC
143130
struct yjit_root_struct {
144131
int unused; // empty structs are not legal in C99

yjit_iface.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,6 @@ VALUE *yjit_iseq_pc_at_idx(const rb_iseq_t *iseq, uint32_t insn_idx);
100100
int yjit_opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc);
101101

102102
void check_cfunc_dispatch(VALUE receiver, struct rb_callinfo *ci, void *callee, rb_callable_method_entry_t *compile_time_cme);
103-
bool cfunc_needs_frame(const rb_method_cfunc_t *cfunc);
104103

105104
RBIMPL_ATTR_NODISCARD() bool assume_bop_not_redefined(block_t *block, int redefined_flag, enum ruby_basic_operators bop);
106105
void assume_method_lookup_stable(VALUE receiver_klass, const rb_callable_method_entry_t *cme, block_t *block);

0 commit comments

Comments
 (0)