@@ -2253,95 +2253,88 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
22532253 //print_str(cb, "recv");
22542254 //print_ptr(cb, recv);
22552255
2256- // If this function needs a Ruby stack frame
2257- const bool push_frame = cfunc_needs_frame (cfunc );
2258-
22592256 // Create a side-exit to fall back to the interpreter
22602257 uint8_t * side_exit = yjit_side_exit (jit , ctx );
22612258
22622259 // Check for interrupts
22632260 yjit_check_ints (cb , side_exit );
22642261
2265- if (push_frame ) {
2266- // Stack overflow check
2267- // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
2268- // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
2269- lea (cb , REG0 , ctx_sp_opnd (ctx , sizeof (VALUE ) * 4 + sizeof (rb_control_frame_t )));
2270- cmp (cb , REG_CFP , REG0 );
2271- jle_ptr (cb , COUNTED_EXIT (side_exit , send_se_cf_overflow ));
2272- }
2262+ // Stack overflow check
2263+ // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
2264+ // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
2265+ lea (cb , REG0 , ctx_sp_opnd (ctx , sizeof (VALUE ) * 4 + sizeof (rb_control_frame_t )));
2266+ cmp (cb , REG_CFP , REG0 );
2267+ jle_ptr (cb , COUNTED_EXIT (side_exit , send_se_cf_overflow ));
22732268
22742269 // Points to the receiver operand on the stack
22752270 x86opnd_t recv = ctx_stack_opnd (ctx , argc );
22762271
22772272 // Store incremented PC into current control frame in case callee raises.
22782273 jit_save_pc (jit , REG0 );
22792274
2280- if (push_frame ) {
2281- if (block ) {
2282- // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
2283- // VM_CFP_TO_CAPTURED_BLCOK does &cfp->self, rb_captured_block->code.iseq aliases
2284- // with cfp->block_code.
2285- jit_mov_gc_ptr (jit , cb , REG0 , (VALUE )block );
2286- mov (cb , member_opnd (REG_CFP , rb_control_frame_t , block_code ), REG0 );
2287- }
2288-
2289- // Increment the stack pointer by 3 (in the callee)
2290- // sp += 3
2291- lea (cb , REG0 , ctx_sp_opnd (ctx , sizeof (VALUE ) * 3 ));
2292-
2293- // Write method entry at sp[-3]
2294- // sp[-3] = me;
2295- // Put compile time cme into REG1. It's assumed to be valid because we are notified when
2296- // any cme we depend on become outdated. See rb_yjit_method_lookup_change().
2297- jit_mov_gc_ptr (jit , cb , REG1 , (VALUE )cme );
2298- mov (cb , mem_opnd (64 , REG0 , 8 * -3 ), REG1 );
2299-
2300- // Write block handler at sp[-2]
2301- // sp[-2] = block_handler;
2302- if (block ) {
2303- // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
2304- lea (cb , REG1 , member_opnd (REG_CFP , rb_control_frame_t , self ));
2305- or (cb , REG1 , imm_opnd (1 ));
2306- mov (cb , mem_opnd (64 , REG0 , 8 * -2 ), REG1 );
2307- }
2308- else {
2309- mov (cb , mem_opnd (64 , REG0 , 8 * -2 ), imm_opnd (VM_BLOCK_HANDLER_NONE ));
2310- }
2275+ if (block ) {
2276+ // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
2277+ // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
2278+ // with cfp->block_code.
2279+ jit_mov_gc_ptr (jit , cb , REG0 , (VALUE )block );
2280+ mov (cb , member_opnd (REG_CFP , rb_control_frame_t , block_code ), REG0 );
2281+ }
23112282
2312- // Write env flags at sp[-1]
2313- // sp[-1] = frame_type;
2314- uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL ;
2315- mov (cb , mem_opnd (64 , REG0 , 8 * -1 ), imm_opnd (frame_type ));
2283+ // Increment the stack pointer by 3 (in the callee)
2284+ // sp += 3
2285+ lea (cb , REG0 , ctx_sp_opnd (ctx , sizeof (VALUE ) * 3 ));
23162286
2317- // Allocate a new CFP (ec->cfp--)
2318- sub (
2319- cb ,
2320- member_opnd ( REG_EC , rb_execution_context_t , cfp ),
2321- imm_opnd ( sizeof ( rb_control_frame_t ))
2322- );
2287+ // Write method entry at sp[-3]
2288+ // sp[-3] = me;
2289+ // Put compile time cme into REG1. It's assumed to be valid because we are notified when
2290+ // any cme we depend on become outdated. See rb_yjit_method_lookup_change().
2291+ jit_mov_gc_ptr ( jit , cb , REG1 , ( VALUE ) cme );
2292+ mov ( cb , mem_opnd ( 64 , REG0 , 8 * -3 ), REG1 );
23232293
2324- // Setup the new frame
2325- // *cfp = (const struct rb_control_frame_struct) {
2326- // .pc = 0,
2327- // .sp = sp,
2328- // .iseq = 0,
2329- // .self = recv,
2330- // .ep = sp - 1,
2331- // .block_code = 0,
2332- // .__bp__ = sp,
2333- // };
2334- mov (cb , REG1 , member_opnd (REG_EC , rb_execution_context_t , cfp ));
2335- mov (cb , member_opnd (REG1 , rb_control_frame_t , pc ), imm_opnd (0 ));
2336- mov (cb , member_opnd (REG1 , rb_control_frame_t , sp ), REG0 );
2337- mov (cb , member_opnd (REG1 , rb_control_frame_t , iseq ), imm_opnd (0 ));
2338- mov (cb , member_opnd (REG1 , rb_control_frame_t , block_code ), imm_opnd (0 ));
2339- mov (cb , member_opnd (REG1 , rb_control_frame_t , __bp__ ), REG0 );
2340- sub (cb , REG0 , imm_opnd (sizeof (VALUE )));
2341- mov (cb , member_opnd (REG1 , rb_control_frame_t , ep ), REG0 );
2342- mov (cb , REG0 , recv );
2343- mov (cb , member_opnd (REG1 , rb_control_frame_t , self ), REG0 );
2294+ // Write block handler at sp[-2]
2295+ // sp[-2] = block_handler;
2296+ if (block ) {
2297+ // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
2298+ lea (cb , REG1 , member_opnd (REG_CFP , rb_control_frame_t , self ));
2299+ or (cb , REG1 , imm_opnd (1 ));
2300+ mov (cb , mem_opnd (64 , REG0 , 8 * -2 ), REG1 );
23442301 }
2302+ else {
2303+ mov (cb , mem_opnd (64 , REG0 , 8 * -2 ), imm_opnd (VM_BLOCK_HANDLER_NONE ));
2304+ }
2305+
2306+ // Write env flags at sp[-1]
2307+ // sp[-1] = frame_type;
2308+ uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL ;
2309+ mov (cb , mem_opnd (64 , REG0 , 8 * -1 ), imm_opnd (frame_type ));
2310+
2311+ // Allocate a new CFP (ec->cfp--)
2312+ sub (
2313+ cb ,
2314+ member_opnd (REG_EC , rb_execution_context_t , cfp ),
2315+ imm_opnd (sizeof (rb_control_frame_t ))
2316+ );
2317+
2318+ // Setup the new frame
2319+ // *cfp = (const struct rb_control_frame_struct) {
2320+ // .pc = 0,
2321+ // .sp = sp,
2322+ // .iseq = 0,
2323+ // .self = recv,
2324+ // .ep = sp - 1,
2325+ // .block_code = 0,
2326+ // .__bp__ = sp,
2327+ // };
2328+ mov (cb , REG1 , member_opnd (REG_EC , rb_execution_context_t , cfp ));
2329+ mov (cb , member_opnd (REG1 , rb_control_frame_t , pc ), imm_opnd (0 ));
2330+ mov (cb , member_opnd (REG1 , rb_control_frame_t , sp ), REG0 );
2331+ mov (cb , member_opnd (REG1 , rb_control_frame_t , iseq ), imm_opnd (0 ));
2332+ mov (cb , member_opnd (REG1 , rb_control_frame_t , block_code ), imm_opnd (0 ));
2333+ mov (cb , member_opnd (REG1 , rb_control_frame_t , __bp__ ), REG0 );
2334+ sub (cb , REG0 , imm_opnd (sizeof (VALUE )));
2335+ mov (cb , member_opnd (REG1 , rb_control_frame_t , ep ), REG0 );
2336+ mov (cb , REG0 , recv );
2337+ mov (cb , member_opnd (REG1 , rb_control_frame_t , self ), REG0 );
23452338
23462339 // Verify that we are calling the right function
23472340 if (YJIT_CHECK_MODE > 0 ) {
@@ -2407,15 +2400,12 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
24072400 x86opnd_t stack_ret = ctx_stack_push (ctx , TYPE_UNKNOWN );
24082401 mov (cb , stack_ret , RAX );
24092402
2410- // If this function needs a Ruby stack frame
2411- if (push_frame ) {
2412- // Pop the stack frame (ec->cfp++)
2413- add (
2414- cb ,
2415- member_opnd (REG_EC , rb_execution_context_t , cfp ),
2416- imm_opnd (sizeof (rb_control_frame_t ))
2417- );
2418- }
2403+ // Pop the stack frame (ec->cfp++)
2404+ add (
2405+ cb ,
2406+ member_opnd (REG_EC , rb_execution_context_t , cfp ),
2407+ imm_opnd (sizeof (rb_control_frame_t ))
2408+ );
24192409
24202410 // Note: gen_oswb_iseq() jumps to the next instruction with ctx->sp_offset == 0
24212411 // after the call, while this does not. This difference prevents
@@ -2463,6 +2453,30 @@ iseq_lead_only_arg_setup_p(const rb_iseq_t *iseq)
24632453bool rb_iseq_only_optparam_p (const rb_iseq_t * iseq );
24642454bool rb_iseq_only_kwparam_p (const rb_iseq_t * iseq );
24652455
2456+ // If true, the iseq is leaf and it can be replaced by a single C call.
2457+ static bool
2458+ rb_leaf_invokebuiltin_iseq_p (const rb_iseq_t * iseq )
2459+ {
2460+ unsigned int invokebuiltin_len = insn_len (BIN (opt_invokebuiltin_delegate_leave ));
2461+ unsigned int leave_len = insn_len (BIN (leave ));
2462+
2463+ return iseq -> body -> iseq_size == (
2464+ (invokebuiltin_len + leave_len ) &&
2465+ rb_vm_insn_addr2opcode ((void * )iseq -> body -> iseq_encoded [0 ]) == BIN (opt_invokebuiltin_delegate_leave ) &&
2466+ rb_vm_insn_addr2opcode ((void * )iseq -> body -> iseq_encoded [invokebuiltin_len ]) == BIN (leave ) &&
2467+ iseq -> body -> builtin_inline_p
2468+ );
2469+ }
2470+
2471+ // Return an rb_builtin_function if the iseq contains only that leaf builtin function.
2472+ static const struct rb_builtin_function *
2473+ rb_leaf_builtin_function (const rb_iseq_t * iseq )
2474+ {
2475+ if (!rb_leaf_invokebuiltin_iseq_p (iseq ))
2476+ return NULL ;
2477+ return (const struct rb_builtin_function * )iseq -> body -> iseq_encoded [1 ];
2478+ }
2479+
24662480static codegen_status_t
24672481gen_send_iseq (jitstate_t * jit , ctx_t * ctx , const struct rb_callinfo * ci , const rb_callable_method_entry_t * cme , rb_iseq_t * block , const int32_t argc )
24682482{
@@ -2529,6 +2543,39 @@ gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const r
25292543 // Check for interrupts
25302544 yjit_check_ints (cb , side_exit );
25312545
2546+ const struct rb_builtin_function * leaf_builtin = rb_leaf_builtin_function (iseq );
2547+
2548+ if (leaf_builtin && !block && leaf_builtin -> argc + 1 <= NUM_C_ARG_REGS ) {
2549+ // TODO: figure out if this is necessary
2550+ // If the calls don't allocate, do they need up to date PC, SP?
2551+ // Save YJIT registers
2552+ yjit_save_regs (cb );
2553+
2554+ // Get a pointer to the top of the stack
2555+ lea (cb , REG0 , ctx_stack_opnd (ctx , 0 ));
2556+
2557+ // Call the builtin func (ec, recv, arg1, arg2, ...)
2558+ mov (cb , C_ARG_REGS [0 ], REG_EC );
2559+
2560+ // Copy self and arguments
2561+ for (int32_t i = 0 ; i < leaf_builtin -> argc + 1 ; i ++ ) {
2562+ x86opnd_t stack_opnd = mem_opnd (64 , REG0 , - (leaf_builtin -> argc - i ) * SIZEOF_VALUE );
2563+ x86opnd_t c_arg_reg = C_ARG_REGS [i + 1 ];
2564+ mov (cb , c_arg_reg , stack_opnd );
2565+ }
2566+ ctx_stack_pop (ctx , leaf_builtin -> argc + 1 );
2567+ call_ptr (cb , REG0 , (void * )leaf_builtin -> func_ptr );
2568+
2569+ // Load YJIT registers
2570+ yjit_load_regs (cb );
2571+
2572+ // Push the return value
2573+ x86opnd_t stack_ret = ctx_stack_push (ctx , TYPE_UNKNOWN );
2574+ mov (cb , stack_ret , RAX );
2575+
2576+ return YJIT_KEEP_COMPILING ;
2577+ }
2578+
25322579 // Stack overflow check
25332580 // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
25342581 ADD_COMMENT (cb , "stack overflow check" );
0 commit comments