@@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
 /* Check if there's no conflicting instruction between curins and ref.
 ** Also avoid fusing loads if there are multiple references.
 */
-static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
+static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
 {
   IRIns *ir = as->ir;
   IRRef i = as->curins;
@@ -118,7 +118,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
   while (--i > ref) {
     if (ir[i].o == conflict)
       return 0;  /* Conflict found. */
-    else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
+    else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS))
+      return 0;
+    else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
       return 0;
   }
   return 1;  /* Ok, no conflict. */
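
The former noload flag becomes a check bitmask: bit 0 makes noconflict() fail on any intervening IR_NEWREF or IR_CALLS (a call with side effects, such as table.clear, can resize or clear a table and so invalidate a cached array/hash pointer), while bit 1 keeps the old multiple-reference test. A minimal sketch of the encoding, with hypothetical constant names that do not appear in the patch:

  /* Hypothetical names for the bits of 'check' (illustration only). */
  enum {
    NC_SIDEFX   = 1,  /* Fail on IR_NEWREF/IR_CALLS between ref and curins. */
    NC_MULTIREF = 2   /* Fail if ref is referenced again before curins. */
  };
  /* Old callers map onto the new scheme as: noload=1 -> check=0 and
  ** noload=0 -> check=NC_MULTIREF; the asm_fuseload hunks below
  ** additionally set NC_SIDEFX for array/hash loads. */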
@@ -134,7 +136,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
     lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
     /* We can avoid the FLOAD of t->array for colocated arrays. */
     if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
-        !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
+        !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
       as->mrm.ofs = (int32_t)sizeof(GCtab);  /* Ofs to colocated array. */
       return irb->op1;  /* Table obj. */
     }
@@ -456,7 +458,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
     RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
     if (ir->o == IR_SLOAD) {
       if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
-          noconflict(as, ref, IR_RETF, 0) &&
+          noconflict(as, ref, IR_RETF, 2) &&
           !(LJ_GC64 && irt_isaddr(ir->t))) {
         as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
         as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
@@ -467,12 +469,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
   } else if (ir->o == IR_FLOAD) {
     /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
     if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
-        noconflict(as, ref, IR_FSTORE, 0)) {
+        noconflict(as, ref, IR_FSTORE, 2)) {
       asm_fusefref(as, ir, xallow);
       return RID_MRM;
     }
   } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
-    if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
+    if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) &&
         !(LJ_GC64 && irt_isaddr(ir->t))) {
       asm_fuseahuref(as, ir->op1, xallow);
       return RID_MRM;
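
The ALOAD/HLOAD case now also sets bit 0, since a NEWREF or CALLS between the load and its use can reallocate the table's array or hash part and leave the fused address dangling; the patch treats ULOAD as unaffected by either. A worked expansion of the expression (a note, not part of the patch):

  /* 2+(ir->o != IR_ULOAD) evaluates to:
  **   IR_ALOAD, IR_HLOAD -> 2+1 = 3  (multi-ref test + NEWREF/CALLS test)
  **   IR_ULOAD           -> 2+0 = 2  (multi-ref test only)
  */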
@@ -482,7 +484,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
     ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
     */
     if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
-        noconflict(as, ref, IR_XSTORE, 0)) {
+        noconflict(as, ref, IR_XSTORE, 2)) {
       asm_fusexref(as, ir->op1, xallow);
       return RID_MRM;
     }
@@ -815,6 +817,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   emit_rr(as, XO_UCOMISD, left, tmp);
   emit_rr(as, XO_CVTSI2SD, tmp, dest);
   emit_rr(as, XO_XORPS, tmp, tmp);  /* Avoid partial register stall. */
+  checkmclim(as);
   emit_rr(as, XO_CVTTSD2SI, dest, left);
   /* Can't fuse since left is needed twice. */
 }
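
The checkmclim() calls added here and below split long backwards-emitted instruction sequences: the x86 backend writes machine code downward from the end of the buffer, and the limit check must run often enough that no burst of emit_* calls overruns the reserved headroom. Paraphrased from lj_asm.c (the real macro carries extra debug assertions), the check amounts to:

  /* Sketch only, not verbatim LuaJIT source. */
  static LJ_AINLINE void checkmclim_sketch(ASMState *as)
  {
    if (LJ_UNLIKELY(as->mcp < as->mclim))
      asm_mclimit(as);  /* Grow the machine-code area or abort the trace. */
  }

The asm_href() hunks below accordingly add and remove checks so that each one covers a shorter emit sequence.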
@@ -857,6 +860,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_rr(as, XO_SUBSD, dest, bias);  /* Subtract 2^52+2^51 bias. */
       emit_rr(as, XO_XORPS, dest, bias);  /* Merge bias and integer. */
       emit_rma(as, XO_MOVSD, bias, k);
+      checkmclim(as);
       emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
       return;
     } else {  /* Integer to FP conversion. */
@@ -1173,6 +1177,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     asm_guardcc(as, CC_E);
   else
     emit_sjcc(as, CC_E, l_end);
+  checkmclim(as);
   if (irt_isnum(kt)) {
     if (isk) {
       /* Assumes -0.0 is already canonicalized to +0.0. */
@@ -1232,7 +1237,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 #endif
   }
   emit_sfixup(as, l_loop);
-  checkmclim(as);
 #if LJ_GC64
   if (!isk && irt_isaddr(kt)) {
     emit_rr(as, XO_OR, tmp|REX_64, key);
@@ -1259,6 +1263,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
     emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
     emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
+    checkmclim(as);
     emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
     emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
     emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
@@ -1276,7 +1281,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     } else {
       emit_rr(as, XO_MOV, tmp, key);
 #if LJ_GC64
-      checkmclim(as);
       emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
       if ((as->flags & JIT_F_BMI2)) {
         emit_i8(as, 32);
@@ -1373,24 +1377,31 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  if (irref_isk(ir->op1)) {
+  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+  if (irref_isk(ir->op1) && !guarded) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
     emit_rma(as, XO_MOV, dest|REX_GC64, v);
   } else {
     Reg uv = ra_scratch(as, RSET_GPR);
-    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
-    if (ir->o == IR_UREFC) {
+    if (ir->o == IR_UREFC)
       emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
-      asm_guardcc(as, CC_NE);
-      emit_i8(as, 1);
+    else
+      emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
+    if (guarded) {
+      asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
+      emit_i8(as, 0);
       emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
+    }
+    if (irref_isk(ir->op1)) {
+      GCfunc *fn = ir_kfunc(IR(ir->op1));
+      GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
+      emit_loada(as, uv, o);
     } else {
-      emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
+      emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR),
+                (int32_t)offsetof(GCfuncL, uvptr) +
+                (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
     }
-    emit_rmro(as, XO_MOV, uv|REX_GC64, func,
-              (int32_t)offsetof(GCfuncL, uvptr) +
-              (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
   }
 }
 
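asm_uref() now emits the uv->closed check only when the IR actually carries a guard (IRT_GUARD on an IRT_PGC-typed result) rather than unconditionally for IR_UREFC, and a guarded upvalue of a constant function operand is handled by materializing the upvalue object with emit_loada(). Since code is emitted bottom-up, the guard reads in execution order as: compare uv->closed against 0, then take the side exit on CC_E (UREFC expects a closed upvalue) or CC_NE (UREFO expects an open one). A C-level paraphrase of the computed result, assuming lj_obj.h definitions and with trace_exit() as a hypothetical stand-in for the side exit:

  static void trace_exit(void);  /* Hypothetical stand-in, not a real API. */

  static TValue *uref_sketch(GCupval *uv, int is_urefc)
  {
    if (is_urefc ? !uv->closed : uv->closed)
      trace_exit();  /* Guard fails: upvalue in the wrong open/closed state. */
    return is_urefc ? &uv->tv : mref(uv->v, TValue);  /* &tv vs. stack slot. */
  }
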
@@ -1547,6 +1558,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
   if (irt_islightud(ir->t)) {
     Reg dest = asm_load_lightud64(as, ir, 1);
     if (ra_hasreg(dest)) {
+      checkmclim(as);
       asm_fuseahuref(as, ir->op1, RSET_GPR);
       if (ir->o == IR_VLOAD) as->mrm.ofs += 8*ir->op2;
       emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
@@ -1594,6 +1606,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
       lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
                  "bad load type %d", irt_type(ir->t));
+      checkmclim(as);
 #if LJ_GC64
       emit_u32(as, LJ_TISNUM << 15);
 #else