/*
* Copyright 2021 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

//
// Find struct fields that are always written to with a constant value, and
// replace gets of them with that value.
//
// For example, if we have a vtable of type T, and we always create it with one
// of the fields containing a ref.func of the same function F, and there is no
// write to that field of a different value (even using a subtype of T), then
// anywhere we see a get of that field we can place a ref.func of F.
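//
// For example, in wat-like pseudocode (using the type T, function F, and
// field from the description above):
//
// (struct.new $T .. (ref.func $F) ..) ;; every creation writes F here
// ..
// (struct.get $T $field (..ref..))    ;; so this can become (ref.func $F)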
//
// A variation of this pass also uses ref.test to optimize. This is riskier, as
// adding a ref.test means we are adding a non-trivial amount of work, and
// whether it helps overall depends on subsequent optimizations, so we do not do
// it by default. In this variation, if we infer that a field has exactly two
// possible values, and we can differentiate between them using a ref.test,
// then we do
//
// (struct.get $T x (..ref..))
// =>
// (select
// (..constant1..)
// (..constant2..)
// (ref.test $U (..ref..))
// )
//
// This is valid if, of all the subtypes of $T, those that pass the test have
// constant1 in that field, and those that fail the test have constant2. For
// example, a simple case is where $T has two subtypes, $T is never created
// itself, and each of the two subtypes has a different constant value. (Note
// that we do similar things in e.g. GlobalStructInference, where we turn a
// struct.get into a select, but the risk there is much lower since the
// condition for the select is something like a ref.eq - very cheap - while here
// we emit a ref.test which in general is as expensive as a cast.)
//
// FIXME: This pass assumes a closed world. When we start to allow multi-module
// wasm GC programs we need to check for type escaping.
//

#include "ir/bits.h"
#include "ir/gc-type-utils.h"
#include "ir/module-utils.h"
#include "ir/possible-constant.h"
#include "ir/struct-utils.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/small_vector.h"
#include "wasm-builder.h"
#include "wasm-traversal.h"
#include "wasm.h"
namespace wasm {
namespace {
using PCVStructValuesMap = StructUtils::StructValuesMap<PossibleConstantValues>;
using PCVFunctionStructValuesMap =
StructUtils::FunctionStructValuesMap<PossibleConstantValues>;

// A wrapper for a boolean value that provides a combine() method as is used in
// the StructUtils propagation logic.
struct Bool {
bool value = false;
Bool() {}
Bool(bool value) : value(value) {}
operator bool() const { return value; }
bool combine(bool other) { return value = value || other; }
};

using BoolStructValuesMap = StructUtils::StructValuesMap<Bool>;
using BoolFunctionStructValuesMap = StructUtils::FunctionStructValuesMap<Bool>;

// Optimize struct gets based on what we've learned about writes.
//
// TODO Aside from writes, we could use information like whether any struct of
// this type has even been created (to handle the case of struct.sets but
// no struct.news).
struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
bool isFunctionParallel() override { return true; }

// Only modifies struct.get operations.
bool requiresNonNullableLocalFixups() override { return false; }

// We receive the propagated infos, that is, info about field types in a form
// that takes into account subtypes for quick computation, and also the raw
// subtyping and new infos (information about struct.news).
std::unique_ptr<Pass> create() override {
return std::make_unique<FunctionOptimizer>(
propagatedInfos, subTypes, rawNewInfos, refTest);
}

FunctionOptimizer(const PCVStructValuesMap& propagatedInfos,
const SubTypes& subTypes,
const PCVStructValuesMap& rawNewInfos,
bool refTest)
: propagatedInfos(propagatedInfos), subTypes(subTypes),
rawNewInfos(rawNewInfos), refTest(refTest) {}

template<typename T> std::optional<HeapType> getRelevantHeapType(T* curr) {
auto type = curr->ref->type;
if (type == Type::unreachable) {
return std::nullopt;
}
auto heapType = type.getHeapType();
if (!heapType.isStruct()) {
return std::nullopt;
}
return heapType;
}

PossibleConstantValues getInfo(HeapType type, Index index) {
if (auto it = propagatedInfos.find(type); it != propagatedInfos.end()) {
// There is information on this type, fetch it.
return it->second[index];
}
return PossibleConstantValues{};
}

// Given information about a constant value, and the struct type and
// StructGet/RMW/Cmpxchg that reads it, create an expression for that value.
template<typename T>
Expression*
makeExpression(const PossibleConstantValues& info, HeapType type, T* curr) {
auto* value = info.makeExpression(*getModule());
auto field = GCTypeUtils::getField(type, curr->index);
assert(field);
// Apply packing, if needed.
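// (For a packed i8 or i16 field, this applies the field's packing: the value
// is masked to the field's width for an unsigned get, or sign-extended for a
// signed one.)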
if constexpr (std::is_same_v<T, StructGet>) {
value =
Bits::makePackedFieldGet(value, *field, curr->signed_, *getModule());
}
// Check if the value makes sense. The analysis below flows values around
// without considering where they are placed, that is, when we see a parent
// type can contain a value in a field then we assume a child may as well
// (which in general it can, e.g., using a reference to the parent, we can
// write that value to it, but the reference might actually point to a
// child instance). If we tracked the types of fields then we might avoid
// flowing values into places they cannot reside, like when a child field is
// a subtype, and so we could ignore things not refined enough for it (GUFA
// does a better job at this). For here, just check we do not break
// validation, and if we do, then we've inferred the only possible value is
// an impossible one, making the code unreachable.
if (!Type::isSubType(value->type, field->type)) {
Builder builder(*getModule());
value = builder.makeSequence(builder.makeDrop(value),
builder.makeUnreachable());
}
return value;
}

void visitStructGet(StructGet* curr) {
auto type = getRelevantHeapType(curr);
if (!type) {
return;
}
auto heapType = *type;
Builder builder(*getModule());
// Find the info for this field, and see if we can optimize. First, see if
// there is any information for this heap type at all. If there isn't, it is
// as if nothing was ever noted for that field.
PossibleConstantValues info = getInfo(heapType, curr->index);
if (!info.hasNoted()) {
// This field is never written at all. That means that we do not even
// construct any data of this type, and so it is a logic error to reach
// this location in the code. (Unless we are in an open-world situation,
// which we assume we are not in.) Replace this get with a trap. Note that
// we do not need to care about the nullability of the reference, as if it
// should have trapped, we are replacing it with another trap, which we
// allow to reorder (but we do need to care about side effects in the
// reference, so keep it around). We also do not need to care about
// synchronization since trapping accesses do not synchronize with other
// accesses.
replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
builder.makeUnreachable()));
changed = true;
return;
}

if (curr->order != MemoryOrder::Unordered) {
// The analysis we're basing the optimization on is not precise enough to
// rule out the field being used to synchronize between a write of the
// constant value and a subsequent read on another thread. This
// synchronization is possible even when the write does not change the
// value of the field. For now, simply avoid optimizing this case.
// TODO: Track release and acquire operations in the analysis.
return;
}

// If the value is not a constant, then it is unknown and we must give up
// on simply applying a constant. However, we can try to use a ref.test, if
// that is allowed.
if (!info.isConstant()) {
if (refTest) {
optimizeUsingRefTest(curr);
}
return;
}

// We can do this! Replace the get with a trap on a null reference using a
// ref.as_non_null (we need to trap as the get would have done so), plus the
// constant value. (Leave it to further optimizations to get rid of the
// ref.)
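// That is, the replacement has this shape:
//
// (struct.get $T x (..ref..))
// =>
// (block
//  (drop (ref.as_non_null (..ref..)))
//  (..constant..)
// )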
auto* value = makeExpression(info, heapType, curr);
auto* replacement = builder.blockify(
builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)));
replacement->list.push_back(value);
replacement->type = value->type;
replaceCurrent(replacement);
changed = true;
}

void optimizeUsingRefTest(StructGet* curr) {
auto refType = curr->ref->type;
auto refHeapType = refType.getHeapType();
// We only handle immutable fields in this function, as we will be looking
// at |rawNewInfos|. That is, we are trying to see when a type and its
// subtypes have different values (so that we can differentiate between them
// using a ref.test), and those differences are lost in |propagatedInfos|,
// which has propagated to relevant types so that we can do a single check
// to see what value could be there. So we need to use something more
// precise, |rawNewInfos|, which tracks the values written to struct.news,
// where we know the type exactly (unlike with a struct.set). But for that
// reason the field must be immutable, so that it is valid to only look at
// the struct.news. (A more complex flow analysis could do better here, but
// would be far beyond the scope of this pass.)
if (GCTypeUtils::getField(refType, curr->index)->mutable_ == Mutable) {
return;
}

// We seek two possible constant values. For each we track the constant and
// the types that have that constant. For example, if we have types A, B, C
// and A and B have 42 in their field, and C has 1337, then we'd have this:
//
// values = [ { 42, [A, B] }, { 1337, [C] } ];
struct Value {
PossibleConstantValues constant;
// Use a SmallVector as we'll only have 2 Values, and so the stack usage
// here is fixed.
SmallVector<HeapType, 10> types;
// Whether this slot is used. If so, |constant| has a value, and |types|
// is not empty.
bool used() const {
if (constant.hasNoted()) {
assert(!types.empty());
return true;
}
assert(types.empty());
return false;
}
} values[2];

// Handle one of the subtypes of the relevant type. We check what value it
// has for the field, and update |values|. If we hit a problem, we mark
// ourselves as having failed.
auto fail = false;
auto handleType = [&](HeapType type, Index depth) {
if (fail) {
// TODO: Add a mechanism to halt |iterSubTypes| in the middle, as once
// we fail there is no point to further iterating.
return;
}
auto iter = rawNewInfos.find(type);
if (iter == rawNewInfos.end()) {
// This type has no struct.news, so we can ignore it: it is abstract.
return;
}
auto value = iter->second[curr->index];
if (!value.isConstant()) {
// The value here is not constant, so give up entirely.
fail = true;
return;
}
// Consider the constant value compared to previous ones.
for (Index i = 0; i < 2; i++) {
if (!values[i].used()) {
// There is nothing in this slot: place this value there.
values[i].constant = value;
values[i].types.push_back(type);
break;
}
// There is something in this slot. If we have the same value, append.
if (values[i].constant == value) {
values[i].types.push_back(type);
break;
}
// Otherwise, this value is different from values[i], which is fine: we can
// add it as the second value in the next loop iteration - at least, if
// there is another iteration. If this is already the last one, then we have
// found more than two values, and fail.
if (i == 1) {
fail = true;
return;
}
}
};

subTypes.iterSubTypes(refHeapType, handleType);

if (fail) {
return;
}

// We either filled slot 0, or we did not, and if we did not then we cannot
// have filled slot 1 after it.
assert(values[0].used() || !values[1].used());
if (!values[1].used()) {
// We did not see two constant values (we might have seen just one, or
// even no constant values at all).
return;
}

// We have exactly two values to pick between. We can pick between those
// values using a single ref.test if the two sets of types are actually
// disjoint. In general we could compute the LUB of each set and see if it
// overlaps with the other, but for efficiency we only want to do this
// optimization if the type we test on is closed/final, since ref.test on a
// final type can be fairly fast (perhaps constant time). We therefore look
// if one of the sets of types contains a single type and it is final, and
// if so then we'll test on it. (However, see a few lines below on how we
// test for finality.)
// TODO: Consider adding a variation on this pass that uses non-final types.
auto isProperTestType = [&](const Value& value) -> std::optional<HeapType> {
auto& types = value.types;
if (types.size() != 1) {
// Too many types.
return {};
}
auto type = types[0];
// Do not test finality using isOpen(), as that may only be applied late
// in the optimization pipeline. We are in closed-world here, so just
// see if there are subtypes in practice (if not, this can be marked as
// final later, and we assume optimistically that it will).
if (!subTypes.getImmediateSubTypes(type).empty()) {
// There are subtypes.
return {};
}
// Success, we can test on this.
return type;
};

// Look for the index in |values| to test on.
Index testIndex;
if (auto test = isProperTestType(values[0])) {
testIndex = 0;
} else if (auto test = isProperTestType(values[1])) {
testIndex = 1;
} else {
// We failed to find a simple way to separate the types.
return;
}

// Success! We can replace the struct.get with a select over the two values
// (and a trap on null) with the proper ref.test.
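// That is, the replacement has this shape (where $TestType stands for the
// single, effectively-final type in the tested slot):
//
// (struct.get $T x (..ref..))
// =>
// (select
//  (..constant of the tested type..)
//  (..the other constant..)
//  (ref.test $TestType (ref.as_non_null (..ref..)))
// )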
Builder builder(*getModule());
auto& testIndexTypes = values[testIndex].types;
assert(testIndexTypes.size() == 1);
auto testType = testIndexTypes[0];
auto* nnRef = builder.makeRefAs(RefAsNonNull, curr->ref);
replaceCurrent(builder.makeSelect(
builder.makeRefTest(nnRef, Type(testType, NonNullable)),
makeExpression(values[testIndex].constant, refHeapType, curr),
makeExpression(values[1 - testIndex].constant, refHeapType, curr)));
changed = true;
}

void doWalkFunction(Function* func) {
WalkerPass<PostWalker<FunctionOptimizer>>::doWalkFunction(func);
// If we changed anything, we need to update parent types as types may have
// changed.
if (changed) {
ReFinalize().walkFunctionInModule(func, getModule());
}
}

private:
const PCVStructValuesMap& propagatedInfos;
const SubTypes& subTypes;
const PCVStructValuesMap& rawNewInfos;
const bool refTest;
bool changed = false;
};

struct PCVScanner
: public StructUtils::StructScanner<PossibleConstantValues, PCVScanner> {
std::unique_ptr<Pass> create() override {
return std::make_unique<PCVScanner>(
functionNewInfos, functionSetGetInfos, functionCopyInfos);
}

PCVScanner(PCVFunctionStructValuesMap& functionNewInfos,
PCVFunctionStructValuesMap& functionSetInfos,
BoolFunctionStructValuesMap& functionCopyInfos)
: StructUtils::StructScanner<PossibleConstantValues, PCVScanner>(
functionNewInfos, functionSetInfos),
functionCopyInfos(functionCopyInfos) {}

void noteExpression(Expression* expr,
HeapType type,
Index index,
PossibleConstantValues& info) {
info.note(expr, *getModule());
}

void noteDefault(Type fieldType,
HeapType type,
Index index,
PossibleConstantValues& info) {
info.note(Literal::makeZero(fieldType));
}

void noteCopy(HeapType type, Index index, PossibleConstantValues& info) {
// Note copies, as they must be considered later. See the comment on the
// propagation of values below.
functionCopyInfos[getFunction()][type][index] = true;
}

void noteRead(HeapType type, Index index, PossibleConstantValues& info) {
// Reads do not interest us.
}

void noteRMW(Expression* expr,
HeapType type,
Index index,
PossibleConstantValues& info) {
// In general RMWs will modify the value of the field, so there is no single
// constant value. We could in principle try to recognize no-op RMWs like
// adds of 0, but we leave that for OptimizeInstructions for simplicity.
info.noteUnknown();
}

BoolFunctionStructValuesMap& functionCopyInfos;
};

struct ConstantFieldPropagation : public Pass {
// Only modifies struct.get operations.
bool requiresNonNullableLocalFixups() override { return false; }

// Whether we are optimizing using ref.test, see above.
const bool refTest;

ConstantFieldPropagation(bool refTest) : refTest(refTest) {}

void run(Module* module) override {
if (!module->features.hasGC()) {
return;
}

// Find and analyze all writes inside each function.
PCVFunctionStructValuesMap functionNewInfos(*module),
functionSetInfos(*module);
BoolFunctionStructValuesMap functionCopyInfos(*module);
PCVScanner scanner(functionNewInfos, functionSetInfos, functionCopyInfos);
auto* runner = getPassRunner();
scanner.run(runner, module);
scanner.runOnModuleCode(runner, module);

// Combine the data from the functions.
PCVStructValuesMap combinedNewInfos, combinedSetInfos;
functionNewInfos.combineInto(combinedNewInfos);
functionSetInfos.combineInto(combinedSetInfos);
BoolStructValuesMap combinedCopyInfos;
functionCopyInfos.combineInto(combinedCopyInfos);

// Prepare data we will need later.
SubTypes subTypes(*module);
PCVStructValuesMap rawNewInfos;
if (refTest) {
// The refTest optimizations require the raw new infos (see above), but we
// can skip copying here if we'll never read this.
rawNewInfos = combinedNewInfos;
}

// Handle subtyping. The combined infos so far contain data that represents
// each struct.new and struct.set's operation on the struct type used in
// that instruction. That is, if we do a struct.set to type T, the value was
// noted for type T. But our actual goal is to answer questions about
// struct.gets. Specifically, when later we see:
//
// (struct.get $A x (REF-1))
//
// Then we want to be aware of all the relevant struct.sets, that is, the
// sets that can write data that this get reads. Given a set
//
// (struct.set $B x (REF-2) (..value..))
//
// then
//
// 1. If $B is a subtype of $A, it is relevant: the get might read from a
// struct of type $B (i.e., REF-1 and REF-2 might be identical, and both
// be a struct of type $B).
// 2. If $B is a supertype of $A that still has the field x then it may
// also be relevant: since $A is a subtype of $B, the set may write to a
// struct of type $A (and again, REF-1 and REF-2 may be identical).
//
// Thus, if either $A <: $B or $B <: $A then we must consider the get and
// set to be relevant to each other. To make our later lookups for gets
// efficient, we therefore propagate information about the possible values
// in each field to both subtypes and supertypes.
//
// struct.new on the other hand knows exactly what type is being written to,
// and so given a get of $A and a new of $B, the new is relevant for the get
// iff $A is a subtype of $B, so we only need to propagate in one direction
// there, to supertypes.
//
// An exception to the above are copies. If a field is copied then even
// struct.new information cannot be assumed to be precise:
//
// // A :> B :> C
// ..
// new B(20);
// ..
// A1->f0 = A2->f0; // Either of these might refer to an A, B, or C.
// ..
// foo(A->f0); // These can contain 20,
// foo(C->f0); // if the copy read from B.
//
// To handle that, copied fields are treated like struct.set ones (by
// copying the struct.new data to struct.set). Note that we must propagate
// copying to subtypes first, as in the example above the struct.new values
// of subtypes must be taken into account (that is, A or a subtype is being
// copied, so we want to do the same thing for B and C as well as A, since
// a copy of A means it could be a copy of B or C).
StructUtils::TypeHierarchyPropagator<Bool> boolPropagator(subTypes);
boolPropagator.propagateToSubTypes(combinedCopyInfos);
for (auto& [type, copied] : combinedCopyInfos) {
for (Index i = 0; i < copied.size(); i++) {
if (copied[i]) {
combinedSetInfos[type][i].combine(combinedNewInfos[type][i]);
}
}
}

StructUtils::TypeHierarchyPropagator<PossibleConstantValues> propagator(
subTypes);
propagator.propagateToSuperTypes(combinedNewInfos);
propagator.propagateToSuperAndSubTypes(combinedSetInfos);

// Combine both sources of information into the final information that
// struct.gets care about.
PCVStructValuesMap combinedInfos = std::move(combinedNewInfos);
combinedSetInfos.combineInto(combinedInfos);

// Optimize.
// TODO: Skip this if we cannot optimize anything
FunctionOptimizer(combinedInfos, subTypes, rawNewInfos, refTest)
.run(runner, module);
}
};

} // anonymous namespace

Pass* createConstantFieldPropagationPass() {
return new ConstantFieldPropagation(false);
}

Pass* createConstantFieldPropagationRefTestPass() {
return new ConstantFieldPropagation(true);
}

} // namespace wasm