/*
* Copyright 2021 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

//
// Find struct fields that are always written to with a constant value, and
// replace gets of them with that value.
//
// For example, if we have a vtable of type T, and we always create it with one
// of the fields containing a ref.func of the same function F, and there is no
// write to that field of a different value (even using a subtype of T), then
// anywhere we see a get of that field we can place a ref.func of F.
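//
// For example, in wat-like pseudocode (using the type T, function F, and
// field from the description above):
//
// (struct.new $T .. (ref.func $F) ..) ;; every creation writes F here
// ..
// (struct.get $T $field (..ref..))    ;; so this can become (ref.func $F)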
//
// A variation of this pass also uses ref.test to optimize. This is riskier, as
// adding a ref.test means we are adding a non-trivial amount of work, and
// whether it helps overall depends on subsequent optimizations, so we do not do
// it by default. In this variation, if we infer that a field has exactly two
// possible values, and we can differentiate between them using a ref.test,
// then we do
//
// (struct.get $T x (..ref..))
// =>
// (select
// (..constant1..)
// (..constant2..)
// (ref.test $U (..ref..))
// )
//
// This is valid if, of all the subtypes of $T, those that pass the test have
// constant1 in that field, and those that fail the test have constant2. For
// example, a simple case is where $T has two subtypes, $T is never created
// itself, and each of the two subtypes has a different constant value. (Note
// that we do similar things in e.g. GlobalStructInference, where we turn a
// struct.get into a select, but the risk there is much lower since the
// condition for the select is something like a ref.eq - very cheap - while here
// we emit a ref.test which in general is as expensive as a cast.)
//
// FIXME: This pass assumes a closed world. When we start to allow multi-module
// wasm GC programs we need to check for type escaping.
//

#include "ir/bits.h"
#include "ir/gc-type-utils.h"
#include "ir/module-utils.h"
#include "ir/possible-constant.h"
#include "ir/struct-utils.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/small_vector.h"
#include "wasm-builder.h"
#include "wasm-traversal.h"
#include "wasm.h"
namespace wasm {
namespace {
using PCVStructValuesMap = StructUtils::StructValuesMap<PossibleConstantValues>;
using PCVFunctionStructValuesMap =
StructUtils::FunctionStructValuesMap<PossibleConstantValues>;

// A wrapper for a boolean value that provides a combine() method as is used in
// the StructUtils propagation logic.
struct Bool {
bool value = false;
Bool() {}
Bool(bool value) : value(value) {}
operator bool() const { return value; }
bool combine(bool other) { return value = value || other; }
};

using BoolStructValuesMap = StructUtils::StructValuesMap<Bool>;
using BoolFunctionStructValuesMap = StructUtils::FunctionStructValuesMap<Bool>;

// Optimize struct gets based on what we've learned about writes.
//
// TODO Aside from writes, we could use information like whether any struct of
// this type has even been created (to handle the case of struct.sets but
// no struct.news).
struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
bool isFunctionParallel() override { return true; }

// Only modifies struct.get operations.
bool requiresNonNullableLocalFixups() override { return false; }

// We receive the propagated infos, that is, info about field types in a form
// that takes into account subtypes for quick computation, and also the raw
// subtyping and new infos (information about struct.news).
std::unique_ptr<Pass> create() override {
return std::make_unique<FunctionOptimizer>(
propagatedInfos, subTypes, rawNewInfos, refTest);
}

FunctionOptimizer(const PCVStructValuesMap& propagatedInfos,
const SubTypes& subTypes,
const PCVStructValuesMap& rawNewInfos,
bool refTest)
: propagatedInfos(propagatedInfos), subTypes(subTypes),
rawNewInfos(rawNewInfos), refTest(refTest) {}

template<typename T> std::optional<HeapType> getRelevantHeapType(T* curr) {
auto type = curr->ref->type;
if (type == Type::unreachable) {
return std::nullopt;
}
auto heapType = type.getHeapType();
if (!heapType.isStruct()) {
return std::nullopt;
}
return heapType;
}

PossibleConstantValues getInfo(HeapType type, Index index) {
if (auto it = propagatedInfos.find(type); it != propagatedInfos.end()) {
// There is information on this type, fetch it.
return it->second[index];
}
return PossibleConstantValues{};
}

// Given information about a constant value, and the struct type and
// StructGet/RMW/Cmpxchg that reads it, create an expression for that value.
template<typename T>
Expression*
makeExpression(const PossibleConstantValues& info, HeapType type, T* curr) {
auto* value = info.makeExpression(*getModule());
auto field = GCTypeUtils::getField(type, curr->index);
assert(field);
// Apply packing, if needed.
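// (For a packed i8 or i16 field, this applies the field's packing: the value
// is masked to the field's width for an unsigned get, or sign-extended for a
// signed one.)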
if constexpr (std::is_same_v<T, StructGet>) {
value =
Bits::makePackedFieldGet(value, *field, curr->signed_, *getModule());
}
// Check if the value makes sense. The analysis below flows values around
// without considering where they are placed, that is, when we see a parent
// type can contain a value in a field then we assume a child may as well
// (which in general it can, e.g., using a reference to the parent, we can
// write that value to it, but the reference might actually point to a
// child instance). If we tracked the types of fields then we might avoid
// flowing values into places they cannot reside, like when a child field is
// a subtype, and so we could ignore things not refined enough for it (GUFA
// does a better job at this). For here, just check we do not break
// validation, and if we do, then we've inferred the only possible value is
// an impossible one, making the code unreachable.
if (!Type::isSubType(value->type, field->type)) {
Builder builder(*getModule());
value = builder.makeSequence(builder.makeDrop(value),
builder.makeUnreachable());
}
return value;
}

void visitStructGet(StructGet* curr) {
auto type = getRelevantHeapType(curr);
if (!type) {
return;
}
auto heapType = *type;
Builder builder(*getModule());
// Find the info for this field, and see if we can optimize. First, see if
// there is any information for this heap type at all. If there isn't, it is
// as if nothing was ever noted for that field.
PossibleConstantValues info = getInfo(heapType, curr->index);
if (!info.hasNoted()) {
// This field is never written at all. That means that we do not even
// construct any data of this type, and so it is a logic error to reach
// this location in the code. (Unless we are in an open-world situation,
// which we assume we are not in.) Replace this get with a trap. Note that
// we do not need to care about the nullability of the reference, as if it
// should have trapped, we are replacing it with another trap, which we
// allow to reorder (but we do need to care about side effects in the
// reference, so keep it around). We also do not need to care about
// synchronization since trapping accesses do not synchronize with other
// accesses.
replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
builder.makeUnreachable()));
changed = true;
return;
}

if (curr->order != MemoryOrder::Unordered) {
// The analysis we're basing the optimization on is not precise enough to
// rule out the field being used to synchronize between a write of the
// constant value and a subsequent read on another thread. This
// synchronization is possible even when the write does not change the
// value of the field. For now, simply avoid optimizing this case.
// TODO: Track release and acquire operations in the analysis.
return;
}

// If the value is not a constant, then it is unknown and we must give up
// on simply applying a constant. However, we can try to use a ref.test, if
// that is allowed.
if (!info.isConstant()) {
if (refTest) {
optimizeUsingRefTest(curr);
}
return;
}

// We can do this! Replace the get with a trap on a null reference using a
// ref.as_non_null (we need to trap as the get would have done so), plus the
// constant value. (Leave it to further optimizations to get rid of the
// ref.)
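// That is, the replacement has this shape:
//
// (struct.get $T x (..ref..))
// =>
// (block
//  (drop (ref.as_non_null (..ref..)))
//  (..constant..)
// )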
auto* value = makeExpression(info, heapType, curr);
auto* replacement = builder.blockify(
builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)));
replacement->list.push_back(value);
replacement->type = value->type;
replaceCurrent(replacement);
changed = true;
}

void optimizeUsingRefTest(StructGet* curr) {
auto refType = curr->ref->type;
auto refHeapType = refType.getHeapType();
// We only handle immutable fields in this function, as we will be looking
// at |rawNewInfos|. That is, we are trying to see when a type and its
// subtypes have different values (so that we can differentiate between them
// using a ref.test), and those differences are lost in |propagatedInfos|,
// which has propagated to relevant types so that we can do a single check
// to see what value could be there. So we need to use something more
// precise, |rawNewInfos|, which tracks the values written to struct.news,
// where we know the type exactly (unlike with a struct.set). But for that
// reason the field must be immutable, so that it is valid to only look at
// the struct.news. (A more complex flow analysis could do better here, but
// would be far beyond the scope of this pass.)
if (GCTypeUtils::getField(refType, curr->index)->mutable_ == Mutable) {
return;
}

// We seek two possible constant values. For each we track the constant and
// the types that have that constant. For example, if we have types A, B, C
// and A and B have 42 in their field, and C has 1337, then we'd have this:
//
// values = [ { 42, [A, B] }, { 1337, [C] } ];
struct Value {
PossibleConstantValues constant;
// Use a SmallVector as we'll only have 2 Values, and so the stack usage
// here is fixed.
SmallVector<HeapType, 10> types;
// Whether this slot is used. If so, |constant| has a value, and |types|
// is not empty.
bool used() const {
if (constant.hasNoted()) {
assert(!types.empty());
return true;
}
assert(types.empty());
return false;
}
} values[2];

// Handle one of the subtypes of the relevant type. We check what value it
// has for the field, and update |values|. If we hit a problem, we mark
// ourselves as having failed.
auto fail = false;
auto handleType = [&](HeapType type, Index depth) {
if (fail) {
// TODO: Add a mechanism to halt |iterSubTypes| in the middle, as once
// we fail there is no point to further iterating.
return;
}
auto iter = rawNewInfos.find(type);
if (iter == rawNewInfos.end()) {
// This type has no struct.news, so we can ignore it: it is abstract.
return;
}
auto value = iter->second[curr->index];
if (!value.isConstant()) {
// The value here is not constant, so give up entirely.
fail = true;
return;
}
// Consider the constant value compared to previous ones.
for (Index i = 0; i < 2; i++) {
if (!values[i].used()) {
// There is nothing in this slot: place this value there.
values[i].constant = value;
values[i].types.push_back(type);
break;
}
// There is something in this slot. If we have the same value, append.
if (values[i].constant == value) {
values[i].types.push_back(type);
break;
}
// Otherwise, this value is different from values[i], which is fine: we can
// add it as the second value in the next loop iteration - at least, if
// there is another iteration. If this is already the last one, then we have
// found more than two values, and fail.
if (i == 1) {
fail = true;
return;
}
}
};

subTypes.iterSubTypes(refHeapType, handleType);

if (fail) {
return;
}

// We either filled slot 0, or we did not, and if we did not then we cannot
// have filled slot 1 after it.
assert(values[0].used() || !values[1].used());
if (!values[1].used()) {
// We did not see two constant values (we might have seen just one, or
// even no constant values at all).
return;
}

// We have exactly two values to pick between. We can pick between those
// values using a single ref.test if the two sets of types are actually
// disjoint. In general we could compute the LUB of each set and see if it
// overlaps with the other, but for efficiency we only want to do this
// optimization if the type we test on is closed/final, since ref.test on a
// final type can be fairly fast (perhaps constant time). We therefore look
// if one of the sets of types contains a single type and it is final, and
// if so then we'll test on it. (However, see a few lines below on how we
// test for finality.)
// TODO: Consider adding a variation on this pass that uses non-final types.
auto isProperTestType = [&](const Value& value) -> std::optional<HeapType> {
auto& types = value.types;
if (types.size() != 1) {
// Too many types.
return {};
}
auto type = types[0];
// Do not test finality using isOpen(), as that may only be applied late
// in the optimization pipeline. We are in closed-world here, so just
// see if there are subtypes in practice (if not, this can be marked as
// final later, and we assume optimistically that it will).
if (!subTypes.getImmediateSubTypes(type).empty()) {
// There are subtypes.
return {};
}
// Success, we can test on this.
return type;
};

// Look for the index in |values| to test on.
Index testIndex;
if (auto test = isProperTestType(values[0])) {
testIndex = 0;
} else if (auto test = isProperTestType(values[1])) {
testIndex = 1;
} else {
// We failed to find a simple way to separate the types.
return;
}

// Success! We can replace the struct.get with a select over the two values
// (and a trap on null) with the proper ref.test.
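// That is, the replacement has this shape (where $TestType stands for the
// single, effectively-final type in the tested slot):
//
// (struct.get $T x (..ref..))
// =>
// (select
//  (..constant of the tested type..)
//  (..the other constant..)
//  (ref.test $TestType (ref.as_non_null (..ref..)))
// )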
Builder builder(*getModule());
auto& testIndexTypes = values[testIndex].types;
assert(testIndexTypes.size() == 1);
auto testType = testIndexTypes[0];
auto* nnRef = builder.makeRefAs(RefAsNonNull, curr->ref);
replaceCurrent(builder.makeSelect(
builder.makeRefTest(nnRef, Type(testType, NonNullable)),
makeExpression(values[testIndex].constant, refHeapType, curr),
makeExpression(values[1 - testIndex].constant, refHeapType, curr)));
changed = true;
}

void doWalkFunction(Function* func) {
WalkerPass<PostWalker<FunctionOptimizer>>::doWalkFunction(func);
// If we changed anything, we need to update parent types as types may have
// changed.
if (changed) {
ReFinalize().walkFunctionInModule(func, getModule());
}
}

private:
const PCVStructValuesMap& propagatedInfos;
const SubTypes& subTypes;
const PCVStructValuesMap& rawNewInfos;
const bool refTest;
bool changed = false;
};

struct PCVScanner
: public StructUtils::StructScanner<PossibleConstantValues, PCVScanner> {
std::unique_ptr<Pass> create() override {
return std::make_unique<PCVScanner>(
functionNewInfos, functionSetGetInfos, functionCopyInfos);
}

PCVScanner(PCVFunctionStructValuesMap& functionNewInfos,
PCVFunctionStructValuesMap& functionSetInfos,
BoolFunctionStructValuesMap& functionCopyInfos)
: StructUtils::StructScanner<PossibleConstantValues, PCVScanner>(
functionNewInfos, functionSetInfos),
functionCopyInfos(functionCopyInfos) {}

void noteExpression(Expression* expr,
HeapType type,
Index index,
PossibleConstantValues& info) {
info.note(expr, *getModule());
}

void noteDefault(Type fieldType,
HeapType type,
Index index,
PossibleConstantValues& info) {
info.note(Literal::makeZero(fieldType));
}

void noteCopy(HeapType type, Index index, PossibleConstantValues& info) {
// Note copies, as they must be considered later. See the comment on the
// propagation of values below.
functionCopyInfos[getFunction()][type][index] = true;
}

void noteRead(HeapType type, Index index, PossibleConstantValues& info) {
// Reads do not interest us.
}

void noteRMW(Expression* expr,
HeapType type,
Index index,
PossibleConstantValues& info) {
// In general RMWs will modify the value of the field, so there is no single
// constant value. We could in principle try to recognize no-op RMWs like
// adds of 0, but we leave that for OptimizeInstructions for simplicity.
info.noteUnknown();
}

BoolFunctionStructValuesMap& functionCopyInfos;
};

struct ConstantFieldPropagation : public Pass {
// Only modifies struct.get operations.
bool requiresNonNullableLocalFixups() override { return false; }

// Whether we are optimizing using ref.test, see above.
const bool refTest;

ConstantFieldPropagation(bool refTest) : refTest(refTest) {}

void run(Module* module) override {
if (!module->features.hasGC()) {
return;
}

// Find and analyze all writes inside each function.
PCVFunctionStructValuesMap functionNewInfos(*module),
functionSetInfos(*module);
BoolFunctionStructValuesMap functionCopyInfos(*module);
PCVScanner scanner(functionNewInfos, functionSetInfos, functionCopyInfos);
auto* runner = getPassRunner();
scanner.run(runner, module);
scanner.runOnModuleCode(runner, module);

// Combine the data from the functions.
PCVStructValuesMap combinedNewInfos, combinedSetInfos;
functionNewInfos.combineInto(combinedNewInfos);
functionSetInfos.combineInto(combinedSetInfos);
BoolStructValuesMap combinedCopyInfos;
functionCopyInfos.combineInto(combinedCopyInfos);

// Prepare data we will need later.
SubTypes subTypes(*module);
PCVStructValuesMap rawNewInfos;
if (refTest) {
// The refTest optimizations require the raw new infos (see above), but we
// can skip copying here if we'll never read this.
rawNewInfos = combinedNewInfos;
}

// Handle subtyping. The combined infos so far contain data that represents
// each struct.new and struct.set's operation on the struct type used in
// that instruction. That is, if we do a struct.set to type T, the value was
// noted for type T. But our actual goal is to answer questions about
// struct.gets. Specifically, when later we see:
//
// (struct.get $A x (REF-1))
//
// Then we want to be aware of all the relevant struct.sets, that is, the
// sets that can write data that this get reads. Given a set
//
// (struct.set $B x (REF-2) (..value..))
//
// then
//
// 1. If $B is a subtype of $A, it is relevant: the get might read from a
// struct of type $B (i.e., REF-1 and REF-2 might be identical, and both
// be a struct of type $B).
// 2. If $B is a supertype of $A that still has the field x then it may
// also be relevant: since $A is a subtype of $B, the set may write to a
// struct of type $A (and again, REF-1 and REF-2 may be identical).
//
// Thus, if either $A <: $B or $B <: $A then we must consider the get and
// set to be relevant to each other. To make our later lookups for gets
// efficient, we therefore propagate information about the possible values
// in each field to both subtypes and supertypes.
//
// struct.new on the other hand knows exactly what type is being written to,
// and so given a get of $A and a new of $B, the new is relevant for the get
// iff $A is a subtype of $B, so we only need to propagate in one direction
// there, to supertypes.
//
// An exception to the above are copies. If a field is copied then even
// struct.new information cannot be assumed to be precise:
//
// // A :> B :> C
// ..
// new B(20);
// ..
// A1->f0 = A2->f0; // Either of these might refer to an A, B, or C.
// ..
// foo(A->f0); // These can contain 20,
// foo(C->f0); // if the copy read from B.
//
// To handle that, copied fields are treated like struct.set ones (by
// copying the struct.new data to struct.set). Note that we must propagate
// copying to subtypes first, as in the example above the struct.new values
// of subtypes must be taken into account (that is, A or a subtype is being
// copied, so we want to do the same thing for B and C as well as A, since
// a copy of A means it could be a copy of B or C).
StructUtils::TypeHierarchyPropagator<Bool> boolPropagator(subTypes);
boolPropagator.propagateToSubTypes(combinedCopyInfos);
for (auto& [type, copied] : combinedCopyInfos) {
for (Index i = 0; i < copied.size(); i++) {
if (copied[i]) {
combinedSetInfos[type][i].combine(combinedNewInfos[type][i]);
}
}
}

StructUtils::TypeHierarchyPropagator<PossibleConstantValues> propagator(
subTypes);
propagator.propagateToSuperTypes(combinedNewInfos);
propagator.propagateToSuperAndSubTypes(combinedSetInfos);

// Combine both sources of information into the final information that
// struct.gets care about.
PCVStructValuesMap combinedInfos = std::move(combinedNewInfos);
combinedSetInfos.combineInto(combinedInfos);

// Optimize.
// TODO: Skip this if we cannot optimize anything
FunctionOptimizer(combinedInfos, subTypes, rawNewInfos, refTest)
.run(runner, module);
}
};

} // anonymous namespace

Pass* createConstantFieldPropagationPass() {
return new ConstantFieldPropagation(false);
}

Pass* createConstantFieldPropagationRefTestPass() {
return new ConstantFieldPropagation(true);
}

} // namespace wasm