diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index a24c7b158b9..e7171985c59 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -2430,6 +2430,7 @@ def write_commands(commands, filename):
         ("--remove-unused-names", "--heap2local",),
         ("--heap-store-optimization",),
         ("--generate-stack-ir",),
+        ("--ldse",),
         ("--licm",),
         ("--local-subtyping",),
         ("--memory-packing",),
diff --git a/src/ir/replacer.h b/src/ir/replacer.h
new file mode 100644
index 00000000000..626a8f93733
--- /dev/null
+++ b/src/ir/replacer.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2021 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef wasm_ir_replacer_h
+#define wasm_ir_replacer_h
+
+// NOTE(review): the two include targets below were lost in transit and are
+// reconstructed from what this header uses (PostWalker/UnifiedExpressionVisitor
+// and std::unordered_map) - verify against the original patch.
+#include "wasm-traversal.h"
+
+#include <unordered_map>
+
+namespace wasm {
+
+// A map of replacements to perform on expressions. After filling the map,
+// simply do a walk(..) and they will be replaced.
+//
+// This is useful for a general replacement of expressions. Some passes can
+// store pointers to expressions and update only those, but then there is the
+// nesting problem,
+//
+//  (foo
+//   (bar
+//    (baz
+//
+// If we replace bar with something then the pointer to baz may change. Doing
+// the replacements in the order of a walk, as is done here, is always safe.
+struct ExpressionReplacer
+  : PostWalker<ExpressionReplacer,
+               UnifiedExpressionVisitor<ExpressionReplacer>> {
+  // Maps an expression to the expression that should replace it during the
+  // walk. (Template arguments here were reconstructed; the sanitized source
+  // had them stripped.)
+  std::unordered_map<Expression*, Expression*> replacements;
+
+  void visitExpression(Expression* curr) {
+    auto iter = replacements.find(curr);
+    if (iter != replacements.end()) {
+      replaceCurrent(iter->second);
+    }
+  }
+};
+
+} // namespace wasm
+
+#endif // wasm_ir_replacer_h
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index c2952e174b8..0260c1823f4 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -30,6 +30,7 @@ set(passes_SOURCES
   DeadArgumentElimination.cpp
   DeadArgumentElimination2.cpp
   DeadCodeElimination.cpp
+  DeadStoreElimination.cpp
   DeAlign.cpp
   DebugLocationPropagation.cpp
   DeNaN.cpp
diff --git a/src/passes/DeadStoreElimination.cpp b/src/passes/DeadStoreElimination.cpp
new file mode 100644
index 00000000000..8b5844bef87
--- /dev/null
+++ b/src/passes/DeadStoreElimination.cpp
@@ -0,0 +1,682 @@
+/*
+ * Copyright 2021 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Analyzes stores and loads of non-local state, and optimizes them in various
+// ways. For example, a store that is never read, because the global state will
+// be trampled anyhow, can be removed as dead.
+//
+// "Store" is used generically here to mean a write to a non-local location,
+// which includes:
+//
+//  * Stores to linear memory (Store).
+//  * Stores to globals (GlobalSet).
+//  * Stores to GC data (StructSet, ArraySet)
+//
+// This pass optimizes all of the above. It does so using a generic framework in
+// order to share as much code as possible between them. This has downsides for
+// globals, in particular, as they could be optimized with an IR that is tailor-
+// made for scanning of global indexes (much as we do in our analyses of locals
+// in other places). However, global operations are also less common than memory
+// and GC operations, so hopefully the tradeoff is reasonable.
+//
+// The generic framework here can handle both "statically" connected loads and
+// stores - for example, a load of a global of index N, after a store to that
+// index - and "dynamically" connected loads and stores - for example, a load of
+// a GC struct field N from a pointer P, after a store to that same pointer and
+// field. "Dynamic" here is used in the sense that we don't have a simple static
+// indexing of all the things we care about, sometimes called "lanes" in other
+// implementations. Instead we need to care about pointer identity, aliasing,
+// etc., which means we need to "dynamically" compare loads and stores and not
+// just a "lane" index computed for them. Note that in theory an index could
+// still be computed for such things, but probably at the cost of greater
+// complexity; if speed becomes an issue, then a refactoring in that direction
+// may be necessary. Such a refactoring might have downsides, however: While
+// having a single index for each operation is efficient, we would also need
+// a way to represent "wildcards" - things that affect multiple indexes. For
+// example, an indirect call must be assumed to affect anything. But more
+// complex cases include a GC store of a certain type, which we can infer may
+// affect anything with a relevant subtype (but others cannot alias). To handle
+// that, we would need to store a set of indexes, already losing much of the
+// benefit of the indexed approach.
Instead, the current code keeps things very
+// simple by just asking "may these two things interact", in which we can just
+// check the subtyping, etc.
+//
+
+// NOTE(review): the nine include targets below were lost in transit and are
+// reconstructed from the symbols this file uses (CFGWalker, effect analysis,
+// LocalGraph, Properties::getFallthrough, ExpressionReplacer, Builder,
+// UniqueNonrepeatingDeferredQueue, Pass) - verify against the original patch.
+#include "cfg/cfg-traversal.h"
+#include "ir/effects.h"
+#include "ir/local-graph.h"
+#include "ir/properties.h"
+#include "ir/replacer.h"
+#include "pass.h"
+#include "support/unique_deferring_queue.h"
+#include "wasm-builder.h"
+#include "wasm.h"
+
+namespace wasm {
+
+namespace {
+
+// A variation of LocalGraph that can also compare expressions to check for
+// their equivalence. Basic LocalGraph just looks at locals, while this class
+// goes further and looks at the structure of the expression, taking into
+// account fallthrough values and other factors, in order to handle common
+// cases of obviously-identical things. To achieve that, it needs to know the
+// pass options and features used, which we avoid adding to the basic
+// LocalGraph.
+struct ComparingLocalGraph : public LocalGraph {
+  PassOptions& passOptions;
+  Module& wasm;
+
+  ComparingLocalGraph(Function* func, PassOptions& passOptions, Module& wasm)
+    : LocalGraph(func), passOptions(passOptions), wasm(wasm) {}
+
+  // Check whether the values of two expressions will definitely be equal at
+  // runtime.
+  // TODO: move to LocalGraph if we find more users?
+  bool equalValues(Expression* a, Expression* b) {
+    // Compare the fallthrough values, which skips irrelevant wrappers like
+    // blocks that simply yield a value.
+    a = Properties::getFallthrough(a, passOptions, wasm);
+    b = Properties::getFallthrough(b, passOptions, wasm);
+    // Two local.gets are equal if the local graph proves they must carry the
+    // same value.
+    if (auto* aGet = a->dynCast<LocalGet>()) {
+      if (auto* bGet = b->dynCast<LocalGet>()) {
+        if (LocalGraph::equivalent(aGet, bGet)) {
+          return true;
+        }
+      }
+    }
+    // Two constants are equal if their literal values are.
+    if (auto* aConst = a->dynCast<Const>()) {
+      if (auto* bConst = b->dynCast<Const>()) {
+        return aConst->value == bConst->value;
+      }
+    }
+    return false;
+  }
+};
+
+// Parent class of all implementations of the logic of identifying stores etc.
+// One implementation of Logic can handle globals, another memory, and another
+// GC, etc., implementing the various hooks appropriately.
+struct Logic {
+  Function* func;
+
+  Logic(Function* func, PassOptions& passOptions, Module& wasm) : func(func) {}
+
+  //============================================================================
+  // Hooks to identify relevant things to include in the analysis.
+  //============================================================================
+
+  // Returns whether an expression is a store.
+  //
+  // The main code will automatically ignore unreachable (irrelevant) stores.
+  bool isStore(Expression* curr) { WASM_UNREACHABLE("unimp"); };
+
+  // Returns whether an expression is a load.
+  //
+  // The main code will automatically ignore unreachable (irrelevant) loads.
+  bool isLoad(Expression* curr) { WASM_UNREACHABLE("unimp"); };
+
+  // Returns whether the expression is a barrier to our analysis: something that
+  // we should stop when we see it, because it could do things that we cannot
+  // analyze. A barrier will definitely pose a problem for us, as opposed to
+  // something mayInteract() returns true for - we will check for interactions
+  // later for mayInteract()s, but with barriers we don't need to.
+  //
+  // The default behavior here considers all calls to be barriers. Subclasses
+  // can use whole-program information to do better.
+  bool isBarrier(Expression* curr, const ShallowEffectAnalyzer& currEffects) {
+    // TODO: ignore throws of an exception that is definitely caught in this
+    //       function
+    // TODO: if we add an "ignore after trap mode" (to assume nothing happens
+    //       after a trap) then we could stop assuming any trap can lead to
+    //       access of global data, likely greatly reducing the number of
+    //       barriers.
+    return currEffects.calls || currEffects.throws() || currEffects.trap ||
+           currEffects.branchesOut;
+  };
+
+  // Returns whether an expression may interact with loads and stores in
+  // interesting ways. This is only called if isStore(), isLoad(), and
+  // isBarrier() all return false; that is, if we cannot identify the expression
+  // as one of those simple categories, this allows us to still care about it in
+  // our analysis.
+  bool mayInteract(Expression* curr, const ShallowEffectAnalyzer& currEffects) {
+    WASM_UNREACHABLE("unimp");
+  }
+
+  //============================================================================
+  // Hooks that run during the analysis
+  //============================================================================
+
+  // Returns whether an expression is a load that corresponds to a store, that
+  // is, that loads the exact data that the store writes.
+  bool isLoadFrom(Expression* curr,
+                  const ShallowEffectAnalyzer& currEffects,
+                  Expression* store) {
+    WASM_UNREACHABLE("unimp");
+  };
+
+  // Returns whether an expression tramples a store completely, overwriting all
+  // the store's written data.
+  //
+  // This is only called if isLoadFrom() returns false, as we assume there is no
+  // single instruction of interest to us that can do both.
+  bool isTrample(Expression* curr,
+                 const ShallowEffectAnalyzer& currEffects,
+                 Expression* store) {
+    WASM_UNREACHABLE("unimp");
+  };
+
+  // Returns whether an expression may interact with another in a way that we
+  // cannot fully analyze, and so we must give up and assume the very worst.
+  // This is only called if isLoadFrom() and isTrample() both return false.
+  //
+  // This is similar to mayInteract(), but considers a specific interaction with
+  // another particular expression. mayInteract() leads to it being included in
+  // the analysis, during which mayInteractWith() will be called.
+  bool mayInteractWith(Expression* curr,
+                       const ShallowEffectAnalyzer& currEffects,
+                       Expression* store) {
+    WASM_UNREACHABLE("unimp");
+  };
+
+  //============================================================================
+  // Hooks used when applying optimizations after the analysis.
+  //============================================================================
+
+  // Given a store that is not needed, get drops of its children to replace it
+  // with. This effectively removes the store without removing its children.
+  Expression* replaceStoreWithDrops(Expression* store, Builder& builder) {
+    WASM_UNREACHABLE("unimp");
+  };
+};
+
+// Represent all barriers in a simple way.
+static Nop nop;
+static Expression* const barrier = &nop;
+
+// Information in a basic block in the main analysis. All we use is a simple
+// list of relevant expressions (stores, loads, and things that interact with
+// them).
+struct BasicBlockInfo {
+  std::vector<Expression*> exprs;
+};
+
+// Core code to generate the relevant CFG, analyze it, and optimize it.
+//
+// This is as generic as possible over what a "store" actually is; all the
+// specific logic of handling globals vs memory vs the GC heap is all left
+// to a LogicType that this is templated on, which subclasses from Logic.
+template<typename LogicType>
+struct DeadStoreCFG
+  : public CFGWalker<DeadStoreCFG<LogicType>,
+                     UnifiedExpressionVisitor<DeadStoreCFG<LogicType>>,
+                     BasicBlockInfo> {
+  Function* func;
+  PassOptions& passOptions;
+  LogicType logic;
+
+  DeadStoreCFG(Module& wasm, Function* func, PassOptions& passOptions)
+    : func(func), passOptions(passOptions), logic(func, passOptions, wasm) {
+    this->setModule(&wasm);
+  }
+
+  ~DeadStoreCFG() {}
+
+  void visitExpression(Expression* curr) {
+    if (!this->currBasicBlock) {
+      return;
+    }
+
+    ShallowEffectAnalyzer currEffects(passOptions, *this->getModule(), curr);
+
+    auto& exprs = this->currBasicBlock->contents.exprs;
+
+    // Add all relevant things to the list of exprs for the current basic block.
+    if (isStore(curr) || isLoad(curr)) {
+      exprs.push_back(curr);
+    } else if (logic.isBarrier(curr, currEffects)) {
+      // Barriers can be very common, so as a minor optimization avoid having
+      // consecutive ones; a single barrier will stop us.
+      if (exprs.empty() || exprs.back() != barrier) {
+        exprs.push_back(barrier);
+      }
+    } else if (logic.mayInteract(curr, currEffects)) {
+      exprs.push_back(curr);
+    }
+  }
+
+  // Filter out unreachable (irrelevant) loads and stores.
+  bool isStore(Expression* curr) {
+    return curr->type != Type::unreachable && logic.isStore(curr);
+  }
+  bool isLoad(Expression* curr) {
+    return curr->type != Type::unreachable && logic.isLoad(curr);
+  }
+
+  // All the stores we can optimize, that is, stores that write to a non-local
+  // place from which we have a full understanding of all the loads. This data
+  // structure maps such an understood store to the list of loads for it. In
+  // particular, if that list is empty then the store is dead (since we have
+  // a full understanding of all the loads, and there are none), and if the list
+  // is non-empty then only those loads read that store's value, and nothing
+  // else.
+  std::unordered_map<Expression*, std::vector<Expression*>> understoodStores;
+
+  using Self = DeadStoreCFG<LogicType>;
+
+  using BasicBlock = typename CFGWalker<Self,
+                                        UnifiedExpressionVisitor<Self>,
+                                        BasicBlockInfo>::BasicBlock;
+
+  void analyze() {
+    // create the CFG by walking the IR
+    this->walkFunction(func);
+
+    // Flow the values and conduct the analysis. This finds each relevant store
+    // and then flows from it to all possible uses through the CFG.
+    //
+    // TODO: Optimize. This is a pretty naive way to flow the values, but it
+    //       should be reasonable assuming most stores are quickly seen as
+    //       having possible interactions (e.g., when we encounter a barrier),
+    //       and so most flows are halted very quickly.
+
+    for (auto& block : this->basicBlocks) {
+      for (size_t i = 0; i < block->contents.exprs.size(); i++) {
+        auto* store = block->contents.exprs[i];
+
+        if (!isStore(store)) {
+          continue;
+        }
+
+        // The store is assumed to be understood (and hence present on the map)
+        // until we see a problem.
+        understoodStores[store];
+
+        // Flow this store forward through basic blocks, looking for
+        // interactions.
+        UniqueNonrepeatingDeferredQueue<BasicBlock*> work;
+
+        // When we find something we cannot optimize, stop flowing and mark the
+        // store as unoptimizable.
+        auto halt = [&]() {
+          work.clear();
+          understoodStores.erase(store);
+        };
+
+        // Scan through a block, starting from a certain position, looking for
+        // interactions.
+        auto scanBlock = [&](BasicBlock* block, size_t from) {
+          for (size_t i = from; i < block->contents.exprs.size(); i++) {
+            auto* curr = block->contents.exprs[i];
+
+            if (curr == barrier) {
+              halt();
+              return;
+            }
+
+            ShallowEffectAnalyzer currEffects(
+              passOptions, *this->getModule(), curr);
+
+            if (logic.isLoadFrom(curr, currEffects, store)) {
+              // We found a definite load of this store, note it.
+              understoodStores[store].push_back(curr);
+            } else if (logic.isTrample(curr, currEffects, store)) {
+              // We do not need to look any further along this block, or in
+              // anything it can reach, as this store has been trampled.
+              return;
+            } else if (logic.mayInteractWith(curr, currEffects, store)) {
+              // Stop: we cannot fully analyze the uses of this store.
+              halt();
+              return;
+            }
+          }
+
+          // We reached the end of the block, prepare to flow onward.
+          for (auto* out : block->out) {
+            work.push(out);
+          }
+
+          if (block == this->exit) {
+            // Any value flowing out can be reached by global code outside the
+            // function after we leave.
+            halt();
+          }
+        };
+
+        // Start the flow in the current location in the block, right after the
+        // store itself.
+        scanBlock(block.get(), i + 1);
+
+        // Next, continue flowing through other blocks.
+        while (!work.empty()) {
+          auto* curr = work.pop();
+          scanBlock(curr, 0);
+        }
+      }
+    }
+  }
+
+  // Optimizes the function, and returns whether we made any changes.
+  bool optimize() {
+    analyze();
+
+    Builder builder(*this->getModule());
+
+    ExpressionReplacer replacer;
+
+    // Optimize the stores that have no unknown interactions.
+    for (auto& kv : understoodStores) {
+      auto* store = kv.first;
+      const auto& loads = kv.second;
+      if (loads.empty()) {
+        // This store has no loads, which means it is trampled by other stores
+        // before it is read, and so it can just be dropped.
+        //
+        // Note that this is valid even if we care about implicit traps, such as
+        // a trap from a store that is out of bounds. We are removing one store,
+        // but it was trampled later, which means that a trap will still occur
+        // at that time, if the store is out of bounds; furthermore, we do not
+        // delay the trap in a noticeable way since if the path between the
+        // stores crosses anything that affects global state then we would not
+        // have considered the store to be trampled (it could have been
+        // interacted with, which would have stopped the analysis).
+        replacer.replacements[store] =
+          logic.replaceStoreWithDrops(store, builder);
+      }
+      // TODO: When there are loads, we can replace the loads as well (by saving
+      //       the value to a local for that global, etc.).
+      //       Note that we may need to leave the loads if they have side
+      //       effects, like a possible trap on memory loads, but they can be
+      //       left as dropped, the same as with store inputs.
+    }
+
+    if (replacer.replacements.empty()) {
+      return false;
+    }
+
+    replacer.walk(this->func->body);
+
+    return true;
+  }
+};
+
+// A logic that uses a local graph, as it needs to compare pointers.
+// TODO: run the LocalGraph only on relevant locals (only i32s can be pointers
+//       for loads and stores; only references can be pointers for GC)
+struct ComparingLogic : public Logic {
+  ComparingLocalGraph localGraph;
+
+  ComparingLogic(Function* func, PassOptions& passOptions, Module& wasm)
+    : Logic(func, passOptions, wasm), localGraph(func, passOptions, wasm) {}
+};
+
+// Optimize module globals: GlobalSet/GlobalGet.
+struct GlobalLogic : public Logic {
+  GlobalLogic(Function* func, PassOptions& passOptions, Module& wasm)
+    : Logic(func, passOptions, wasm) {}
+
+  bool isStore(Expression* curr) { return curr->is<GlobalSet>(); }
+
+  bool isLoad(Expression* curr) { return curr->is<GlobalGet>(); }
+
+  bool mayInteract(Expression* curr, const ShallowEffectAnalyzer& currEffects) {
+    // Globals are easy to statically analyze: there are no interactions we
+    // cannot be sure about.
+    return false;
+  }
+
+  bool isLoadFrom(Expression* curr,
+                  const ShallowEffectAnalyzer& currEffects,
+                  Expression* store_) {
+    // A get of the same global reads exactly what the set wrote.
+    if (auto* load = curr->dynCast<GlobalGet>()) {
+      auto* store = store_->cast<GlobalSet>();
+      return load->name == store->name;
+    }
+    return false;
+  }
+
+  bool isTrample(Expression* curr,
+                 const ShallowEffectAnalyzer& currEffects,
+                 Expression* store_) {
+    // A later set of the same global fully overwrites the earlier one.
+    if (auto* otherStore = curr->dynCast<GlobalSet>()) {
+      auto* store = store_->cast<GlobalSet>();
+      return otherStore->name == store->name;
+    }
+    return false;
+  }
+
+  bool mayInteractWith(Expression* curr,
+                       const ShallowEffectAnalyzer& currEffects,
+                       Expression* store) {
+    return false;
+  }
+
+  Expression* replaceStoreWithDrops(Expression* store, Builder& builder) {
+    // A global.set has a single child, the value, which we must keep (for its
+    // side effects) in a drop.
+    return builder.makeDrop(store->cast<GlobalSet>()->value);
+  }
+};
+
+// Optimize memory stores/loads.
+struct MemoryLogic : public ComparingLogic {
+  MemoryLogic(Function* func, PassOptions& passOptions, Module& wasm)
+    : ComparingLogic(func, passOptions, wasm) {}
+
+  bool isStore(Expression* curr) { return curr->is<Store>(); }
+
+  bool isLoad(Expression* curr) { return curr->is<Load>(); }
+
+  bool mayInteract(Expression* curr, const ShallowEffectAnalyzer& currEffects) {
+    return currEffects.readsMemory || currEffects.writesMemory;
+  }
+
+  bool isLoadFrom(Expression* curr,
+                  const ShallowEffectAnalyzer& currEffects,
+                  Expression* store_) {
+    if (curr->type == Type::unreachable) {
+      return false;
+    }
+    if (auto* load = curr->dynCast<Load>()) {
+      auto* store = store_->cast<Store>();
+
+      // Atomic stores are dangerous, since they have additional trapping
+      // behavior - they trap on unaligned addresses. For simplicity, only
+      // consider the case where atomicity is identical.
+      // TODO: use ignoreImplicitTraps
+      if (store->isAtomic() != load->isAtomic()) {
+        return false;
+      }
+
+      // TODO: For now, only handle the obvious case where the operations are
+      //       identical in size and offset.
+      return load->bytes == store->bytes &&
+             load->bytes == load->type.getByteSize() &&
+             load->offset == store->offset &&
+             localGraph.equalValues(load->ptr, store->ptr);
+    }
+    return false;
+  }
+
+  bool isTrample(Expression* curr,
+                 const ShallowEffectAnalyzer& currEffects,
+                 Expression* store_) {
+    if (auto* otherStore = curr->dynCast<Store>()) {
+      auto* store = store_->cast<Store>();
+
+      // As in isLoadFrom, atomic stores are dangerous.
+      if (store->isAtomic() != otherStore->isAtomic()) {
+        return false;
+      }
+
+      // TODO: Compare in detail. For now, handle the obvious case where the
+      //       stores are identical in size, offset, etc., so that identical
+      //       repeat stores are handled. (An example of a case we do not handle
+      //       yet is a store of 1 byte that is trampled by a store of 2 bytes.)
+      return otherStore->bytes == store->bytes &&
+             otherStore->offset == store->offset &&
+             localGraph.equalValues(otherStore->ptr, store->ptr);
+    }
+    return false;
+  }
+
+  bool mayInteractWith(Expression* curr,
+                       const ShallowEffectAnalyzer& currEffects,
+                       Expression* store) {
+    // Anything we did not identify so far is dangerous.
+    //
+    // Among other things, this includes compare-and-swap, which does both a
+    // read and a write, which our infrastructure is not build to optimize.
+    return currEffects.readsMemory || currEffects.writesMemory;
+  }
+
+  Expression* replaceStoreWithDrops(Expression* store, Builder& builder) {
+    auto* castStore = store->cast<Store>();
+    return builder.makeSequence(builder.makeDrop(castStore->ptr),
+                                builder.makeDrop(castStore->value));
+  }
+};
+
+// Optimize GC data: StructGet/StructSet.
+// TODO: Arrays.
+struct GCLogic : public ComparingLogic {
+  GCLogic(Function* func, PassOptions& passOptions, Module& wasm)
+    : ComparingLogic(func, passOptions, wasm) {}
+
+  bool isStore(Expression* curr) { return curr->is<StructSet>(); }
+
+  bool isLoad(Expression* curr) { return curr->is<StructGet>(); }
+
+  bool mayInteract(Expression* curr, const ShallowEffectAnalyzer& currEffects) {
+    return currEffects.readsMutableStruct || currEffects.writesStruct;
+  }
+
+  bool isLoadFrom(Expression* curr,
+                  const ShallowEffectAnalyzer& currEffects,
+                  Expression* store_) {
+    if (auto* load = curr->dynCast<StructGet>()) {
+      auto* store = store_->cast<StructSet>();
+
+      // Note that we do not need to check the type: we check that the
+      // reference is identical, and if it is then the types must be compatible
+      // in addition to them pointing to the same memory.
+      return localGraph.equalValues(load->ref, store->ref) &&
+             load->index == store->index;
+    }
+    return false;
+  }
+
+  bool isTrample(Expression* curr,
+                 const ShallowEffectAnalyzer& currEffects,
+                 Expression* store_) {
+    if (auto* otherStore = curr->dynCast<StructSet>()) {
+      auto* store = store_->cast<StructSet>();
+
+      // See note in isLoadFrom about typing.
+      return localGraph.equalValues(otherStore->ref, store->ref) &&
+             otherStore->index == store->index;
+    }
+    return false;
+  }
+
+  // Check whether two GC operations may alias memory.
+  template<typename U, typename V> bool mayAlias(U* u, V* v) {
+    // If one of the inputs is unreachable, it does not execute, and so there
+    // cannot be aliasing.
+    auto uType = u->ref->type;
+    auto vType = v->ref->type;
+    if (uType == Type::unreachable || vType == Type::unreachable) {
+      return false;
+    }
+
+    // If the index does not match, no aliasing is possible.
+    if (u->index != v->index) {
+      return false;
+    }
+
+    // Even if the index is identical, aliasing still may be impossible. For
+    // aliasing to occur, the same data must be pointed to by both references,
+    // which means the actual data is a subtype of both the present types. For
+    // that to be possible, one of the present heap types must be a subtype of
+    // the other (note that we check heap types, in order to ignore
+    // nullability).
+    auto uHeapType = uType.getHeapType();
+    auto vHeapType = vType.getHeapType();
+    return HeapType::isSubType(uHeapType, vHeapType) ||
+           HeapType::isSubType(vHeapType, uHeapType);
+  }
+
+  bool mayInteractWith(Expression* curr,
+                       const ShallowEffectAnalyzer& currEffects,
+                       Expression* store_) {
+    auto* store = store_->cast<StructSet>();
+
+    // We already checked isLoadFrom and isTrample and it was neither of those,
+    // so just check if the memory can possibly alias.
+    if (auto* otherStore = curr->dynCast<StructSet>()) {
+      return mayAlias(otherStore, store);
+    }
+    if (auto* load = curr->dynCast<StructGet>()) {
+      return mayAlias(load, store);
+    }
+
+    // This is not a load or a store that we recognize; check for generic heap
+    // interactions.
+    return currEffects.readsMutableStruct || currEffects.writesStruct;
+  }
+
+  Expression* replaceStoreWithDrops(Expression* store, Builder& builder) {
+    auto* castStore = store->cast<StructSet>();
+    return builder.makeSequence(builder.makeDrop(castStore->ref),
+                                builder.makeDrop(castStore->value));
+  }
+};
+
+// Perform dead store elimination 100% locally, that is, without any whole-
+// program analysis. This is not very powerful, but can catch simple patterns of
+// obviously dead stores, and is useful for testing.
+//
+// This does all the optimizations (globals, memory, GC) in sequence on each
+// function, which is good for cache locality. That is the reason there are not
+// separate passes for each one.
+// TODO: the optimizations can perhaps share more things between them
+struct LocalDeadStoreElimination
+  : public WalkerPass<PostWalker<LocalDeadStoreElimination>> {
+  bool isFunctionParallel() override { return true; }
+
+  std::unique_ptr<Pass> create() override {
+    return std::make_unique<LocalDeadStoreElimination>();
+  }
+
+  void doWalkFunction(Function* func) {
+    // Optimize globals.
+    DeadStoreCFG<GlobalLogic>(*getModule(), func, getPassOptions()).optimize();
+
+    // Optimize memory.
+    DeadStoreCFG<MemoryLogic>(*getModule(), func, getPassOptions()).optimize();
+
+    // Optimize GC heap.
+    if (getModule()->features.hasGC()) {
+      DeadStoreCFG<GCLogic>(*getModule(), func, getPassOptions()).optimize();
+    }
+  }
+};
+
+} // anonymous namespace
+
+Pass* createLocalDeadStoreEliminationPass() {
+  return new LocalDeadStoreElimination();
+}
+
+} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 0e6e28267c2..7bb306251f5 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -131,6 +131,9 @@ void PassRegistry::registerPasses() {
     createConstantFieldPropagationRefTestPass);
   registerPass(
     "dce", "removes unreachable code", createDeadCodeEliminationPass);
+  registerPass("ldse",
+               "removes dead stores (only looking at local info)",
+               createLocalDeadStoreEliminationPass);
   registerPass("dealign",
                "forces all loads and stores to have alignment 1",
                createDeAlignPass);
diff --git a/src/passes/passes.h b/src/passes/passes.h
index be06369a9f8..9020bf7a7cf 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -86,6 +86,7 @@ Pass* createInstrumentBranchHintsPass();
 Pass* createInstrumentLocalsPass();
 Pass* createInstrumentMemoryPass();
 Pass* createLLVMMemoryCopyFillLoweringPass();
+Pass* createLocalDeadStoreEliminationPass();
 Pass* createLoopInvariantCodeMotionPass();
 Pass* createMemory64LoweringPass();
 Pass* createMemoryPackingPass();
diff --git a/test/lit/help/wasm-metadce.test b/test/lit/help/wasm-metadce.test
index 7c2dd577e86..b82a40c78a4 100644
--- a/test/lit/help/wasm-metadce.test
+++ b/test/lit/help/wasm-metadce.test
@@ -227,6 +227,9 @@
 ;; CHECK-NEXT:
 ;; CHECK-NEXT: --intrinsic-lowering lower away binaryen intrinsics
 ;; CHECK-NEXT:
+;; CHECK-NEXT: --ldse removes dead stores (only
+;; CHECK-NEXT: looking at local info)
+;; CHECK-NEXT:
 ;; CHECK-NEXT: --legalize-and-prune-js-interface legalizes the import/export
 ;; CHECK-NEXT: boundary and prunes when needed
 ;; CHECK-NEXT:
diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test
index 8a5ac67c2d5..b5c04c3a8a5 100644
--- a/test/lit/help/wasm-opt.test
+++ 
b/test/lit/help/wasm-opt.test @@ -259,6 +259,9 @@ ;; CHECK-NEXT: ;; CHECK-NEXT: --intrinsic-lowering lower away binaryen intrinsics ;; CHECK-NEXT: +;; CHECK-NEXT: --ldse removes dead stores (only +;; CHECK-NEXT: looking at local info) +;; CHECK-NEXT: ;; CHECK-NEXT: --legalize-and-prune-js-interface legalizes the import/export ;; CHECK-NEXT: boundary and prunes when needed ;; CHECK-NEXT: diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test index 0969ca927be..ede1c4cd5f8 100644 --- a/test/lit/help/wasm2js.test +++ b/test/lit/help/wasm2js.test @@ -191,6 +191,9 @@ ;; CHECK-NEXT: ;; CHECK-NEXT: --intrinsic-lowering lower away binaryen intrinsics ;; CHECK-NEXT: +;; CHECK-NEXT: --ldse removes dead stores (only +;; CHECK-NEXT: looking at local info) +;; CHECK-NEXT: ;; CHECK-NEXT: --legalize-and-prune-js-interface legalizes the import/export ;; CHECK-NEXT: boundary and prunes when needed ;; CHECK-NEXT: diff --git a/test/lit/passes/ldse.wast b/test/lit/passes/ldse.wast new file mode 100644 index 00000000000..fbab79eeb00 --- /dev/null +++ b/test/lit/passes/ldse.wast @@ -0,0 +1,1479 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. 
+;; (--remove-unused-names avoids names on blocks, which would hamper the +;; work in getFallthrough, as a name implies possible breaks) +;; RUN: wasm-opt %s -all --remove-unused-names --ldse -S -o - | filecheck %s + +(module + ;; CHECK: (type $A (struct (field (mut i32)))) + (type $A (struct (field (mut i32)))) + ;; CHECK: (type $C (struct (field (mut i32)) (field (mut i32)))) + + ;; CHECK: (type $B (struct (field (mut f64)))) + (type $B (struct (field (mut f64)))) + (type $C (struct (field (mut i32)) (field (mut i32)))) + + (memory 10 shared) + + ;; CHECK: (global $global$0 (mut i32) (i32.const 0)) + (global $global$0 (mut i32) (i32.const 0)) + ;; CHECK: (global $global$1 (mut i32) (i32.const 0)) + (global $global$1 (mut i32) (i32.const 0)) + + ;; CHECK: (func $simple-param (param $x (ref $A)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-param (param $x (ref $A)) + ;; a dead store using a parameter + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; another dead store + (struct.set $A 0 + (local.get $x) + (i32.const 20) + ) + ;; the last store escapes to the outside, and cannot be modified + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $simple-local + ;; CHECK-NEXT: (local $x (ref null $A)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: 
(local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-local + (local $x (ref null $A)) + ;; dead stores using a local + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 20) + ) + ;; the last store escapes to the outside, and cannot be modified + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $simple-reaching-trap + ;; CHECK-NEXT: (local $x (ref null $A)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $simple-reaching-trap + (local $x (ref null $A)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; a store reaching a trap may be observable from the outside later + (struct.set $A 0 + (local.get $x) + (i32.const 20) + ) + (unreachable) + ) + + ;; CHECK: (func $fallthrough (result (ref $A)) + ;; CHECK-NEXT: (local $x (ref null $A)) + ;; CHECK-NEXT: (block $func (result (ref $A)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (br_on_cast $func + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (rtt.canon $A) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $fallthrough (result (ref $A)) + (local $x (ref null $A)) + (block $func (result (ref $A)) + 
(struct.set $A 0 + ;; the reference can be seen to fall through this, proving the store is + ;; dead (due to the one after it). + (br_on_cast $func (ref null $A) (ref $A) + (local.get $x) + ) + (i32.const 20) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + (unreachable) + ) + ) + + ;; CHECK: (func $simple-fallthrough (param $x (ref $A)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref $A)) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (block (result (ref $A)) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-fallthrough (param $x (ref $A)) + ;; simple fallthrough through a block does not confuse us, this store is dead. + (struct.set $A 0 + (block (result (ref $A)) + (local.get $x) + ) + (i32.const 10) + ) + (struct.set $A 0 + (block (result (ref $A)) + (local.get $x) + ) + (i32.const 20) + ) + ) + + ;; CHECK: (func $get-ref (result (ref $A)) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $get-ref (result (ref $A)) + (unreachable) + ) + + ;; CHECK: (func $ref-changes (param $x (ref $A)) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (call $get-ref) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $ref-changes (param $x (ref $A)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; the reference changes here, so the first store is *not* dead + (local.set $x + (call $get-ref) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 20) + ) + ) + + ;; CHECK: (func $ref-may-change (param $x (ref $A)) (param $i i32) + ;; 
CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (local.get $i) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (call $get-ref) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $ref-may-change (param $x (ref $A)) (param $i i32) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; the reference may change here + (if + (local.get $i) + (then (local.set $x + (call $get-ref) + )) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 20) + ) + ) + + ;; CHECK: (func $simple-use (param $x (ref $A)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-use (param $x (ref $A)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 20) + ) + ;; the second store is used by this load, and so it is not dead + (drop + (struct.get $A 0 + (local.get $x) + ) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $incompatible-types (param $x (ref $A)) (param $y (ref $B)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $B 0 + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: 
(f64.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $incompatible-types (param $x (ref $A)) (param $y (ref $B)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; the second store cannot alias the first because their types differ, and + ;; so the second store does not interfere in seeing that the first is trampled + ;; (even though the index is identical, 0) + (struct.set $B 0 + (local.get $y) + (f64.const 20) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $incompatible-types-get (param $x (ref $A)) (param $y (ref $B)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $B 0 + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $incompatible-types-get (param $x (ref $A)) (param $y (ref $B)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; the types do not allow this to alias the set before it. 
+ (drop + (struct.get $B 0 + (local.get $y) + ) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $compatible-types (param $x (ref $A)) (param $y (ref $C)) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $C 0 + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $compatible-types (param $x (ref $A)) (param $y (ref $C)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; C is a subtype of A, so we can have aliasing between this store and both + ;; the previous and the subsequent store, and nothing can be optimized. + (struct.set $C 0 + (local.get $y) + (i32.const 20) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $compatible-types-get (param $x (ref $A)) (param $y (ref $C)) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $C 0 + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $compatible-types-get (param $x (ref $A)) (param $y (ref $C)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + (drop + (struct.get $C 0 + (local.get $y) + ) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $compatible-types-nullability-1 (param $x (ref $A)) (param $y (ref null $C)) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $C 0 + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; 
CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $compatible-types-nullability-1 (param $x (ref $A)) (param $y (ref null $C)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; As in $compatible-types, C is a subtype of A, so we can have aliasing + ;; between this store and both the previous and the subsequent store, and + ;; nothing can be optimized. In addition, $y is nullable while $x is not, + ;; which should not confuse us - the heap types matter, that is, the + ;; nullability is irrelevant. + (struct.set $C 0 + (local.get $y) + (i32.const 20) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + ;; CHECK: (func $compatible-types-nullability-2 (param $x (ref null $A)) (param $y (ref $C)) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $C 0 + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $compatible-types-nullability-2 (param $x (ref null $A)) (param $y (ref $C)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; As $compatible-types-nullability-1 , but nullability is reversed. 
+ (struct.set $C 0 + (local.get $y) + (i32.const 20) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + ;; CHECK: (func $compatible-types-nullability-3 (param $x (ref null $A)) (param $y (ref null $C)) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $C 0 + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $compatible-types-nullability-3 (param $x (ref null $A)) (param $y (ref null $C)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; As $compatible-types-nullability-1 , but all refs are nullable. + (struct.set $C 0 + (local.get $y) + (i32.const 20) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $foo + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $foo) + + ;; CHECK: (func $call (param $x (ref $A)) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $foo) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $call (param $x (ref $A)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; the analysis gives up on a call, where heap memory may be modified + (call $foo) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $through-branches (param $x (ref $A)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get 
$x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $through-branches (param $x (ref $A)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; the analysis is not confused by branching and merging; the first store is + ;; dead + (if (i32.const 1) + (then (nop) + (nop)) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $just-one-branch-trample (param $x (ref $A)) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $just-one-branch-trample (param $x (ref $A)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; a trample on just one branch is not enough + (if (i32.const 1) + (struct.set $A 0 + (local.get $x) + (i32.const 20) + ) + (nop) + ) + ) + + ;; CHECK: (func $two-branch-trample (param $x (ref $A)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $two-branch-trample (param $x (ref $A)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; a trample on both branch is enough + (if (i32.const 1) + (struct.set $A 0 + (local.get $x) + (i32.const 20) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + ) + + ;; CHECK: (func $just-one-branch-bad (param $x (ref $A)) + ;; CHECK-NEXT: (struct.set $A 0 + ;; 
CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (call $foo) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $just-one-branch-bad (param $x (ref $A)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; an unknown interaction on one branch is enough to make us give up + (if (i32.const 1) + (call $foo) + (nop) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $simple-in-branches (param $x (ref $A)) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 40) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-in-branches (param $x (ref $A)) + (if (i32.const 1) + (block + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; a dead store in one if arm + (struct.set $A 0 + (local.get $x) + (i32.const 20) + ) + ) + (block + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ;; another dead store in another arm + (struct.set $A 0 + (local.get $x) + (i32.const 40) + ) + ) + ) + ) + + ;; CHECK: (func $different-refs-same-type (param $x (ref $A)) (param $y (ref $A)) + ;; CHECK-NEXT: (struct.set $A 0 + ;; 
CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $different-refs-same-type (param $x (ref $A)) (param $y (ref $A)) + (struct.set $A 0 + (local.get $x) + (i32.const 10) + ) + ;; we do not know if x == y or not, and so must assume none of these are dead. + (struct.set $A 0 + (local.get $y) + (i32.const 20) + ) + (struct.set $A 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $different-indexes (param $x (ref $C)) + ;; CHECK-NEXT: (struct.set $C 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $C 1 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $different-indexes (param $x (ref $C)) + (struct.set $C 0 + (local.get $x) + (i32.const 10) + ) + ;; stores to different indexes do not interact with each other. this store is + ;; dead because of the one after it, but the former is not dead. 
+ (struct.set $C 1 + (local.get $x) + (i32.const 20) + ) + (struct.set $C 1 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $different-pointers (param $x (ref $C)) (param $y (ref $C)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $C 1 + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $C 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $different-pointers (param $x (ref $C)) (param $y (ref $C)) + (struct.set $C 0 + (local.get $x) + (i32.const 10) + ) + ;; stores to different indexes do not interact with each other, even if the + ;; pointers are not known to be equivalent or not. this allows us to see that + ;; the first store is trampled by the last store (both using index 0), as we + ;; can ignore this store (to index 1) + (struct.set $C 1 + (local.get $y) + (i32.const 20) + ) + (struct.set $C 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $different-pointers-get (param $x (ref $C)) (param $y (ref $C)) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $C 1 + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $C 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $different-pointers-get (param $x (ref $C)) (param $y (ref $C)) + (struct.set $C 0 + (local.get $x) + (i32.const 10) + ) + ;; a load of a different index cannot interact with the first store, allowing + ;; us to see the store is trampled (by the last store) before it has any + ;; uses, and 
so it can be dropped + (drop + (struct.get $C 1 + (local.get $y) + ) + ) + (struct.set $C 0 + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $no-basic-blocks + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $no-basic-blocks + ;; Check we don't crash on a function with no basic blocks at all. + (unreachable) + ) + + ;; CHECK: (func $global + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.set $global$1 + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.set $global$0 + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $global + ;; globals are optimized as well, and we have more precise data there than on + ;; GC references - aliasing is impossible, and so we can tell this first one + ;; is dead due to the last, ignoring the unaliasing one in the middle + (global.set $global$0 + (i32.const 10) + ) + (global.set $global$1 + (i32.const 20) + ) + (global.set $global$0 + (i32.const 30) + ) + ) + + ;; CHECK: (func $global-trap + ;; CHECK-NEXT: (global.set $global$0 + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.set $global$0 + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $global-trap + (global.set $global$0 + (i32.const 10) + ) + ;; a trap (even conditional) prevents our optimizations, global state may be + ;; observed if another export is called later after the trap. 
+ (if + (i32.const 1) + (unreachable) + ) + (global.set $global$0 + (i32.const 20) + ) + ) + + ;; CHECK: (func $memory-const + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-const + ;; test dead store elimination of writes to memory at constant offsets + (i32.store + (i32.const 10) + (i32.const 20) + ) + (i32.store + (i32.const 10) + (i32.const 30) + ) + ) + + ;; CHECK: (func $memory-param (param $x i32) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-param (param $x i32) + ;; test dead store elimination of writes to memory using a local + (i32.store + (local.get $x) + (i32.const 20) + ) + (i32.store + (local.get $x) + (i32.const 30) + ) + ) + + ;; CHECK: (func $memory-different-const + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: (i32.const 40) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-different-const + (i32.store + (i32.const 10) + (i32.const 20) + ) + (i32.store + (i32.const 30) + (i32.const 40) + ) + ) + + ;; CHECK: (func $memory-different-offset + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store offset=1 + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-different-offset + (i32.store + 
(i32.const 10) + (i32.const 20) + ) + (i32.store offset=1 + (i32.const 10) + (i32.const 30) + ) + ) + + ;; CHECK: (func $memory-different-size + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store16 + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-different-size + (i32.store + (i32.const 10) + (i32.const 20) + ) + (i32.store16 + (i32.const 10) + (i32.const 30) + ) + ) + + ;; CHECK: (func $memory-other-interference + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (memory.fill + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-other-interference + (i32.store + (i32.const 10) + (i32.const 20) + ) + (memory.fill + (i32.const 0) + (i32.const 0) + (i32.const 30) + ) + (i32.store + (i32.const 10) + (i32.const 30) + ) + ) + + ;; CHECK: (func $memory-load + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.load + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-load + (i32.store + (i32.const 10) + (i32.const 20) + ) + (drop + (i32.load + (i32.const 10) + ) + ) + (i32.store + (i32.const 10) + (i32.const 30) + ) + ) + + ;; CHECK: (func $memory-load-different-offset + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.load offset=1 + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: 
) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-load-different-offset + (i32.store + (i32.const 10) + (i32.const 20) + ) + (drop + (i32.load offset=1 + (i32.const 10) + ) + ) + (i32.store + (i32.const 10) + (i32.const 30) + ) + ) + + ;; CHECK: (func $memory-load-different-ptr + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.load + ;; CHECK-NEXT: (i32.const 11) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-load-different-ptr + (i32.store + (i32.const 10) + (i32.const 20) + ) + ;; this load's ptr does not match the last store's, and so the analysis + ;; assumes they might interact + (drop + (i32.load + (i32.const 11) + ) + ) + (i32.store + (i32.const 10) + (i32.const 30) + ) + ) + + ;; CHECK: (func $memory-load-different-bytes + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.load8_s + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-load-different-bytes + (i32.store + (i32.const 10) + (i32.const 20) + ) + ;; the load's number of bytes does not match the store, so assume they + ;; interact somehow + (drop + (i32.load8_s + (i32.const 10) + ) + ) + (i32.store + (i32.const 10) + (i32.const 30) + ) + ) + + ;; CHECK: (func $memory-store-small + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: 
(i32.store8 + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-store-small + ;; we can optimize dead stores of fewer bytes than the default + (i32.store8 + (i32.const 10) + (i32.const 20) + ) + (i32.store8 + (i32.const 10) + (i32.const 30) + ) + ) + + ;; CHECK: (func $memory-store-align + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store align=1 + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-store-align + ;; alignment is just a perf hint, and does not prevent our optimizations + (i32.store align=2 + (i32.const 10) + (i32.const 20) + ) + (i32.store align=1 + (i32.const 10) + (i32.const 30) + ) + ) + + ;; CHECK: (func $memory-same-size-different-types + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (f32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-same-size-different-types + ;; it doesn't matter if we are trampled by a different type; we are still + ;; trampled, and so this store is dead. 
+ (i32.store + (i32.const 10) + (i32.const 0) + ) + (f32.store + (i32.const 10) + (f32.const 0) + ) + ) + + ;; CHECK: (func $memory-same-size-different-types-b + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (f32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (f32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-same-size-different-types-b + (i64.store32 + (i32.const 10) + (i64.const 0) + ) + (f32.store + (i32.const 10) + (f32.const 0) + ) + ) + + ;; CHECK: (func $memory-atomic1 + ;; CHECK-NEXT: (i32.atomic.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-atomic1 + ;; an atomic store is not killed by a normal one (the atomic one would trap + ;; on misalignment, for example) + (i32.atomic.store + (i32.const 10) + (i32.const 0) + ) + (i32.store + (i32.const 10) + (i32.const 0) + ) + ) + + ;; CHECK: (func $memory-atomic2 + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.atomic.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-atomic2 + ;; a normal store cannot be killed by an atomic one: if the atomic store traps + ;; then the first store's value will remain untrampled + (i32.store + (i32.const 10) + (i32.const 0) + ) + (i32.atomic.store + (i32.const 10) + (i32.const 0) + ) + ) + + ;; CHECK: (func $memory-atomic3 + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.atomic.store + ;; CHECK-NEXT: 
(i32.const 10) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-atomic3 + ;; atomic stores *can* trample each other. + (i32.atomic.store + (i32.const 10) + (i32.const 0) + ) + (i32.atomic.store + (i32.const 10) + (i32.const 0) + ) + ) + + ;; CHECK: (func $memory-unreachable + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.store + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $memory-unreachable + (i32.store + (i32.const 10) + (i32.const 10) + ) + ;; an unreachable store does not trample + (i32.store + (unreachable) + (i32.const 20) + ) + (i32.store + (i32.const 10) + (i32.const 30) + ) + ) + + ;; CHECK: (func $gc-unreachable (param $x (ref $A)) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (loop $loop + ;; CHECK-NEXT: (br $loop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $gc-unreachable (param $x (ref $A)) + ;; An unreachable store should be ignored, even if dead. + (struct.set $A 0 + (local.get $x) + ;; Test unreachability without an explicit unreachable, using a loop that + ;; never exits. + (loop $loop + (br $loop) + ) + ) + ;; An apparent trample of that dead store. + (struct.set $A 0 + (local.get $x) + (i32.const 20) + ) + ) +)