-
Notifications
You must be signed in to change notification settings - Fork 857
[wasm-reduce] Empty functions with delta debugging #8640
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,113 @@ | ||
| /* | ||
| * Copyright 2026 WebAssembly Community Group participants | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #ifndef wasm_support_delta_debugging_h | ||
| #define wasm_support_delta_debugging_h | ||
|
|
||
| #include <algorithm> | ||
| #include <cassert> | ||
| #include <vector> | ||
|
|
||
| namespace wasm { | ||
|
|
||
// Use the delta debugging algorithm "ddmin" (Zeller and Hildebrandt 2002,
// https://dl.acm.org/doi/10.1109/32.988498) to shrink `items` to a subset that
// still preserves some property. Returns that subset, with its items in their
// input order.
//
// `tryPartition` should have this signature:
//
//   bool tryPartition(size_t partitionIndex,
//                     size_t numPartitions,
//                     const std::vector<T>& partition)
//
// It should return true iff the property is preserved while keeping only
// `partition` items. It is only ever called with non-empty, proper subsets of
// the current working set: the full input is assumed to preserve the property
// and is never re-tested, and the empty set is never tried.
//
// Guarantees and limits:
//  * Inputs with fewer than two items are returned as-is, without calling
//    `tryPartition` at all.
//  * When the search stops with two or more items left, every single item and
//    every set formed by dropping a single item has been rejected, i.e. the
//    result is 1-minimal rather than globally minimal.
//
// `items` is taken by value on purpose: it is the working set, and it is
// replaced by each accepted candidate. Callers that no longer need their
// vector can std::move it in.
template<typename T, typename F>
std::vector<T> deltaDebugging(std::vector<T> items, F&& tryPartition) {
  using Offset = typename std::vector<T>::difference_type;

  size_t numPartitions = 2;
  while (numPartitions <= items.size()) {
    const size_t size = items.size();
    const size_t basePartitionSize = size / numPartitions;
    const size_t rem = size % numPartitions;
    // The loop condition guarantees at least one item per partition.
    assert(basePartitionSize > 0);

    // Partition `i` spans [boundary(i), boundary(i + 1)). The first `rem`
    // partitions hold one extra item so that the sizes differ by at most one.
    // Candidates are built from these boundaries on demand instead of copying
    // every partition up front. The iterators are only used before `items` is
    // reassigned.
    auto boundary = [&](size_t i) {
      return items.begin() +
             static_cast<Offset>(i * basePartitionSize + std::min(i, rem));
    };

    bool reduced = false;

    // Try keeping only one partition. Try each partition in turn.
    for (size_t i = 0; i < numPartitions; ++i) {
      std::vector<T> partition(boundary(i), boundary(i + 1));
      if (tryPartition(i, numPartitions, partition)) {
        items = std::move(partition);
        // Start over at the coarsest granularity on the smaller set.
        numPartitions = 2;
        reduced = true;
        break;
      }
    }
    if (reduced) {
      continue;
    }

    // Otherwise, try keeping the complement of a partition. Do not do this
    // with only two partitions because that would be no different from what
    // we already tried.
    if (numPartitions > 2) {
      for (size_t i = 0; i < numPartitions; ++i) {
        const auto first = boundary(i);
        const auto last = boundary(i + 1);
        std::vector<T> complement;
        complement.reserve(size - static_cast<size_t>(last - first));
        complement.insert(complement.end(), items.begin(), first);
        complement.insert(complement.end(), last, items.end());
        if (tryPartition(i, numPartitions, complement)) {
          items = std::move(complement);
          // One partition was dropped, so keep the same granularity for the
          // remaining ones. numPartitions > 2 here, so this stays >= 2.
          --numPartitions;
          reduced = true;
          break;
        }
      }
      if (reduced) {
        continue;
      }
    }

    // Otherwise, make the partitions finer grained. Once every partition is a
    // single item there is nothing finer to try, so stop.
    if (numPartitions < size) {
      numPartitions = std::min(size, 2 * numPartitions);
    } else {
      break;
    }
  }
  return items;
}
|
|
||
| } // namespace wasm | ||
|
|
||
| #endif // wasm_support_delta_debugging_h | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,12 +29,12 @@ | |
|
|
||
| #include "ir/branch-utils.h" | ||
| #include "ir/iteration.h" | ||
| #include "ir/literal-utils.h" | ||
| #include "ir/properties.h" | ||
| #include "ir/utils.h" | ||
| #include "pass.h" | ||
| #include "support/colors.h" | ||
| #include "support/command-line.h" | ||
| #include "support/delta_debugging.h" | ||
| #include "support/file.h" | ||
| #include "support/hash.h" | ||
| #include "support/path.h" | ||
|
|
@@ -894,8 +894,45 @@ struct Reducer | |
| } | ||
| } | ||
|
|
||
| // Reduces entire functions at a time. Returns whether we did a significant | ||
| // amount of reduction that justifies doing even more. | ||
| void reduceFunctionBodies() { | ||
| std::cerr << "| try to remove function bodies\n"; | ||
| // Use function indices to speed up finding the complement of the kept | ||
| // partition. | ||
| std::vector<Index> funcs; | ||
| funcs.reserve(module->functions.size()); | ||
| for (Index i = 0; i < module->functions.size(); ++i) { | ||
| funcs.push_back(i); | ||
| } | ||
| deltaDebugging( | ||
| std::move(funcs), | ||
| [&](Index partitionIndex, | ||
| Index numPartitions, | ||
| const std::vector<Index>& partition) { | ||
| std::cerr << "| try partition " << partitionIndex + 1 << " / " | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why add 1 here?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Printing 1-based indices is slightly more intuitive than 0-based indices.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree to disagree 😄 |
||
| << numPartitions << " (size " << partition.size() << ")\n"; | ||
| std::vector<Name> removed; | ||
| removed.reserve(module->functions.size() - partition.size()); | ||
| Index i = 0; | ||
| for (Index j : partition) { | ||
| while (i < j) { | ||
| removed.push_back(module->functions[i++]->name); | ||
| } | ||
| ++i; | ||
| } | ||
| while (i < module->functions.size()) { | ||
| removed.push_back(module->functions[i++]->name); | ||
| } | ||
| if (tryToEmptyFunctions(removed)) { | ||
| // TODO: Consider doing this just once after the delta debugging since | ||
| // we never need to restore from the working copy while removing | ||
| // function bodies. | ||
| noteReduction(removed.size()); | ||
| return true; | ||
| } | ||
| return false; | ||
| }); | ||
| } | ||
|
|
||
| bool reduceFunctions() { | ||
| // try to remove functions | ||
| std::vector<Name> functionNames; | ||
|
|
@@ -936,11 +973,9 @@ struct Reducer | |
| } | ||
| std::cerr << "| trying at i=" << i << " of size " << names.size() | ||
| << "\n"; | ||
| // Try to remove functions and/or empty them. Note that | ||
| // tryToRemoveFunctions() will reload the module if it fails, which means | ||
| // function names may change - for that reason, run it second. | ||
| justReduced = tryToEmptyFunctions(names) || tryToRemoveFunctions(names); | ||
| if (justReduced) { | ||
| // Note that tryToRemoveFunctions() will reload the module if it fails, | ||
| // which means function names may change. | ||
| if (tryToRemoveFunctions(names)) { | ||
| noteReduction(names.size()); | ||
| // Subtract 1 since the loop increments us anyhow by one: we want to | ||
| // skip over the skipped functions, and not any more. | ||
|
|
@@ -967,8 +1002,11 @@ struct Reducer | |
| assert(curr == module.get()); | ||
| curr = nullptr; | ||
|
|
||
| reduceFunctionBodies(); | ||
|
|
||
| // Reduction of entire functions at a time is very effective, and we do it | ||
| // with exponential growth and backoff, so keep doing it while it works. | ||
| // TODO: Figure out how to use delta debugging for this as well. | ||
| while (reduceFunctions()) { | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,82 @@ | ||
| #include "support/delta_debugging.h" | ||
| #include "gtest/gtest.h" | ||
| #include <algorithm> | ||
| #include <string> | ||
| #include <vector> | ||
|
|
||
| using namespace wasm; | ||
|
|
||
| TEST(DeltaDebuggingTest, EmptyInput) { | ||
| std::vector<int> items; | ||
| auto result = deltaDebugging( | ||
| items, [](size_t, size_t, const std::vector<int>&) { return false; }); | ||
| EXPECT_TRUE(result.empty()); | ||
| } | ||
|
|
||
| TEST(DeltaDebuggingTest, SingleItem) { | ||
| std::vector<int> items = {0, 1, 2, 3, 4, 5, 6, 7}; | ||
| auto result = deltaDebugging( | ||
| items, [](size_t, size_t, const std::vector<int>& partition) { | ||
| return std::find(partition.begin(), partition.end(), 3) != | ||
| partition.end(); | ||
| }); | ||
| std::vector<int> expected = {3}; | ||
| EXPECT_EQ(result, expected); | ||
| } | ||
|
|
||
| TEST(DeltaDebuggingTest, MultipleItemsAdjacent) { | ||
| std::vector<int> items = {0, 1, 2, 3, 4, 5, 6, 7}; | ||
| auto result = deltaDebugging( | ||
| items, [](size_t, size_t, const std::vector<int>& partition) { | ||
| bool has2 = | ||
| std::find(partition.begin(), partition.end(), 2) != partition.end(); | ||
| bool has3 = | ||
| std::find(partition.begin(), partition.end(), 3) != partition.end(); | ||
| return has2 && has3; | ||
| }); | ||
| std::vector<int> expected = {2, 3}; | ||
| EXPECT_EQ(result, expected); | ||
| } | ||
|
|
||
| TEST(DeltaDebuggingTest, MultipleItemsNonAdjacent) { | ||
| std::vector<int> items = {0, 1, 2, 3, 4, 5, 6, 7}; | ||
| auto result = deltaDebugging( | ||
| items, [](size_t, size_t, const std::vector<int>& partition) { | ||
| bool has2 = | ||
| std::find(partition.begin(), partition.end(), 2) != partition.end(); | ||
| bool has5 = | ||
| std::find(partition.begin(), partition.end(), 5) != partition.end(); | ||
| return has2 && has5; | ||
| }); | ||
| std::vector<int> expected = {2, 5}; | ||
| EXPECT_EQ(result, expected); | ||
| } | ||
|
|
||
| TEST(DeltaDebuggingTest, OrderMaintained) { | ||
| std::vector<int> items = {3, 1, 4, 2}; | ||
| auto result = deltaDebugging( | ||
| items, [](size_t, size_t, const std::vector<int>& partition) { | ||
| bool has3 = | ||
| std::find(partition.begin(), partition.end(), 3) != partition.end(); | ||
| bool has2 = | ||
| std::find(partition.begin(), partition.end(), 2) != partition.end(); | ||
| return has3 && has2; | ||
| }); | ||
| std::vector<int> expected = {3, 2}; | ||
| EXPECT_EQ(result, expected); | ||
| } | ||
|
|
||
| TEST(DeltaDebuggingTest, DifferentTypes) { | ||
| std::vector<std::string> items = {"apple", "banana", "cherry", "date"}; | ||
| auto result = deltaDebugging( | ||
| items, [](size_t, size_t, const std::vector<std::string>& partition) { | ||
| bool hasBanana = | ||
| std::find(partition.begin(), partition.end(), "banana") != | ||
| partition.end(); | ||
| bool hasDate = std::find(partition.begin(), partition.end(), "date") != | ||
| partition.end(); | ||
| return hasBanana && hasDate; | ||
| }); | ||
| std::vector<std::string> expected = {"banana", "date"}; | ||
| EXPECT_EQ(result, expected); | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: we could write this as a concept instead of a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will skip this one because this API will be refactored away in a follow-up.