Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Language Features:

Compiler Features:
* EVM-ASM Optimizer: Improve performance of block deduplicator.

Bugfixes:
* NatSpec: Disallow `@return` tag in event documentation.
Expand Down
16 changes: 16 additions & 0 deletions libevmasm/AssemblyItem.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
#include <libsolutil/Common.h>
#include <libsolutil/Numeric.h>
#include <libsolutil/Assertions.h>

#include <boost/container_hash/hash.hpp>

#include <optional>
#include <iostream>
#include <sstream>
Expand Down Expand Up @@ -232,6 +235,19 @@ class AssemblyItem
return data() == _other.data();
}
bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); }
/// Computes a hash that is consistent with `operator==`: the item type is always mixed in,
/// followed by the one payload that is relevant for that type (the instruction for operations,
/// the verbatim bytecode for verbatim items and the data value for everything else).
/// This is a free function so that `boost::hash` can pick it up via ADL.
friend std::size_t hash_value(AssemblyItem const& _item)
{
	std::size_t seed = 0;
	boost::hash_combine(seed, static_cast<int>(_item.m_type));
	switch (_item.m_type)
	{
	case Operation:
		boost::hash_combine(seed, static_cast<int>(_item.instruction()));
		break;
	case VerbatimBytecode:
		boost::hash_combine(seed, *_item.m_verbatimBytecode);
		break;
	default:
		boost::hash_combine(seed, _item.data());
		break;
	}
	return seed;
}
Comment on lines +238 to +250
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks OK, I would just move it somewhere else.
Placing it between operator!= and operator< does not seem like the best choice.

Copy link
Copy Markdown
Member Author

@clonker clonker May 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I placed it here to make it evident that the implementation mirrors that of the equality operator. Where do you think it should live?

/// Less-than operator compatible with operator==.
bool operator<(AssemblyItem const& _other) const
{
Expand Down
90 changes: 47 additions & 43 deletions libevmasm/BlockDeduplicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,69 +27,73 @@
#include <libevmasm/AssemblyItem.h>
#include <libevmasm/SemanticInformation.h>

#include <functional>
#include <set>
#include <boost/container_hash/hash.hpp>

#include <range/v3/algorithm/any_of.hpp>
#include <range/v3/algorithm/equal.hpp>

#include <unordered_set>

using namespace solidity;
using namespace solidity::evmasm;


bool BlockDeduplicator::deduplicate()
{
// Compares indices based on the suffix that starts there, ignoring tags and stopping at
// opcodes that stop the control flow.
// Group basic blocks by a content hash and dedup within each bucket.
// The hash and equality both walk a BlockIterator that ignores tags and stops at
// opcodes that terminate control flow, replacing the block's own self-push by a
// virtual tag so that recursive loops match.

// Virtual tag that signifies "the current block" and which is used to optimise loops.
// Virtual tag that signifies "the current block" and which is used to optimize loops.
// We abort if this virtual tag actually exists.
AssemblyItem pushSelf{PushTag, u256(-4)};
if (
std::count(m_items.cbegin(), m_items.cend(), pushSelf.tag()) ||
std::count(m_items.cbegin(), m_items.cend(), pushSelf.pushTag())
)
return false;

std::function<bool(size_t, size_t)> comparator = [&](size_t _i, size_t _j)
AssemblyItem const pushSelf{PushTag, u256(-4)};
{
if (_i == _j)
AssemblyItem const selfTag = pushSelf.tag();
AssemblyItem const selfPushTag = pushSelf.pushTag();
if (ranges::any_of(m_items, [&](AssemblyItem const& _item) { return _item == selfTag || _item == selfPushTag; }))
return false;
}

// To compare recursive loops, we have to already unify PushTag opcodes of the
// block's own tag.
AssemblyItem pushFirstTag{pushSelf};
AssemblyItem pushSecondTag{pushSelf};

if (_i < m_items.size() && m_items.at(_i).type() == Tag)
pushFirstTag = m_items.at(_i).pushTag();
if (_j < m_items.size() && m_items.at(_j).type() == Tag)
pushSecondTag = m_items.at(_j).pushTag();

using diff_type = BlockIterator::difference_type;
BlockIterator first{m_items.begin() + diff_type(_i), m_items.end(), &pushFirstTag, &pushSelf};
BlockIterator second{m_items.begin() + diff_type(_j), m_items.end(), &pushSecondTag, &pushSelf};
BlockIterator end{m_items.end(), m_items.end()};
BlockIterator const end{m_items.end(), m_items.end()};

if (first != end && (*first).type() == Tag)
++first;
if (second != end && (*second).type() == Tag)
++second;
// yields a block iterator into the body of a block (skips `Tag` typed assembly items at `_blockBegin`)
auto const blockBodyBegin = [&](std::size_t const _blockBegin, AssemblyItem const& _selfTagPush)
{
BlockIterator it{
m_items.begin() + static_cast<BlockIterator::difference_type>(_blockBegin),
m_items.end(),
&_selfTagPush,
&pushSelf
};
if (it != end && (*it).type() == Tag)
++it;
return it;
};

return std::lexicographical_compare(first, end, second, end);
auto const hashBlockAt = [&](std::size_t const _i)
{
return boost::hash_range(blockBodyBegin(_i, m_items[_i].pushTag()), end);
};
auto const blocksAtEqual = [&](std::size_t const _i, std::size_t const _j)
{
return ranges::equal(
blockBodyBegin(_i, m_items[_i].pushTag()), end,
blockBodyBegin(_j, m_items[_j].pushTag()), end
);
};

size_t iterations = 0;
std::size_t iterations = 0;
for (; ; ++iterations)
{
//@todo this should probably be optimized.
std::set<size_t, std::function<bool(size_t, size_t)>> blocksSeen(comparator);
for (size_t i = 0; i < m_items.size(); ++i)
std::unordered_set<std::size_t, decltype(hashBlockAt), decltype(blocksAtEqual)> seen(0, hashBlockAt, blocksAtEqual);
for (std::size_t i = 0; i < m_items.size(); ++i)
{
if (m_items.at(i).type() != Tag)
if (m_items[i].type() != Tag)
continue;
auto it = blocksSeen.find(i);
if (it == blocksSeen.end())
blocksSeen.insert(i);
else
m_replacedTags[m_items.at(i).data()] = m_items.at(*it).data();
auto const [it, inserted] = seen.insert(i);
if (!inserted)
m_replacedTags[m_items[i].data()] = m_items[*it].data();
}

if (!applyTagReplacement(m_items, m_replacedTags))
Expand Down
6 changes: 4 additions & 2 deletions libevmasm/BlockDeduplicator.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class BlockDeduplicator
using difference_type = std::ptrdiff_t;
using pointer = AssemblyItem const*;
using reference = AssemblyItem const&;
BlockIterator() = default;
Comment thread
blishko marked this conversation as resolved.
BlockIterator(
AssemblyItems::const_iterator _it,
AssemblyItems::const_iterator _end,
Expand All @@ -83,13 +84,14 @@ class BlockDeduplicator
):
it(_it), end(_end), replaceItem(_replaceItem), replaceWith(_replaceWith) {}
BlockIterator& operator++();
BlockIterator operator++(int) { auto tmp = *this; ++*this; return tmp; }
bool operator==(BlockIterator const& _other) const { return it == _other.it; }
bool operator!=(BlockIterator const& _other) const { return it != _other.it; }
AssemblyItem const& operator*() const;
AssemblyItems::const_iterator it;
AssemblyItems::const_iterator end;
AssemblyItem const* replaceItem;
AssemblyItem const* replaceWith;
AssemblyItem const* replaceItem = nullptr;
AssemblyItem const* replaceWith = nullptr;
};

std::map<u256, u256> m_replacedTags;
Expand Down