Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 104 additions & 27 deletions libyul/backends/evm/ssa/StackShuffler.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ struct Target
{
Target(StackData const& _args, LivenessAnalysis::LivenessData const& _liveOut, std::size_t _targetSize);

auto argsRange() const
{
return ranges::views::iota(tailSize, size) | ranges::views::transform([](auto _i) { return StackOffset{_i}; });
}

StackData const& args;
LivenessAnalysis::LivenessData const& liveOut;
std::size_t const size;
Expand Down Expand Up @@ -258,7 +263,6 @@ class StackShuffler
// if there are no args, we should be done now
if (_state.target().args.empty())
return {StackShufflerResult::Status::Admissible};
yulAssert(_stack.size() == _state.target().size);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this not true anymore?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, fixArgsSlot couldn't refuse to grow and would, in the worst case, push junk. Now it can refuse and ultimately yield NoAction, in which case the invariant would be violated.


// check whether we are done
if (_state.admissible())
Expand Down Expand Up @@ -505,53 +509,126 @@ class StackShuffler
if (auto result = dupDeepSlotIfRequired(_stack, _state); result.status != ShuffleHelperResult::Status::NoAction)
return result;

// depth of an args-region offset `_o` after the next grow, clamped from below to zero
auto const depthFromNewTop = [&](StackOffset _o) -> std::size_t
{
if (_o.value >= _stack.size())
return 0u;
return _stack.offsetToDepth(_o).value + 1;
};
// duping up `_arg` only makes progress if some target args-region offset wanting `_arg` is still
// unfilled and reachable from the new top
auto const canReachSomeUnfilledTarget = [&](Slot const& _arg)
{
for (auto offset: _state.target().argsRange())
{
if (_state.targetArg(offset) != _arg)
continue;
if (_state.isArgsCompatible(offset, offset))
continue;
if (depthFromNewTop(offset) <= ReachableStackDepth)
return true;
}
return false;
};
// after the grow, every misaligned args-region offset below the new top must still
// be swap-reachable, otherwise we'd strand it
bool const prefixReachable = [&]
{
for (StackOffset p: _state.stackArgsRange())
if (!_state.isArgsCompatible(p, p) && depthFromNewTop(p) > ReachableStackDepth)
return false;
return true;
}();

// stricter gate for the fast path: every misaligned prefix offset must remain swap-reachable
// after the grow AND after one more placement op (so the prefix can still be fixed later)
bool const pushKeepsDeepPrefixReachable = [&]
{
for (StackOffset p: _state.stackArgsRange())
if (!_state.isArgsCompatible(p, p) && depthFromNewTop(p) + 1 > ReachableStackDepth)
return false;
return true;
}();

// fast path: if the arg the target wants at the new top is under-supplied, place it directly there.
// Skip when growing would push a misaligned prefix offset out of swap range for subsequent fixups;
// otherwise we'd push and then immediately shrink back, oscillating
if (pushKeepsDeepPrefixReachable)
{
// the slot we're about to create lives at the current stack size (i.e. the post-grow top)
StackOffset const targetOffset{_stack.size()};
if (_state.count(_state.targetArg(targetOffset)) < _state.targetMinCount(_state.targetArg(targetOffset)))
Slot const& targetArg = _state.targetArg(targetOffset);
if (
!targetArg.isJunk() &&
(
_state.count(targetArg) < _state.targetMinCount(targetArg) ||
_state.countInArgs(targetArg) < _state.targetArgsCount(targetArg)
)
)
{
auto const sourceDepth = _stack.findSlotDepth(_state.targetArg(targetOffset));
if (!sourceDepth)
// prefer duping an existing copy over generating a fresh one
if (auto sourceDepth = _stack.findSlotDepth(targetArg))
{
_stack.push(_state.targetArg(targetOffset));
if (_stack.dupReachable(*sourceDepth))
{
_stack.dup(*sourceDepth);
return {ShuffleHelperResult::Status::StackModified};
}
// existing copy is out of dup range; if we also can't regenerate it, bail
if (!_stack.canBeFreelyGenerated(targetArg))
return {ShuffleHelperResult::Status::StackTooDeep, targetArg};
}
// no reachable copy (or none at all), push a fresh instance if the slot allows it
if (_stack.canBeFreelyGenerated(targetArg))
{
_stack.push(targetArg);
return {ShuffleHelperResult::Status::StackModified};
}

if (!_stack.dupReachable(*sourceDepth))
return {ShuffleHelperResult::Status::StackTooDeep, _state.targetArg(targetOffset)};
_stack.dup(*sourceDepth);
return {ShuffleHelperResult::Status::StackModified};
}
}

// if we can't directly produce targetOffset, take the deepest arg that we don't have enough of and dup/push that
// First, prioritize duping args that are on the stack over pushing freely-generatable ones
for (StackOffset offset{_state.target().tailSize}; offset < _state.target().size; ++offset.value)
for (StackOffset const offset: _state.target().argsRange())
{
Slot const& arg = _state.targetArg(offset);
if (!arg.isJunk() && (_state.count(arg) < _state.targetMinCount(arg) || _state.countInArgs(arg) < _state.targetArgsCount(arg)))
if (arg.isJunk())
continue;
if (_state.count(arg) >= _state.targetMinCount(arg) && _state.countInArgs(arg) >= _state.targetArgsCount(arg))
continue;
// If none of this arg's unfilled targets are reachable from the new top, growing for it
// won't help — skip and let another arg or a later step (e.g. the tail loop) handle it.
if (!canReachSomeUnfilledTarget(arg))
continue;
if (auto sourceDepth = _stack.findSlotDepth(arg))
{
if (auto sourceDepth = _stack.findSlotDepth(arg))
if (_stack.dupReachable(*sourceDepth))
{
if (_stack.dupReachable(*sourceDepth))
{
_stack.dup(*sourceDepth);
return {ShuffleHelperResult::Status::StackModified};
}
if (!_stack.canBeFreelyGenerated(arg))
return {ShuffleHelperResult::Status::StackTooDeep, arg};
_stack.dup(*sourceDepth);
return {ShuffleHelperResult::Status::StackModified};
}
yulAssert(_stack.canBeFreelyGenerated(arg));
_stack.push(arg);
return {ShuffleHelperResult::Status::StackModified};
if (!_stack.canBeFreelyGenerated(arg))
return {ShuffleHelperResult::Status::StackTooDeep, arg};
}
if (!prefixReachable)
continue;
yulAssert(_stack.canBeFreelyGenerated(arg));
_stack.push(arg);
return {ShuffleHelperResult::Status::StackModified};
}

// Try to dup the optimal slot based on liveness analysis
if (auto slotToDup = selectOptimalSlotToDup(_stack, _state))
{
_stack.dup(*slotToDup);
else
// If no suitable slot found, push junk
_stack.push(Slot::makeJunk());
return {ShuffleHelperResult::Status::StackModified};
}
// Growing would strand a misaligned prefix slot
if (!prefixReachable)
return {ShuffleHelperResult::Status::NoAction};
// If no suitable slot found, push junk
_stack.push(Slot::makeJunk());
return {ShuffleHelperResult::Status::StackModified};
}

Expand Down Expand Up @@ -792,7 +869,7 @@ class StackShuffler
static std::optional<StackOffset> allNecessarySlotsReachableOrFinal(Stack<Callback> const& _stack, detail::State const& _state)
{
// check that args are either in position or reachable
for (StackOffset offset{_state.target().tailSize}; offset < _state.target().size; ++offset.value)
for (StackOffset const offset: _state.target().argsRange())
{
if (_state.isArgsCompatible(offset, offset))
continue;
Expand Down
6 changes: 3 additions & 3 deletions test/libsolidity/semanticTests/externalContracts/base64.sol
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ contract test {
// gas legacy code: 629800
// gas legacyOptimized: 87926
// gas legacyOptimized code: 429800
// gas ssaCFGOptimized: 79931
// gas ssaCFGOptimized code: 332600
// gas ssaCFGOptimized: 79883
// gas ssaCFGOptimized code: 332000
// encode_inline_asm(bytes): 0x20, 0 -> 0x20, 0
// encode_inline_asm(bytes): 0x20, 1, "f" -> 0x20, 4, "Zg=="
// encode_inline_asm(bytes): 0x20, 2, "fo" -> 0x20, 4, "Zm8="
Expand All @@ -66,4 +66,4 @@ contract test {
// gas irOptimized: 3512081
// gas legacy: 4600082
// gas legacyOptimized: 2813075
// gas ssaCFGOptimized: 3089081
// gas ssaCFGOptimized: 3078081
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@ contract DepositContract is IDepositContract, ERC165 {
// gas legacy code: 1438800
// gas legacyOptimized: 848699
// gas legacyOptimized code: 878200
// gas ssaCFGOptimized: 809663
// gas ssaCFGOptimized code: 570000
// gas ssaCFGOptimized: 809667
// gas ssaCFGOptimized code: 570200
// supportsInterface(bytes4): 0x0 -> 0
// supportsInterface(bytes4): 0xffffffff00000000000000000000000000000000000000000000000000000000 -> false # defined to be false by ERC-165 #
// supportsInterface(bytes4): 0x01ffc9a700000000000000000000000000000000000000000000000000000000 -> true # ERC-165 id #
Expand All @@ -195,28 +195,28 @@ contract DepositContract is IDepositContract, ERC165 {
// gas irOptimized: 109178
// gas legacy: 142741
// gas legacyOptimized: 117558
// gas ssaCFGOptimized: 109748
// gas ssaCFGOptimized: 109751
// get_deposit_count() -> 0x20, 8, 0 # TODO: check balance and logs after each deposit #
// deposit(bytes,bytes,bytes,bytes32), 32 ether: 0 -> FAILURE # Empty input #
// get_deposit_root() -> 0xd70a234731285c6804c2a4f56711ddb8c82c99740f207854891028af34e27e5e
// gas irOptimized: 109178
// gas legacy: 142741
// gas legacyOptimized: 117558
// gas ssaCFGOptimized: 109748
// gas ssaCFGOptimized: 109751
// get_deposit_count() -> 0x20, 8, 0
// deposit(bytes,bytes,bytes,bytes32), 1 ether: 0x80, 0xe0, 0x120, 0xaa4a8d0b7d9077248630f1a4701ae9764e42271d7f22b7838778411857fd349e, 0x30, 0x933ad9491b62059dd065b560d256d8957a8c402cc6e8d8ee7290ae11e8f73292, 0x67a8811c397529dac52ae1342ba58c9500000000000000000000000000000000, 0x20, 0x00f50428677c60f997aadeab24aabf7fceaef491c96a52b463ae91f95611cf71, 0x60, 0xa29d01cc8c6296a8150e515b5995390ef841dc18948aa3e79be6d7c1851b4cbb, 0x5d6ff49fa70b9c782399506a22a85193151b9b691245cebafd2063012443c132, 0x4b6c36debaedefb7b2d71b0503ffdc00150aaffd42e63358238ec888901738b8 -> # txhash: 0x7085c586686d666e8bb6e9477a0f0b09565b2060a11f1c4209d3a52295033832 #
// ~ emit DepositEvent(bytes,bytes,bytes,bytes,bytes): 0xa0, 0x0100, 0x0140, 0x0180, 0x0200, 0x30, 0x933ad9491b62059dd065b560d256d8957a8c402cc6e8d8ee7290ae11e8f73292, 0x67a8811c397529dac52ae1342ba58c9500000000000000000000000000000000, 0x20, 0xf50428677c60f997aadeab24aabf7fceaef491c96a52b463ae91f95611cf71, 0x08, 0xca9a3b00000000000000000000000000000000000000000000000000000000, 0x60, 0xa29d01cc8c6296a8150e515b5995390ef841dc18948aa3e79be6d7c1851b4cbb, 0x5d6ff49fa70b9c782399506a22a85193151b9b691245cebafd2063012443c132, 0x4b6c36debaedefb7b2d71b0503ffdc00150aaffd42e63358238ec888901738b8, 0x08, 0x00
// get_deposit_root() -> 0x2089653123d9c721215120b6db6738ba273bbc5228ac093b1f983badcdc8a438
// gas irOptimized: 109174
// gas legacy: 142750
// gas legacyOptimized: 117570
// gas ssaCFGOptimized: 109744
// gas ssaCFGOptimized: 109747
// get_deposit_count() -> 0x20, 8, 0x0100000000000000000000000000000000000000000000000000000000000000
// deposit(bytes,bytes,bytes,bytes32), 32 ether: 0x80, 0xe0, 0x120, 0xdbd986dc85ceb382708cf90a3500f500f0a393c5ece76963ac3ed72eccd2c301, 0x30, 0xb2ce0f79f90e7b3a113ca5783c65756f96c4b4673c2b5c1eb4efc22280259441, 0x06d601211e8866dc5b50dc48a244dd7c00000000000000000000000000000000, 0x20, 0x00344b6c73f71b11c56aba0d01b7d8ad83559f209d0a4101a515f6ad54c89771, 0x60, 0x945caaf82d18e78c033927d51f452ebcd76524497b91d7a11219cb3db6a1d369, 0x7595fc095ce489e46b2ef129591f2f6d079be4faaf345a02c5eb133c072e7c56, 0x0c6c3617eee66b4b878165c502357d49485326bc6b31bc96873f308c8f19c09d -> # txhash: 0x404d8e109822ce448e68f45216c12cb051b784d068fbe98317ab8e50c58304ac #
// ~ emit DepositEvent(bytes,bytes,bytes,bytes,bytes): 0xa0, 0x0100, 0x0140, 0x0180, 0x0200, 0x30, 0xb2ce0f79f90e7b3a113ca5783c65756f96c4b4673c2b5c1eb4efc22280259441, 0x06d601211e8866dc5b50dc48a244dd7c00000000000000000000000000000000, 0x20, 0x344b6c73f71b11c56aba0d01b7d8ad83559f209d0a4101a515f6ad54c89771, 0x08, 0x40597307000000000000000000000000000000000000000000000000000000, 0x60, 0x945caaf82d18e78c033927d51f452ebcd76524497b91d7a11219cb3db6a1d369, 0x7595fc095ce489e46b2ef129591f2f6d079be4faaf345a02c5eb133c072e7c56, 0x0c6c3617eee66b4b878165c502357d49485326bc6b31bc96873f308c8f19c09d, 0x08, 0x0100000000000000000000000000000000000000000000000000000000000000
// get_deposit_root() -> 0x40255975859377d912c53aa853245ebd939bdd2b33a28e084babdcc1ed8238ee
// gas irOptimized: 109174
// gas legacy: 142750
// gas legacyOptimized: 117570
// gas ssaCFGOptimized: 109744
// gas ssaCFGOptimized: 109747
// get_deposit_count() -> 0x20, 8, 0x0200000000000000000000000000000000000000000000000000000000000000
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ contract test {
// gas legacy code: 523600
// gas legacyOptimized: 82667
// gas legacyOptimized code: 369200
// gas ssaCFGOptimized: 77956
// gas ssaCFGOptimized code: 313400
// gas ssaCFGOptimized: 77804
// gas ssaCFGOptimized code: 311200
// prb_pi() -> 3141592656369545286
// gas irOptimized: 55036
// gas legacy: 100657
Expand Down
4 changes: 2 additions & 2 deletions test/libsolidity/semanticTests/externalContracts/snark.sol
Original file line number Diff line number Diff line change
Expand Up @@ -297,10 +297,10 @@ contract Test {
// gas irOptimized: 275229
// gas legacy: 293579
// gas legacyOptimized: 276313
// gas ssaCFGOptimized: 275795
// gas ssaCFGOptimized: 275791
// verifyTx() -> true
// ~ emit Verified(string): 0x20, 0x16, "Successfully verified."
// gas irOptimized: 818076
// gas legacy: 904397
// gas legacyOptimized: 816770
// gas ssaCFGOptimized: 820130
// gas ssaCFGOptimized: 820120
6 changes: 3 additions & 3 deletions test/libsolidity/semanticTests/externalContracts/strings.sol
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ contract test {
// gas legacy code: 932600
// gas legacyOptimized: 102639
// gas legacyOptimized code: 612400
// gas ssaCFGOptimized: 96160
// gas ssaCFGOptimized code: 532200
// gas ssaCFGOptimized: 95989
// gas ssaCFGOptimized code: 530000
// toSlice(string): 0x20, 11, "hello world" -> 11, 0xa0
// gas irOptimized: 22646
// gas legacy: 23168
Expand All @@ -80,4 +80,4 @@ contract test {
// gas irOptimized: 1976778
// gas legacy: 4234020
// gas legacyOptimized: 2318668
// gas ssaCFGOptimized: 1882355
// gas ssaCFGOptimized: 1841235
6 changes: 3 additions & 3 deletions test/libyul/ssa/stackLayoutGenerator/nested_for.yul
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,11 @@
// Block0_3 [label="\
// IN: [phi0, phi3, JUNK]\l\
// \l\
// [JUNK, phi3, lit2, phi0]\l\
// [phi3, lit2, phi0]\l\
// add\l\
// [JUNK, phi3, v5]\l\
// [phi3, v5]\l\
// \l\
// OUT: [JUNK, phi3, v5]\l\
// OUT: [phi3, v5]\l\
// "];
// Block0_3 -> Block0_3Exit [arrowhead=none];
// Block0_3Exit [label="Jump" shape=oval];
Expand Down
35 changes: 7 additions & 28 deletions test/libyul/ssa/stackShuffler/deep_values_grow_to_args.stack
Original file line number Diff line number Diff line change
Expand Up @@ -8,33 +8,12 @@ targetStackTop: [JUNK, v2, JUNK, JUNK, JUNK, JUNK, JUNK, JUNK, JUNK, JUNK, JUNK,
// POP| * * * * * * * * * * * * * v1 v2
// PUSH lit2| * * * * * * * * * * * * * v1 v2 lit2
// DUP2| * * * * * * * * * * * * * v1 v2 lit2 v2
// PUSH lit3| * * * * * * * * * * * * * v1 v2 lit2 v2 lit3
// DUP2| * * * * * * * * * * * * * v1 v2 lit2 v2 lit3 v2
// PUSH lit1| * * * * * * * * * * * * * v1 v2 lit2 v2 lit3 v2 lit1
// SWAP6| * * * * * * * * * * * * * lit1 v2 lit2 v2 lit3 v2 v1
// SWAP1| * * * * * * * * * * * * * lit1 v2 lit2 v2 lit3 v1 v2
// SWAP7| * * * * * * * * * * * * v2 lit1 v2 lit2 v2 lit3 v1 *
// POP| * * * * * * * * * * * * v2 lit1 v2 lit2 v2 lit3 v1
// DUP3| * * * * * * * * * * * * v2 lit1 v2 lit2 v2 lit3 v1 v2
// SWAP8| * * * * * * * * * * * v2 v2 lit1 v2 lit2 v2 lit3 v1 *
// POP| * * * * * * * * * * * v2 v2 lit1 v2 lit2 v2 lit3 v1
// DUP3| * * * * * * * * * * * v2 v2 lit1 v2 lit2 v2 lit3 v1 v2
// SWAP9| * * * * * * * * * * v2 v2 v2 lit1 v2 lit2 v2 lit3 v1 *
// POP| * * * * * * * * * * v2 v2 v2 lit1 v2 lit2 v2 lit3 v1
// DUP3| * * * * * * * * * * v2 v2 v2 lit1 v2 lit2 v2 lit3 v1 v2
// SWAP10| * * * * * * * * * v2 v2 v2 v2 lit1 v2 lit2 v2 lit3 v1 *
// POP| * * * * * * * * * v2 v2 v2 v2 lit1 v2 lit2 v2 lit3 v1
// DUP3| * * * * * * * * * v2 v2 v2 v2 lit1 v2 lit2 v2 lit3 v1 v2
// SWAP11| * * * * * * * * v2 v2 v2 v2 v2 lit1 v2 lit2 v2 lit3 v1 *
// POP| * * * * * * * * v2 v2 v2 v2 v2 lit1 v2 lit2 v2 lit3 v1
// DUP3| * * * * * * * * v2 v2 v2 v2 v2 lit1 v2 lit2 v2 lit3 v1 v2
// SWAP12| * * * * * * * v2 v2 v2 v2 v2 v2 lit1 v2 lit2 v2 lit3 v1 *
// POP| * * * * * * * v2 v2 v2 v2 v2 v2 lit1 v2 lit2 v2 lit3 v1
// DUP3| * * * * * * * v2 v2 v2 v2 v2 v2 lit1 v2 lit2 v2 lit3 v1 v2
// SWAP13| * * * * * * v2 v2 v2 v2 v2 v2 v2 lit1 v2 lit2 v2 lit3 v1 *
// POP| * * * * * * v2 v2 v2 v2 v2 v2 v2 lit1 v2 lit2 v2 lit3 v1
// DUP3| * * * * * * v2 v2 v2 v2 v2 v2 v2 lit1 v2 lit2 v2 lit3 v1 v2
// ...|
// DUP1| * * * * * * * * * * * * * v1 v2 lit2 v2 v2
// SWAP16| * v2 * * * * * * * * * * * v1 v2 lit2 v2 *
// PUSH lit1| * v2 * * * * * * * * * * * v1 v2 lit2 v2 * lit1
// SWAP5| * v2 * * * * * * * * * * * lit1 v2 lit2 v2 * v1
// PUSH lit3| * v2 * * * * * * * * * * * lit1 v2 lit2 v2 * v1 lit3
// SWAP2| * v2 * * * * * * * * * * * lit1 v2 lit2 v2 lit3 v1 *
// +--------------------------------------------------------------------------------------------------------------------------------------------
// (target)| * v2 * * * * * * * * * * * lit1 v2 lit2 v2 lit3 v1 *
// Status: MaxIterationsReached
// Status: Admissible
Loading