Challenge | Remove duplicate elements |
---|---|

Submitter | zac.creditmint.eth |

Submitted at | 2018-53-27 |

Gas used | 114967 |

```
pragma solidity ^0.4.23;
/**
* This file is part of the 1st Solidity Gas Golfing Contest.
*
* Author: Zachary Williamson
*
* This work is licensed under Creative Commons Attribution ShareAlike 3.0.
* https://creativecommons.org/licenses/by-sa/3.0/
*/
contract Unique {
/**
* @dev Removes all but the first occurrence of each element from a list of
* integers, preserving the order of original elements, and returns the list.
*
* The input list may be of any length.
*
* @return The input list, with any duplicate elements removed.
*/
// It's a bloom filter! When 'hmm, probably' is a good substitute for 'definitely'
// ...which in a real-world smart contract seems unlikely. But hey, this is code golf!
// In all seriousness, a bloom filter would serve as a good 'first pass' to validate
// set membership: if a 100% set membership test is expensive the bloom filter will
// limit the number of instances where the expensive test is required.
// A real-world example of this would be if the data to be iterated over is in storage, not
// memory, where sload costs 200 gas. A small bloom filter could be used which would be much cheaper to recall
// NB. My previous evm-optimized entry had a 'minor' bug where I forgot to label
// the uniquify function as 'external'. As a result the calldata was being dumped into
// memory at the indices of my bloom filter, filling it up with junk.
// I thought it was odd that I needed a filter length about 10x larger than I calculated for...
// probability of a hash collision = (1-e^(-kn/m))^k
// k = number of hash functions
// n = number of elements in set
// m = bit-length of bloom filter
// assuming n tops out at around 256 (although...)
// then m = 32*256 bits and k=4 gives probability p of 0.002%
// the value of k seems to be the overriding gas guzzler despite my best efforts
// setting k=1 and having a larger bitfield (0x40 machine words: 2096 bytes, 2^21 bits)
// seems to be the most gas-efficient way of getting an acceptably low p-value
// Variable declarations
// w = value of input array element
// h = 'hash' of w
// i = memory index of bloom filter limb being examined
// b = 256-bit value that we're comparing with bloom filter limb
// r = updated representation of bloom filter limb
// s = current calldata pointer
// l = length of output array in bytes
function uniquify(uint[]) external view returns(uint[]) {
assembly {
// first, let's check there's actually some data to operate on
0x24 calldataload has_data jumpi
0x40 0x00 return
has_data:
0x44
dup1 calldataload
dup2 0x20 add calldataload
test_run:
lt input_is_ordered jumpi
jump(has_non_trivial_structure)
input_is_ordered:
0x20 add
dup1 calldataload
dup2 0x20 add calldataload
dup1 dup3 or test_run jumpi
calldatacopy(0, 0x04, calldatasize)
return(0x00, sub(calldatasize, 0x04))
// if we reach here, all unique
has_non_trivial_structure:
pop
// Push the calldata pointer onto the stack. First array element will be at index 0x44
0x44
// Create the hash table: converts a 8-bit key into a 256-bit
// value. Only one bit is set high and there are 256 unique
// permutations in the lookup table
1 0x0 mstore
2 0x20 mstore
4 0x40 mstore
8 0x60 mstore
16 0x80 mstore
32 0xa0 mstore
64 0xc0 mstore
128 0xe0 mstore
// We want to use 'msize' as our pointer to the next element in our
// output array. It's self-incrementing, so we don't need to call
// '0x20 add' every iteration. It also costs 2 gas, as opposed to
// duplicating a stack-based pointer which costs 3 gas.
// Reducing the stack depth also removes the need for 1 swap op (3 gas),
// as we would otherwise need to increment both the output array pointer
// and the calldata pointer, which requires a swap
// Total gas saving: 10 gas per iteration
// in order to do this, we store data in a word that is one word after the
// reserved bloom filter memory.
// We use the memory from 0x100 to 0x900 to store our bloom filter,
// which is 64 machine words. Having a value that is a power of 2 allows for
// very cheap indexing in our main loop, which is worth the extra gas costs of a larger filter
0x01 0x500 mstore
// ### MAIN LOOP
// We know there's at least one array element, so fall into the loop
loop_start:
dup1 calldataload // stack state: v
0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 mul // stack state: h s
dup1 0x3e0 and 0x100 add // stack state: i h s
swap1 28 byte mload // stack state: b i s
dup2 mload dup2 and skip_add_to_set jumpi
dup3 calldataload msize mstore
dup2 mload or // stack state: r i s
swap1 mstore // stack state: s
0x20 add // stack state: s'
// 2nd iteration
dup1 calldataload // stack state: v
0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 mul // stack state: h s
dup1 0x3e0 and 0x100 add // stack state: i h s
swap1 28 byte mload // stack state: b i s
dup2 mload dup2 and skip_add_to_set jumpi
dup3 calldataload msize mstore
dup2 mload or // stack state: r i s
swap1 mstore // stack state: s
0x20 add // stack state: s'
calldatasize dup2 lt loop_start jumpi
0x20 0x4e0 mstore // stack state: s
0x520 msize sub // stack state: l s
0x20 dup2 div 0x500 mstore // stack state: l s
0x40 add 0x4e0 return
skip_add_to_set:
pop pop
0x20 add
calldatasize dup2 lt loop_start jumpi
0x20 0x4e0 mstore // stack state:
0x520 msize sub // stack state: l
0x20 dup2 div 0x500 mstore // stack state: l
0x40 add 0x4e0 return
// the variable number of 'pop' instructions in the main loop upsets the compiler, poor thing
// pop a stack variable to prevent it from throwing errors
pop
}
}
}
```