1 year ago · 6a660d3edd
--- a/SemSEpaper/exercises.pdf
+++ b/SemSEpaper/exercises.pdf
--- a/SemSEpaper/exercises.tex
+++ b/SemSEpaper/exercises.tex
 
															
															 \definecolor{codegray}{rgb}{0.5,0.5,0.5}
														
 
															
															 \definecolor{codepurple}{rgb}{0.58,0,0.82}
														
 
															
															 \definecolor{backcolour}{rgb}{0.95,0.95,0.92}
														
 
															
															+\definecolor{verylightgray}{rgb}{.97,.97,.97}
														
 
															
															+
														
 
															
															+\lstdefinelanguage{Solidity}{
														
 
															
															+	keywords=[1]{anonymous, assembly, assert, balance, break, call, callcode, case, catch, class, constant, continue, constructor, contract, debugger, default, delegatecall, delete, do, else, emit, event, experimental, export, external, false, finally, for, function, gas, if, implements, import, in, indexed, instanceof, interface, internal, is, length, library, log0, log1, log2, log3, log4, memory, modifier, new, payable, pragma, private, protected, public, pure, push, require, return, returns, revert, selfdestruct, send, solidity, storage, struct, suicide, super, switch, then, this, throw, transfer, true, try, typeof, using, value, view, while, with, addmod, ecrecover, keccak256, mulmod, ripemd160, sha256, sha3}, % generic keywords including crypto operations
														
 
															
															+	keywordstyle=[1]\color{blue}\bfseries,
														
 
															
															+	keywords=[2]{address, bool, byte, bytes, bytes1, bytes2, bytes3, bytes4, bytes5, bytes6, bytes7, bytes8, bytes9, bytes10, bytes11, bytes12, bytes13, bytes14, bytes15, bytes16, bytes17, bytes18, bytes19, bytes20, bytes21, bytes22, bytes23, bytes24, bytes25, bytes26, bytes27, bytes28, bytes29, bytes30, bytes31, bytes32, enum, int, int8, int16, int24, int32, int40, int48, int56, int64, int72, int80, int88, int96, int104, int112, int120, int128, int136, int144, int152, int160, int168, int176, int184, int192, int200, int208, int216, int224, int232, int240, int248, int256, mapping, string, uint, uint8, uint16, uint24, uint32, uint40, uint48, uint56, uint64, uint72, uint80, uint88, uint96, uint104, uint112, uint120, uint128, uint136, uint144, uint152, uint160, uint168, uint176, uint184, uint192, uint200, uint208, uint216, uint224, uint232, uint240, uint248, uint256, var, void, ether, finney, szabo, wei, days, hours, minutes, seconds, weeks, years},	% types; money and time units
														
 
															
															+	keywordstyle=[2]\color{teal}\bfseries,
														
 
															
															+	keywords=[3]{block, blockhash, coinbase, difficulty, gaslimit, number, timestamp, msg, data, gas, sender, sig, value, now, tx, gasprice, origin},	% environment variables
														
 
															
															+	keywordstyle=[3]\color{violet}\bfseries,
														
 
															
															+	identifierstyle=\color{black},
														
 
															
															+	sensitive=true,
														
 
															
															+	comment=[l]{//},
														
 
															
															+	morecomment=[s]{/*}{*/},
														
 
															
															+	commentstyle=\color{gray}\ttfamily,
														
 
															
															+	stringstyle=\color{red}\ttfamily,
														
 
															
															+	morestring=[b]',
														
 
															
															+	morestring=[b]"
														
 
															
															+}
														
 
															
															+
														
 
															
															+\lstset{
														
 
															
															+	language=Solidity,
														
 
															
															+	backgroundcolor=\color{verylightgray},
														
 
															
															+	extendedchars=true,
														
 
															
															+	basicstyle=\footnotesize\ttfamily,
														
 
															
															+	showstringspaces=false,
														
 
															
															+	showspaces=false,
														
 
															
															+	numbers=left,
														
 
															
															+	numberstyle=\footnotesize,
														
 
															
															+	numbersep=9pt,
														
 
															
															+	tabsize=2,
														
 
															
															+	breaklines=true,
														
 
															
															+	showtabs=false,
														
 
															
															+	captionpos=b
														
 
															
															+}
														
 
															
															 \lstdefinestyle{mystyle}{
														
 
															
															 	backgroundcolor=\color{backcolour},
														
 
															
															 \maketitle
														
 
															
															+\begin{abstract}
														
 
															
															+	This paper outlines different forms of the common smart contract weakness with the SWC number 124, commonly referred to as ``Write to Arbitrary Storage Location''. While this paper focuses on applications within the context of Ethereum's EVM and higher-level language Solidity, we will also briefly touch on other research that deals with the Hyperledger Fabric environment. We will begin with a gentle introduction to the Solidity storage layout design that allows this weakness to occur, followed by common forms of exploit, alongside their associated consequences. Finally, we will outline the code characteristics that are detectable by automated tools as well as an exploit sketch.
														
 
															
															+\end{abstract}
														
 
															
															+
														
 
															
															 \section{Weakness and consequences}
														
 
															
															 \subsection{Solidity storage layout}
														
 
															
															 A trivial example of such a vulnerable write operation is shown in Algorithm~\ref{alg:vuln-write}.
														
 
															
															-\lstset{style=mystyle}
														
 
															
															+
														
 
															
															 \begin{algorithm}[H]
														
 
															
															-	\begin{lstlisting}[language=Octave]
														
 
															
															+	\begin{lstlisting}[language=Solidity]
														
 
															
															 	pragma solidity 0.4.25;
														
 
															
															 	contract MyContract {
														
 
															
															 \medspace
														
 
															
															-\lstset{style=mystyle}
														
 
															
															 \begin{algorithm}[H]
														
 
															
															-	\begin{lstlisting}[language=Octave]
														
 
															
															+	\begin{lstlisting}[language=Solidity]
														
 
															
															 	pragma solidity 0.4.25;
														
 
															
															 	contract MyContract {
														
 
															
															   \label{alg:pop-incorrect}
														
 
															
															 \end{algorithm}
														
 
															
															-Another weakness that allows arbitrary storage access is unchecked assembly code. Assembly is a powerful tool that allows the developers to get as close to the EVM as they can,
														
 
															
															-but it may also be very dangerous when not tested correctly. As per the documentation\footnote{\url{https://docs.soliditylang.org/en/latest/assembly.html}, accessed: Oct. 30th 2023}: \textit{"this [inline assembly]
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															+Another weakness that allows arbitrary storage access is unchecked assembly code. Assembly is a powerful tool that allows the developers to get as close to the EVM as they can, but it may also be very dangerous when not used correctly. As per the documentation\footnote{\url{https://docs.soliditylang.org/en/latest/assembly.html}, accessed: Oct. 30th 2023}: \textit{"this [inline assembly]
														
 
															
															 bypasses important safety features and checks of Solidity. You should only use it for tasks that need it, and only if you are confident with using it."}
														
 
															
															-When given access to such lowlevel structures, a programmer can built-in not only weaknesses similar to the ones described previously, but also others, such as overwriting map locations,
														
 
															
															-contract variables etc.
														
 
															
															+When given access to such low-level instructions, a programmer can construct not only weaknesses similar to the ones described previously, but also others, such as overwriting map locations, contract variables etc.
														
 
															
															 An example for such a weakness is given in Algorithm~\ref{alg:unchecked-assembly}.
														
 
															
															 \medspace
														
 
															
															-\lstset{style=mystyle}
														
 
															
															 \begin{algorithm}[H]
														
 
															
															-	\begin{lstlisting}[language=Octave]
														
 
															
															+	\begin{lstlisting}[language=Solidity]
														
 
															
															 	pragma solidity 0.4.25;
														
 
															
															 	contract MyContract {
														
 
															
															   \label{alg:unchecked-assembly}
														
 
															
															 \end{algorithm}
														
 
															
															-The contract has a manager mapping, which should be used as a stack.
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															+The contract has a manager mapping, which is intended to be used as a stack.
														
 
															
															 The developer has added the \texttt{setNextManager} function, which should set the top of the stack to the latest user as a manager.
														
 
															
															-The issue is that the function is implemented in such a way, that the stack would not grow, but the first element would always be overwritten - this arises from the fact that the memory slot
														
 
															
															+The issue is that the function is implemented in such a way that the stack does not grow; instead, the first element is always overwritten---this arises from the fact that the memory slot
														
 
															
															 of the managers mapping does not point to the memory address on the top of the stack, but instead to the base of it.
														
 
															
															 The function is then using this slot address directly, without calculating any offset, overwriting the base of the stack. If social engineering is applied, an attacker can persuade the
														
 
															
															 owner to set them as a manager, which would result in the weakness being exploited directly and the owner giving up their own management rights.
														
 
															
															+
														
 
															
															 \subsection{Consequences}
														
 
															
															 The consequences of exploiting an arbitrary storage access weakness can be of different types and severity.
														
 
															
															 According to Li Duan et al.~\cite{multilayer}, an attacker may also be able to destroy the contract storage structure and thus cause
														
 
															
															 unexpected program flow, abnormal function execution or contract freeze.
														
 
															
															+\subsection{Similar yet safe code example}
														
 
															
															+
														
 
															
															+Using dynamic arrays is naturally not inherently dangerous, as long as they're used properly. The following version of Algorithm~\ref{alg:pop-incorrect} correctly checks for array length, and thereby prevents the integer underflow of the length value. This code example is not vulnerable to the techniques shown in this paper.
														
 
															
															+
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															+\begin{algorithm}[H]
														
 
															
															+	\begin{lstlisting}[language=Solidity]
														
 
															
															+		pragma solidity 0.4.25;
														
 
															
															+		
														
 
															
															+		contract MyContract {
														
 
															
															+			address private owner;
														
 
															
															+			uint[] private arr;
														
 
															
															+			
														
 
															
															+			constructor() public {
														
 
															
															+				arr = new uint[](0);
														
 
															
															+				owner = msg.sender;
														
 
															
															+			}
														
 
															
															+			
														
 
															
															+			function push(uint value) {
														
 
															
															+				arr.length++;
														
 
															
															+				arr[arr.length - 1] = value;
														
 
															
															+			}
														
 
															
															+			
														
 
															
															+			function pop() {
														
 
															
															+				require(arr.length > 0);
														
 
															
															+				arr.length--;
														
 
															
															+			}
														
 
															
															+			
														
 
															
															+			function update(uint index, uint value) {
														
 
															
															+				require(index < arr.length);
														
 
															
															+				arr[index] = value;
														
 
															
															+			}
														
 
															
															+		}
														
 
															
															+	\end{lstlisting}
														
 
															
															+	\caption{Correctly managed array length}
														
 
															
															+	\label{alg:pop-correct}
														
 
															
															+\end{algorithm}
														
 
															
															+
														
 
															
															 \section{Vulnerable contracts in literature}
														
 
															
															 One example for vulnerable contracts, which is similar to Algorithm~\ref{alg:pop-incorrect}, is mentioned in the paper by Li Duan et al.~\cite{multilayer}:
														
 
															
															 \medspace
														
 
															
															-\lstset{style=mystyle}
														
 
															
															 \begin{algorithm}[H]
														
 
															
															-	\begin{lstlisting}[language=Octave]
														
 
															
															+	\begin{lstlisting}[language=Solidity]
														
 
															
															     function PopBonusCode() public {
														
 
															
															       require(0 <= bonusCodes.length);
														
 
															
															       bonusCodes.length--;
														
 
															
															   \label{alg:multilayer-example}
														
 
															
															 \end{algorithm}
														
 
															
															-We will not go into a detailed explanation, as we already did this in the previous section.
														
 
															
															-
														
 
															
															+\medspace
														
 
															
															-A more sophisticated example is presented in the paper by Sukrit Kalra et al.~\cite{Kalra2018ZEUSAS}:
														
 
															
															+We will not go into a detailed explanation, as we already did this in the previous section. A more sophisticated example is presented in the paper by Sukrit Kalra et al.~\cite{Kalra2018ZEUSAS}:
														
 
															
															 \medspace
														
 
															
															-\lstset{style=mystyle}
														
 
															
															 \begin{algorithm}[H]
														
 
															
															-	\begin{lstlisting}[language=Octave]
														
 
															
															+	\begin{lstlisting}[language=Solidity]
														
 
															
															     uint payout = balance/participants.length;
														
 
															
															     for (var i = 0; i < participants.length; i++)
														
 
															
															       participants[i].send(payout);
														
 
															
															   \label{alg:zeus-example}
														
 
															
															 \end{algorithm}
														
 
															
															-The vulnerability here is an integer overflow - as the variable \texttt{i} is dinamically typed, it will get the smallest possible type that will be able to hold the value 0 - that being \texttt{uint8}, which is able to hold positive integers up to 255.
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															+The vulnerability here is an integer overflow---as the type of the variable \texttt{i} is inferred from its initial value (it is declared with \texttt{var}), it will get the smallest possible type that is able to hold the value 0, that being \texttt{uint8}, which can only hold non-negative integers up to 255.
														
 
															
															-Because of this, if the length of the \texttt{participants} arrays is greater than 255, the integer overflows on the 256th iteration and instead of moving on to \texttt{participants[255]}, it reverts back to the first element in the array. As a result, the first 255 paricipants will split all the balance of the contract, whereas the rest will get nothing.
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															+Because of this, if the length of the \texttt{participants} array is greater than 255, the integer overflows at the end of the 256th iteration and instead of moving on to \texttt{participants[256]}, it wraps around to the first element in the array. As a result, the first 256 participants will split all the balance of the contract, whereas the rest will get nothing.
														
 
															
															 \section{Code properties and automatic detection}
														
 
															
															 \medspace
														
 
															
															-The fuzz-driven approach to vulnerability detection is more abstract, as general-purpose fuzzing tools generally don't have knowledge of the analysed program. For the tool SmartFuzzDriverGenerator~\cite{fuzzdrivegen}, a multitude of these fuzzing libraries can be used. The problem at hand is, however, that the technique cannot interface with a smart contract out of the box. The "glue" between fuzzer and program is called a driver, hence the name of "driver-generator".
														
 
															
															+The fuzz-driven approach to vulnerability detection is more abstract, as general-purpose fuzzing tools generally don't have knowledge of the analysed program. For the tool SmartFuzzDriverGenerator~\cite{fuzzdrivegen}, a multitude of these fuzzing libraries can be used, although its application is limited to the Hyperledger Fabric permissioned blockchain. The problem at hand is that the technique cannot interface with a smart contract out of the box. The ``glue'' between fuzzer and program is called a driver, hence the name of ``driver-generator''.
														
 
															
															 \medspace
														
 
															
															-SmartFuzzDriverGenerator aims to automatically generate such a driver by %TODO: I have no idea how it does this actually%
														
 
															
															+SmartFuzzDriverGenerator aims to automatically generate such a driver by inferring the available APIs from the bytecode. There are multiple approaches to decide the order of available fuzzing steps, including a heuristic based on code complexity (i.e. nested conditions, loops, array operations, etc.), random sequences, and user-generated strategies.
														
 
															
															 \medspace
														
 
															
															-The Smartian tool~\cite{smartian} attempts to find a middle-ground between static and dynamic analysis by first transforming the EVM bytecode into control-flow facts. Based on this information, a set of seed-inputs is generated that are expected to have a high probability of yielding useable results. Should no exploit be found, the seed-inputs are then mutated in order to yield a higher code coverage. %TODO: This is probably extemely inprecise and should be re-written%
														
 
															
															+The Smartian tool~\cite{smartian} attempts to find a middle-ground between static and dynamic analysis by first transforming the EVM bytecode into control-flow facts. Based on this information, a set of seed-inputs is generated that are expected to have a high probability of yielding useable results. Should no exploit be found, the seed-inputs are then mutated in order to yield a higher code coverage.
														
 
															
															 \section{Exploit sketch}
														
 
															
															 An exploitation sketch to Algorithm~\ref{alg:pop-incorrect} and to Algorithm~\ref{alg:multilayer-example} is available from Doughoyte~\cite{doughoyte}.
														
 
															
															 \textbf{Checkpoint A}
														
 
															
															-We assume that the following events have ocurred:
														
 
															
															+We assume that the following events have occurred:
														
 
															
															 \begin{enumerate}
														
 
															
															   \item the contract MerdeToken\footnote{\url{https://github.com/Arachnid/uscc/blob/master/submissions-2017/doughoyte/MerdeToken.sol}, accessed: Oct. 30th 2023} has been created;
														
 
															
															   \item the investor has set a withdrawal limit of 1 ether, which only they can change;
														
 
															
															 \lstset{style=mystyle}
														
 
															
															 \begin{algorithm}[H]
														
 
															
															-	\begin{lstlisting}[language=Octave]
														
 
															
															+	\begin{lstlisting}
														
 
															
															     "storage": {
														
 
															
															         // The address of the contract owner:
														
 
															
															         "0000000000000000000000000000000000000000000000000000000000000000": "94b898c1a30adcff67208fd79b9e5a4d339f3cc6d2",
														
 
															
															   \label{alg:exploit-checkpoint-a}
														
 
															
															 \end{algorithm}
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															 \textbf{Checkpoint B}
														
 
															
															 Afterwards, the malicious owner calls the vulnerable function \texttt{popBonusCode()} and the length of the array is set to the max value. This happened, because prior to the underflow, the array length was zero and, to save space, it was omitted from the memory:
														
 
															
															 \lstset{style=mystyle}
														
 
															
															 \begin{algorithm}[H]
														
 
															
															-	\begin{lstlisting}[language=Octave]
														
 
															
															+	\begin{lstlisting}
														
 
															
															     "storage": {
														
 
															
															         "0000000000000000000000000000000000000000000000000000000000000000": "94b898c1a30adcff67208fd79b9e5a4d339f3cc6d2",
														
 
															
															         "0000000000000000000000000000000000000000000000000000000000000001": "948bc7317ad44d6f34f0f0b6e3c8c7bf739ba666fa",
														
 
															
															   \label{alg:exploit-checkpoint-b}
														
 
															
															 \end{algorithm}
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															 Increasing the length of the array to the maximum allowed by \texttt{uint256} was important, as this will now allow the owner to pass the requirement set in \texttt{modifyBonusCode} and still
														
 
															
															 use the function for storage modification.
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															 \textbf{Checkpoint C} The owner is then able to use \texttt{modifyBonusCode} to increase the fixed withdraw limit to the max \texttt{uint256} value. Had the contract not had this vulnerability,
														
 
															
															 this action should only have been possible through the \texttt{setWithdrawLimit}, which is only available to the investor.
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															 In order to overwrite the withdrawal limit, the owner must calculate the hex value to use as a first argument (index) to the function.
														
 
															
															 Since the array \texttt{bonusCodes} is defined in the sixth place in the contract storage, its length is in the fifth storage slot (counting from zero).
														
 
															
															-The limit is defined at the fourth storage slot. Then, in order to manipulate the withdrawal limit, the owner must convert the address of the length to hexadecimal:\\
														
 
															
															+
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															+The limit is defined at the fourth storage slot. Then, in order to manipulate the withdrawal limit, the owner must first compute the Keccak-256 hash of the slot number that holds the array length, which is the storage address at which the array's elements begin:
														
 
															
															+
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															 \lstset{style=mystyle}
														
 
															
															 \begin{algorithm}[H]
														
 
															
															-	\begin{lstlisting}[language=Octave]
														
 
															
															-    > web3.sha3("0x0000000000000000000000000000000000000000000000000000000000000005", { encoding: 'hex' })
														
 
															
															-    "0x036b6384b5eca791c62761152d0c79bb0604c104a5fb6f4eb0703f3154bb3db0"
														
 
															
															+	\begin{lstlisting}
														
 
															
															+    $ web3.sha3("0x0000000000000000000000000000000000000000000000000000000000000005", { encoding: 'hex' })
														
 
															
															+    > "0x036b6384b5eca791c62761152d0c79bb0604c104a5fb6f4eb0703f3154bb3db0"
														
 
															
															   \end{lstlisting}
														
 
															
															 	\caption{Exploit - Convert length address to hex}
														
 
															
															   \label{alg:exploit-convert-address}
														
 
															
															 \end{algorithm}
														
 
															
															-and then just calculate the array index that will wrap around using the formula $2^{256} - H + 4$, where $2^{256}$ is the max \texttt{uint256} value, H is the hex obtained from the previous command and 4 is the offset of the withdrawal limit storage slot from the base of the contract. This, converted to hex, will give the owner the address to use with \texttt{modifyBonusCode}. The Perl snippet below does that:\\
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															+and then just calculate the array index that will wrap around using the formula $2^{256} - H + 4$, where $2^{256}$ is the number of distinct \texttt{uint256} values (storage addresses wrap around modulo $2^{256}$), $H$ is the hash obtained from the previous command and 4 is the offset of the withdrawal limit storage slot from the base of the contract. This, converted to hex, will give the owner the address to use with \texttt{modifyBonusCode}. The Perl snippet below does that:
														
 
															
															+
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															 \lstset{style=mystyle}
														
 
															
															 \begin{algorithm}[H]
														
 
															
															 	\begin{lstlisting}[language=Octave]
														
 
															
															-    \$ perl -Mbigint -E 'say ((2**256 - 0x036b6384b5eca791c62761152d0c79bb0604c104a5fb6f4eb0703f3154bb3db0 + 4)->as_hex)'
														
 
															
															-    0xfc949c7b4a13586e39d89eead2f38644f9fb3efb5a0490b14f8fc0ceab44c254
														
 
															
															+    $ perl -Mbigint -E 'say ((2**256 - 0x036b6384b5eca791c62761152d0c79bb0604c104a5fb6f4eb0703f3154bb3db0 + 4)->as_hex)'
														
 
															
															+    > 0xfc949c7b4a13586e39d89eead2f38644f9fb3efb5a0490b14f8fc0ceab44c254
														
 
															
															   \end{lstlisting}
														
 
															
															 	\caption{Exploit - Convert limit offset to address}
														
 
															
															   \label{alg:exploit-convert-offset}
														
 
															
															 \end{algorithm}
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															 As a result, the memory now looks like this:
														
 
															
															 \medspace
														
 
															
															 \lstset{style=mystyle}
														
 
															
															 \begin{algorithm}[H]
														
 
															
															-	\begin{lstlisting}[language=Octave]
														
 
															
															+	\begin{lstlisting}
														
 
															
															     "storage": {
														
 
															
															         "0000000000000000000000000000000000000000000000000000000000000000": "94b898c1a30adcff67208fd79b9e5a4d339f3cc6d2",
														
 
															
															         "0000000000000000000000000000000000000000000000000000000000000001": "948bc7317ad44d6f34f0f0b6e3c8c7bf739ba666fa",
														
 
															
															   \label{alg:exploit-checkpoint-c}
														
 
															
															 \end{algorithm}
														
 
															
															+\medspace
														
 
															
															+
														
 
															
															 \textbf{Checkpoint D} The owner can now call \texttt{withdraw()} with the full amount of ether in the contract and drain it. The investor has not increased the limit at any point.
														
 
															
															+\section{Conclusion}
														
 
															
															+
														
 
															
															+We presented different forms of the common weakness SWC-124: Write to Arbitrary Storage Location and how they might be detected using automated tools. We have shown how a possible exploit may be constructed, and how this can lead to the complete compromise of a smart contract's storage and control flow. We have given multiple attackable and benign code examples to illustrate this weakness. We believe this weakness to be of particular practical relevance, as it is very easy to introduce by accident, and hard for a developer to spot without advanced knowledge of the underlying mechanisms that cause it.
														
 
															
															+
														
 
															
															+As for preventative measures, we would recommend developers not to interact with low-level building blocks like an array's length value or inline assembly instructions if possible, and instead to employ standard library functions whenever available.
														
 
															
															+
														
 
															
															+
														
 
															
															 \bibliography{exercise.bib}
														
 
															
															 \end{document}