report/report.tex


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139

\documentclass{article}

\usepackage{geometry}
\usepackage{hyperref}
\usepackage{pgfplots}
\usepackage{tikz}
\usepackage{minted}
\usepackage{booktabs}
\usepackage{siunitx}
\usepackage{algorithm2e}
\usepackage{float}
\geometry{a4paper}

\begin{document}

\begin{center}
    \Huge
    Sorting in x86\_64 linux Assembly
    \vspace{1cm}

    \Large
    Navid S.

    \vspace{0.5cm}
    Andreas K. L.

    \vspace{0.5cm}
    Mikkel T.

    \vspace{1cm}
    \large
    October 20$^\text{th}$ 2025
\end{center}

\newpage
\tableofcontents
\newpage

\section{Introduction}
Sorting is one of the most fundamental operations in programming. Sorting algorithms play a crucial role
in efficient searching, organization and interpretation of data, and optimization problems. Furthermore,
sorting is often times the first step towards designing complicated algorithms in computer science. This
project aims to study efficient sorting of a list of coordinates by the second value of each coordinate,
in Assembly. Although abstract high level languages such as Python and Java allow faster and convenient development,
Assembly provides precise control over program flow, memory usage and instruction level execution, making Assembly an excellent
evironment for benchmarking and performance analysis. In this project the two algorithms quick-sort and insertion-sort
will be implemented and compared against each other. Quick-sort is expected to excel on large random datasets,
while insertion-sort is more suitable for small or nearly sorted cases.
Several test
instances consisting of uniformly random coordinates will be created with the goal of comparing the implementations
actual runtime to their theoretical runtimes. These datasets will be ranging from 10,000 to 5,000,000 elements, including edge
cases with duplicates or nearly sorted lists.

\section{Design}
\begin{itemize}
    \item 3 stage process, read $\rightarrow$ sort $\rightarrow$ print
    \item Array format
\end{itemize}

\begin{figure}[H]
    \centering
    \includegraphics[scale=0.6]{flowchart.drawio.png}
    \caption{Program flow}
    \label{fig:flowchart}
\end{figure}

\subsection{The Array}
In order to be a litte more general, and make our program easily expandable to
different types of data, we wanted to create out own array datatype, that
allowes us to sort something more abstract, with respect to a key in that
abstract object.
To do this we settled on an array of memory addresses, that point to where the
abstract objects are, see \autoref{fig:array}. These object would in practice
also be represented as 
arrays. With this we can specify which index (key) in the sub-arrays we want to
sort by, and then when we swap elements in the array, we simply swap the memory
addresses around. 

\begin{figure}[H]
    \centering
    \begin{tikzpicture}
        \draw (0,0) rectangle (4,1); % outer box
        \node at (-0.5,0.5) {$A=$};

        % vertical dividers
        \draw (1,0) -- (1,1);
        \draw (2,0) -- (2,1);
        \draw (3,0) -- (3,1);
        \node at (0.5,0.5) {$p_1$};
        \node at (1.5,0.5) {$p_2$};
        \node at (2.5,0.5) {$\cdots$};
        \node at (3.5,0.5) {$p_n$};

        \draw (0.1,2) rectangle (0.8,5);
        \node at (0.5,2.5) {$e_{11}$};
        \node at (0.45,3.6) {$\vdots$};
        \node at (0.5,4.5) {$e_{1m}$};

        \draw (1.1,2) rectangle (1.8,5);
        \node at (1.5,2.5) {$e_{21}$};
        \node at (1.45,3.6) {$\vdots$};
        \node at (1.5,4.5) {$e_{2m}$};

        \draw (3.1,2) rectangle (3.8,5);
        \node at (3.5,2.5) {$e_{n1}$};
        \node at (3.45,3.6) {$\vdots$};
        \node at (3.5,4.5) {$e_{nm}$};

        \draw[->] (0.5,1.1) -- (0.5,1.9);
        \draw[->] (1.5,1.1) -- (1.5,1.9);
        \draw[->] (3.5,1.1) -- (3.5,1.9);
    \end{tikzpicture}
    \caption{The format of the array $A$, where $p_n$ is the $n^\text{th}$
    memory address, pointing to an array where $e_nm$ is the $m^\text{th}$
    element of the $n^\text{th}$ element in $A$.}
    \label{fig:array}
\end{figure}

\section{Implementation}

\section{Evaluation}
\begin{itemize}
    \item Test insertion-sort
    \item Test quick-sort
    \item Benchmark insertion-sort vs. quick-sort random data
    \item Benchmark insertion-sort vs. quick-sort sorted data
    \item Benchmark insertion-sort vs. quick-sort unsorted data
\end{itemize}

\begin{tikzpicture}
    \begin{axis}[xlabel={size},ylabel={seconds}]
        \addplot[only marks] table [x=size, y=real, col sep=comma] 
        {../benchmark_results.csv};
    \end{axis}
\end{tikzpicture}

\section{Conclusion}
    
\end{document}