aboutsummaryrefslogtreecommitdiff
path: root/test/generate_test_data.sh
blob: 95e7ddb4a0ce5fe219f57ed5c3c42e9b9d85929b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env bash
# Requires bash (not POSIX sh): this script uses arrays, C-style for loops,
# brace expansion and $RANDOM, none of which plain /bin/sh guarantees.
# Note: generating the test data
# may take a while.

# using 'printf' instead of 'echo'
# due to performance issues
#######################################
# Write a TSV file of pseudo-random integer pairs.
# Arguments: $1 - output file path (created or truncated)
#            $2 - number of rows to write (0 yields an empty file)
# Outputs:   $2 lines of "<int>\t<int>" written to $1, each value taken
#            from bash's $RANDOM
# Returns:   2 if fewer than two arguments are given; otherwise the status
#            of the write loop
#######################################
random_file() {
    if (( $# < 2 )); then
        printf 'usage: random_file <outfile> <rows>\n' >&2
        return 2
    fi
    local -r dest=$1 rows=$2
    local i   # keep the loop counter out of the caller's scope

    # Redirect the whole loop once instead of reopening the file per row.
    for ((i = 1; i <= rows; i++)); do
        printf '%d\t%d\n' "$RANDOM" "$RANDOM"
    done > "$dest"
}

#######################################
# Write a TSV file of ascending integer pairs.
# Arguments: $1 - output file path (created or truncated)
#            $2 - number of rows to write (0 yields an empty file)
# Outputs:   lines "1\t1", "2\t2", ... up to "$2\t$2" written to $1
# Returns:   2 if fewer than two arguments are given; otherwise the status
#            of the write loop
#######################################
sorted_file() {
    if (( $# < 2 )); then
        printf 'usage: sorted_file <outfile> <rows>\n' >&2
        return 2
    fi
    local -r dest=$1 rows=$2
    local i   # keep the loop counter out of the caller's scope

    # Redirect the whole loop once instead of reopening the file per row.
    for ((i = 1; i <= rows; i++)); do
        printf '%d\t%d\n' "$i" "$i"
    done > "$dest"
}

#######################################
# Write a TSV file of descending integer pairs.
# Arguments: $1 - output file path (created or truncated)
#            $2 - number of rows to write (0 yields an empty file)
# Outputs:   lines "$2\t$2", ..., "2\t2", "1\t1" written to $1
# Returns:   2 if fewer than two arguments are given; otherwise the status
#            of the write loop
#######################################
reverse_file() {
    if (( $# < 2 )); then
        printf 'usage: reverse_file <outfile> <rows>\n' >&2
        return 2
    fi
    local -r dest=$1 rows=$2
    local i   # keep the loop counter out of the caller's scope

    # Redirect the whole loop once instead of reopening the file per row.
    for ((i = rows; i >= 1; i--)); do
        printf '%d\t%d\n' "$i" "$i"
    done > "$dest"
}

# Driver: for every data type and size, write three files under ./data
# (one per run), named data/<type>_<size>_<run>.tsv.
mkdir -p data || exit 1
sizes=(0 10000 20000 30000 40000 50000 60000 70000 80000 90000 100000) # 500000 1000000 5000000)
types=("random" "sorted" "reverse")

for data_type in "${types[@]}"; do
    for size in "${sizes[@]}"; do
        for run in {1..3}; do
            # '.tsv' stands for tab-separated values
            outfile="data/${data_type}_${size}_${run}.tsv"
            echo "Generating $outfile ..."

            # Anything that is not "sorted" or "reverse" falls back to
            # random data. Stop at the first failed write rather than
            # leaving a silently incomplete data set.
            case "$data_type" in
                sorted)  sorted_file "$outfile" "$size" ;;
                reverse) reverse_file "$outfile" "$size" ;;
                *)       random_file "$outfile" "$size" ;;
            esac || {
                printf 'error: failed to generate %s\n' "$outfile" >&2
                exit 1
            }
        done
    done
done

echo "All test data files generated in ./data/"