#!/usr/bin/env bash
#
# Generate tab-separated (.tsv) test data files in ./data/ (relative to the
# current working directory). For every data type (random, sorted, reverse),
# every size and every run, one file named
#   data/<type>_<size>_<run>.tsv
# is written, holding <size> lines of two tab-separated integers.
#
# Note: generating the test data may take a while.
#
# History: moved from ./generate_test_data.sh to test/generate_test_data.sh
# and extended with sorted and reverse-sorted files (commit 90843f0,
# Andreas Kapp Lindquist, 2025-10-23).
#
# Bash is required: the script relies on arrays, C-style for loops, brace
# expansion and $RANDOM, none of which are available in a POSIX /bin/sh.

set -euo pipefail

#######################################
# Write rows of two random integers (taken from $RANDOM).
# 'printf' is used instead of 'echo' for performance reasons.
# Arguments:
#   $1 - path of the file to (over)write
#   $2 - number of rows to generate
#######################################
random_file() {
  local outfile=$1 count=$2 i
  for ((i = 1; i <= count; i++)); do
    printf '%d\t%d\n' "$RANDOM" "$RANDOM"
  done > "$outfile"
}

#######################################
# Write rows "i<TAB>i" for i ascending from 1 to the row count.
# Arguments:
#   $1 - path of the file to (over)write
#   $2 - number of rows to generate
#######################################
sorted_file() {
  local outfile=$1 count=$2 i
  for ((i = 1; i <= count; i++)); do
    printf '%d\t%d\n' "$i" "$i"
  done > "$outfile"
}

#######################################
# Write rows "i<TAB>i" for i descending from the row count to 1.
# Arguments:
#   $1 - path of the file to (over)write
#   $2 - number of rows to generate
#######################################
reverse_file() {
  local outfile=$1 count=$2 i
  for ((i = count; i >= 1; i--)); do
    printf '%d\t%d\n' "$i" "$i"
  done > "$outfile"
}

#######################################
# Generate three runs of every data type at every size.
# Outputs: progress messages on stdout, data files under ./data/
#######################################
main() {
  # Larger sizes are currently disabled: 500000 1000000 5000000
  local -a sizes=(10000 50000 100000)
  local -a types=(random sorted reverse)
  local data_type size run outfile

  mkdir -p data

  for data_type in "${types[@]}"; do
    for size in "${sizes[@]}"; do
      for run in {1..3}; do
        # '.tsv' stands for tab-separated values
        outfile="data/${data_type}_${size}_${run}.tsv"
        echo "Generating $outfile ..."

        case "$data_type" in
          sorted) sorted_file "$outfile" "$size" ;;
          reverse) reverse_file "$outfile" "$size" ;;
          *) random_file "$outfile" "$size" ;;
        esac
      done
    done
  done

  # Not only random data is produced, so the message names all test data.
  echo "All test data files generated in ./data/"
}

main "$@"