diff --git a/BENCHMARKING.md b/BENCHMARKING.md index 8ee5a14..32f510f 100644 --- a/BENCHMARKING.md +++ b/BENCHMARKING.md @@ -122,6 +122,8 @@ These kernels DO NOT do a KNN search query. The only work measured is the distan - SIMD+L2: ```/benchmarks/KernelNaryL2``` - SIMD+IP: ```/benchmarks/KernelNaryIP``` +*Only support vector element type of float32, for other types like uint8, checkout the updates on branch `main`* + All these executables have two obligatory parameters: - `` and ``. These determine the random collection to be used for the test. The values are limited to: `n_vectors=(64 128 512 1024 4096 8192 16384 65536 131072 262144 1048576)`, `dimensions=(8 16 32 64 128 192 256 384 512 768 1024 1536 2048 4096 8192)`. diff --git a/benchmarks/bench_kernels/nary_ip.cpp b/benchmarks/bench_kernels/nary_ip.cpp index 79d8121..2eea317 100644 --- a/benchmarks/bench_kernels/nary_ip.cpp +++ b/benchmarks/bench_kernels/nary_ip.cpp @@ -61,13 +61,6 @@ int main(int argc, char *argv[]) { std::cout << std::setprecision(16) << distances[N_VECTORS-1] << "\n"; // Dummy print so the compiler does the job BenchmarkMetadata results_metadata = { - dataset, - ALGORITHM, - NUM_MEASURE_RUNS, - NUM_WARMUP_RUNS, - 1, - 0, - 0 - }; + dataset, ALGORITHM, NUM_MEASURE_RUNS, 1, 1, 0, 0}; BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata); } diff --git a/benchmarks/bench_kernels/nary_l1.cpp b/benchmarks/bench_kernels/nary_l1.cpp index fb4bdbb..237e28b 100644 --- a/benchmarks/bench_kernels/nary_l1.cpp +++ b/benchmarks/bench_kernels/nary_l1.cpp @@ -61,13 +61,6 @@ int main(int argc, char *argv[]) { std::cout << std::setprecision(16) << distances[N_VECTORS-1] << "\n"; // Dummy print so the compiler does the job BenchmarkMetadata results_metadata = { - dataset, - ALGORITHM, - NUM_MEASURE_RUNS, - NUM_WARMUP_RUNS, - 1, - 0, - 0 - }; + dataset, ALGORITHM, NUM_MEASURE_RUNS, 1, 1, 0, 0}; BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata); } diff --git a/benchmarks/bench_kernels/nary_l2.cpp b/benchmarks/bench_kernels/nary_l2.cpp index c9a378a..3f518b7 100644 --- a/benchmarks/bench_kernels/nary_l2.cpp +++ b/benchmarks/bench_kernels/nary_l2.cpp @@ -66,13 +66,6 @@ int main(int argc, char *argv[]) { std::cout << std::setprecision(16) << distances[N_VECTORS-1] << "\n"; // Dummy print so the compiler does the job BenchmarkMetadata results_metadata = { - dataset, - ALGORITHM, - NUM_MEASURE_RUNS, - NUM_WARMUP_RUNS, - 1, - 0, - 0 - }; + dataset, ALGORITHM, NUM_MEASURE_RUNS, 1, 1, 0, 0}; BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata); } diff --git a/benchmarks/bench_kernels/pdx_ip.cpp b/benchmarks/bench_kernels/pdx_ip.cpp index cc3823b..aef3f5f 100644 --- a/benchmarks/bench_kernels/pdx_ip.cpp +++ b/benchmarks/bench_kernels/pdx_ip.cpp @@ -7,6 +7,7 @@ int main(int argc, char *argv[]) { std::string ALGORITHM = "pdx"; size_t DIMENSION; size_t N_VECTORS; + std::string dtype = "float32"; if (argc > 1){ N_VECTORS = atoi(argv[1]); } @@ -25,7 +26,8 @@ int main(int argc, char *argv[]) { std::cout << "RUNS: " << NUM_MEASURE_RUNS << "\n"; std::string RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "PURESCAN_PDX_IP.csv"; - std::string filename = std::to_string(N_VECTORS) + "x"+ std::to_string(DIMENSION) + "-pdx"; + std::string filename = std::to_string(N_VECTORS) + "x" + + std::to_string(DIMENSION) + "-pdx-" + dtype; std::string dataset = std::to_string(N_VECTORS) + "x" + std::to_string(DIMENSION); float *raw_data = MmapFile32( BenchmarkUtils::PURESCAN_DATA + filename); @@ -65,13 +67,6 @@ int main(int argc, char *argv[]) { std::cout << std::setprecision(16) << PDXScanner::distances[PDXScanner<>::PDX_VECTOR_SIZE - 1] << "\n"; BenchmarkMetadata results_metadata = { - dataset, - ALGORITHM, - NUM_MEASURE_RUNS, - NUM_WARMUP_RUNS, - 1, - 0, - 0 - }; + dataset, ALGORITHM, NUM_MEASURE_RUNS, 1, 1, 0, 0}; BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata); } diff --git a/benchmarks/bench_kernels/pdx_l1.cpp b/benchmarks/bench_kernels/pdx_l1.cpp index 32165f8..ddfba7e 100644 --- a/benchmarks/bench_kernels/pdx_l1.cpp +++ b/benchmarks/bench_kernels/pdx_l1.cpp @@ -7,6 +7,7 @@ int main(int argc, char *argv[]) { std::string ALGORITHM = "pdx"; size_t DIMENSION; size_t N_VECTORS; + std::string dtype = "float32"; if (argc > 1){ N_VECTORS = atoi(argv[1]); } @@ -26,7 +27,8 @@ int main(int argc, char *argv[]) { std::string RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "PURESCAN_PDX_L1.csv"; - std::string filename = std::to_string(N_VECTORS) + "x" + std::to_string(DIMENSION) + "-pdx"; + std::string filename = std::to_string(N_VECTORS) + "x" + + std::to_string(DIMENSION) + "-pdx-" + dtype; std::string dataset = std::to_string(N_VECTORS) + "x" + std::to_string(DIMENSION); float *raw_data = MmapFile32( BenchmarkUtils::PURESCAN_DATA + filename); @@ -66,13 +68,6 @@ int main(int argc, char *argv[]) { std::cout << std::setprecision(16) << PDXScanner::distances[PDXScanner<>::PDX_VECTOR_SIZE - 1] << "\n"; BenchmarkMetadata results_metadata = { - dataset, - ALGORITHM, - NUM_MEASURE_RUNS, - NUM_WARMUP_RUNS, - 1, - 0, - 0 - }; + dataset, ALGORITHM, NUM_MEASURE_RUNS, 1, 1, 0, 0}; BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata); } diff --git a/benchmarks/bench_kernels/pdx_l2.cpp b/benchmarks/bench_kernels/pdx_l2.cpp index 22ac062..29ad1fd 100644 --- a/benchmarks/bench_kernels/pdx_l2.cpp +++ b/benchmarks/bench_kernels/pdx_l2.cpp @@ -11,6 +11,7 @@ int main(int argc, char *argv[]) { std::string ALGORITHM = "pdx"; size_t DIMENSION; size_t N_VECTORS; + std::string dtype = "float32"; if (argc > 1){ N_VECTORS = atoi(argv[1]); } @@ -30,7 +31,8 @@ int main(int argc, char *argv[]) { std::string RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "PURESCAN_GATHER_PDX_L2.csv"; - std::string filename = std::to_string(N_VECTORS) + "x" + std::to_string(DIMENSION) + "-pdx"; + std::string filename = std::to_string(N_VECTORS) + "x" + + std::to_string(DIMENSION) + "-pdx-" + dtype; std::string dataset = std::to_string(N_VECTORS) + "x" + std::to_string(DIMENSION); float *raw_data = MmapFile32( BenchmarkUtils::PURESCAN_DATA + filename); @@ -68,13 +70,6 @@ int main(int argc, char *argv[]) { std::cout << std::setprecision(16) << PDXScanner::distances[PDXScanner<>::PDX_VECTOR_SIZE - 1] << "\n"; BenchmarkMetadata results_metadata = { - dataset, - ALGORITHM, - NUM_MEASURE_RUNS, - NUM_WARMUP_RUNS, - 1, - 0, - 0 - }; + dataset, ALGORITHM, NUM_MEASURE_RUNS, 1, 1, 0, 0}; BenchmarkUtils::SaveResults(runtimes, RESULTS_PATH, results_metadata); } diff --git a/benchmarks/bench_kernels/pdx_l2_128.cpp b/benchmarks/bench_kernels/pdx_l2_128.cpp index 7bdb360..26f31b0 100644 --- a/benchmarks/bench_kernels/pdx_l2_128.cpp +++ b/benchmarks/bench_kernels/pdx_l2_128.cpp @@ -7,6 +7,7 @@ int main(int argc, char *argv[]) { std::string ALGORITHM = "pdx"; size_t DIMENSION; size_t N_VECTORS; + std::string dtype = "float32"; if (argc > 1){ N_VECTORS = atoi(argv[1]); } @@ -26,7 +27,8 @@ int main(int argc, char *argv[]) { std::string RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "PURESCAN_PDX_L2_128.csv"; - std::string filename = "128x" + std::to_string(N_VECTORS) + "x"+ std::to_string(DIMENSION) + "-pdx"; + std::string filename = "128x" + std::to_string(N_VECTORS) + "x" + + std::to_string(DIMENSION) + "-pdx-" + dtype; std::string dataset = std::to_string(N_VECTORS) + "x" + std::to_string(DIMENSION); float *raw_data = MmapFile32( BenchmarkUtils::PURESCAN_DATA + filename); @@ -69,7 +71,7 @@ int main(int argc, char *argv[]) { dataset, ALGORITHM, NUM_MEASURE_RUNS, - NUM_WARMUP_RUNS, + 1, 1, 0, 0 diff --git a/benchmarks/bench_kernels/pdx_l2_16.cpp b/benchmarks/bench_kernels/pdx_l2_16.cpp index 396c5cf..e153ff2 100644 --- a/benchmarks/bench_kernels/pdx_l2_16.cpp +++ b/benchmarks/bench_kernels/pdx_l2_16.cpp @@ -7,6 +7,7 @@ int main(int argc, char *argv[]) { std::string ALGORITHM = "pdx"; size_t DIMENSION; size_t N_VECTORS; + std::string dtype = "float32"; if (argc > 1){ N_VECTORS = atoi(argv[1]); } @@ -26,7 +27,8 @@ int main(int argc, char *argv[]) { std::string RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "PURESCAN_PDX_L2_16.csv"; - std::string filename = "16x" + std::to_string(N_VECTORS) + "x"+ std::to_string(DIMENSION) + "-pdx"; + std::string filename = "16x" + std::to_string(N_VECTORS) + "x" + + std::to_string(DIMENSION) + "-pdx-" + dtype; std::string dataset = std::to_string(N_VECTORS) + "x" + std::to_string(DIMENSION); float *raw_data = MmapFile32( BenchmarkUtils::PURESCAN_DATA + filename); @@ -69,7 +71,7 @@ int main(int argc, char *argv[]) { dataset, ALGORITHM, NUM_MEASURE_RUNS, - NUM_WARMUP_RUNS, + 1, 1, 0, 0 diff --git a/benchmarks/bench_kernels/pdx_l2_256.cpp b/benchmarks/bench_kernels/pdx_l2_256.cpp index fe947e0..dfb93f9 100644 --- a/benchmarks/bench_kernels/pdx_l2_256.cpp +++ b/benchmarks/bench_kernels/pdx_l2_256.cpp @@ -7,6 +7,7 @@ int main(int argc, char *argv[]) { std::string ALGORITHM = "pdx"; size_t DIMENSION; size_t N_VECTORS; + std::string dtype = "float32"; if (argc > 1){ N_VECTORS = atoi(argv[1]); } @@ -26,7 +27,8 @@ int main(int argc, char *argv[]) { std::string RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "PURESCAN_PDX_L2_256.csv"; - std::string filename = "256x" + std::to_string(N_VECTORS) + "x"+ std::to_string(DIMENSION) + "-pdx"; + std::string filename = "256x" + std::to_string(N_VECTORS) + "x" + + std::to_string(DIMENSION) + "-pdx-" + dtype; std::string dataset = std::to_string(N_VECTORS) + "x" + std::to_string(DIMENSION); float *raw_data = MmapFile32( BenchmarkUtils::PURESCAN_DATA + filename); @@ -69,7 +71,7 @@ int main(int argc, char *argv[]) { dataset, ALGORITHM, NUM_MEASURE_RUNS, - NUM_WARMUP_RUNS, + 1, 1, 0, 0 diff --git a/benchmarks/bench_kernels/pdx_l2_32.cpp b/benchmarks/bench_kernels/pdx_l2_32.cpp index a40e8a9..fd91e1d 100644 --- a/benchmarks/bench_kernels/pdx_l2_32.cpp +++ b/benchmarks/bench_kernels/pdx_l2_32.cpp @@ -7,6 +7,7 @@ int main(int argc, char *argv[]) { std::string ALGORITHM = "pdx"; size_t DIMENSION; size_t N_VECTORS; + std::string dtype = "float32"; if (argc > 1){ N_VECTORS = atoi(argv[1]); } @@ -26,7 +27,8 @@ int main(int argc, char *argv[]) { std::string RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "PURESCAN_PDX_L2_32.csv"; - std::string filename = "32x" + std::to_string(N_VECTORS) + "x"+ std::to_string(DIMENSION) + "-pdx"; + std::string filename = "32x" + std::to_string(N_VECTORS) + "x" + + std::to_string(DIMENSION) + "-pdx-" + dtype; std::string dataset = std::to_string(N_VECTORS) + "x" + std::to_string(DIMENSION); float *raw_data = MmapFile32( BenchmarkUtils::PURESCAN_DATA + filename); @@ -69,7 +71,7 @@ int main(int argc, char *argv[]) { dataset, ALGORITHM, NUM_MEASURE_RUNS, - NUM_WARMUP_RUNS, + 1, 1, 0, 0 diff --git a/benchmarks/bench_kernels/pdx_l2_512.cpp b/benchmarks/bench_kernels/pdx_l2_512.cpp index 5ef4c5e..272a046 100644 --- a/benchmarks/bench_kernels/pdx_l2_512.cpp +++ b/benchmarks/bench_kernels/pdx_l2_512.cpp @@ -7,6 +7,7 @@ int main(int argc, char *argv[]) { std::string ALGORITHM = "pdx"; size_t DIMENSION; size_t N_VECTORS; + std::string dtype = "float32"; if (argc > 1){ N_VECTORS = atoi(argv[1]); } @@ -26,7 +27,8 @@ int main(int argc, char *argv[]) { std::string RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "PURESCAN_PDX_L2_512.csv"; - std::string filename = "512x" + std::to_string(N_VECTORS) + "x"+ std::to_string(DIMENSION) + "-pdx"; + std::string filename = "512x" + std::to_string(N_VECTORS) + "x" + + std::to_string(DIMENSION) + "-pdx-" + dtype; std::string dataset = std::to_string(N_VECTORS) + "x" + std::to_string(DIMENSION); float *raw_data = MmapFile32( BenchmarkUtils::PURESCAN_DATA + filename); @@ -69,7 +71,7 @@ int main(int argc, char *argv[]) { dataset, ALGORITHM, NUM_MEASURE_RUNS, - NUM_WARMUP_RUNS, + 1, 1, 0, 0 diff --git a/benchmarks/bench_kernels/pdx_l2_64.cpp b/benchmarks/bench_kernels/pdx_l2_64.cpp index bb84eb4..05aab57 100644 --- a/benchmarks/bench_kernels/pdx_l2_64.cpp +++ b/benchmarks/bench_kernels/pdx_l2_64.cpp @@ -7,6 +7,7 @@ int main(int argc, char *argv[]) { std::string ALGORITHM = "pdx"; size_t DIMENSION; size_t N_VECTORS; + std::string dtype = "float32"; if (argc > 1){ N_VECTORS = atoi(argv[1]); } @@ -26,7 +27,8 @@ int main(int argc, char *argv[]) { std::string RESULTS_PATH = BENCHMARK_UTILS.RESULTS_DIR_PATH + "PURESCAN_PDX_L2_64.csv"; - std::string filename = std::to_string(N_VECTORS) + "x"+ std::to_string(DIMENSION) + "-pdx"; + std::string filename = std::to_string(N_VECTORS) + "x" + + std::to_string(DIMENSION) + "-pdx-" + dtype; std::string dataset = std::to_string(N_VECTORS) + "x" + std::to_string(DIMENSION); float *raw_data = MmapFile32( BenchmarkUtils::PURESCAN_DATA + filename); @@ -69,7 +71,7 @@ int main(int argc, char *argv[]) { dataset, ALGORITHM, NUM_MEASURE_RUNS, - NUM_WARMUP_RUNS, + 1, 1, 0, 0 diff --git a/benchmarks/python_scripts/setup_purescan.py b/benchmarks/python_scripts/setup_purescan.py index ba0f956..96981ca 100644 --- a/benchmarks/python_scripts/setup_purescan.py +++ b/benchmarks/python_scripts/setup_purescan.py @@ -6,6 +6,10 @@ # Setup random collections of vectors with PDX and Nary def generate_synthetic_data(BLOCK_SIZES=(), dimensions=(), vectors=(), dtype=np.float32): + if dtype not in [np.float32]: + # for more dtype support, checkout the updates on the `main` branch + raise ValueError('dtype must be np.float32') + type_size = np.dtype(dtype).itemsize adaptive_block_size = int(256 / type_size) # 256 bytes can fit in registers across architectures print('Block size =', adaptive_block_size)