The code finds the minimum and maximum values of an array of doubles, for array sizes up to 100M elements.
Result: arrow::compute::MinMax is 3-4 times faster than std::minmax_element, with no noticeable memory overhead.

#include <algorithm>
#include <chrono>
#include <iostream>
#include <memory>
#include <random>
#include <string>
#include <vector>

#include <arrow/api.h>
#include <arrow/builder.h>
#include <arrow/compute/api.h>

using namespace std;
/// Builds a vector of `data_size` pseudo-random doubles.
///
/// Values come from std::uniform_real_distribution<>(1.1, 100.9) driven by a
/// std::mt19937 engine that is seeded from std::random_device (no fixed seed).
///
/// @param data_size Number of elements to generate; 0 yields an empty vector.
/// @return Shared pointer owning the generated vector.
std::shared_ptr<std::vector<double>> vector_generate(uint data_size) {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> distrib(1.1, 100.9);

    // Allocate the storage inside the shared_ptr up front and fill it in place,
    // rather than building a local vector and moving it afterwards.
    auto data = std::make_shared<std::vector<double>>(data_size);
    std::generate(data->begin(), data->end(), [&]() { return distrib(gen); });
    return data;
}
std::shared_ptr<arrow::Array> arrow_from_vector(std::shared_ptr<std::vector<double>> data) { | |
arrow::DoubleBuilder builder; | |
auto _ = builder.AppendValues(data->begin(), data->end()); | |
return builder.Finish().ValueOrDie(); | |
} | |
/// Runs `func` once, prints how long it took, and returns the elapsed time.
///
/// @tparam Func Callable type invocable with no arguments.
/// @param func Callable to measure; it is invoked exactly once.
/// @param func_name Label inserted into the printed "Execution time (...)" line.
/// @return Elapsed time in whole microseconds (narrowed to int).
template<typename Func>
int timed(Func func, std::string func_name) {
    // steady_clock is monotonic, which is what interval measurement needs;
    // high_resolution_clock is allowed to alias the adjustable system clock.
    using Clock = std::chrono::steady_clock;

    const auto start = Clock::now();
    func();
    const auto end = Clock::now();

    const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    // std::endl is kept on purpose: each result is flushed as soon as it is
    // available, which matters when later measurements run for a long time.
    std::cout << "Execution time (" << func_name << "): " << duration.count() / 1000000.0 << " seconds" << std::endl;

    // count() is a 64-bit tick count; the narrowing to the int return type is explicit.
    return static_cast<int>(duration.count());
}
void work(uint data_size) { | |
std::cout << "---- New experiment (size): " << data_size << std::endl; | |
const auto vec = vector_generate(data_size); | |
const auto arr = arrow_from_vector(vec); | |
auto duration1 = timed([vec](){std::minmax_element(vec->begin(), vec->end());}, "std::min_element"); | |
auto duration2 = timed([arr](){arrow::compute::MinMax(arr).ValueOrDie();}, "arrow::compute::MinMax"); | |
std::cout << "arrow::compute::MinMax is faster: " << (double)duration1 / duration2 << " times" << std::endl; | |
} | |
int main() | |
{ | |
for (uint i = 1000U; i < 1000000000; i*=10) | |
work(i); | |
return 0; | |
} |
