Apache Arrow is blazingly fast

The code finds the minimum and maximum values of an array of doubles, for array sizes from 1,000 up to 100M elements.
The result: arrow::compute::MinMax is 3-4 times faster than std::minmax_element, with no noticeable memory overhead.

#include <iostream>
#include <chrono>
#include <vector>
#include <random>
#include <algorithm>
#include <arrow/api.h>
#include <arrow/builder.h>
#include <arrow/compute/api.h>
using namespace std;
// Creates a shared vector holding `data_size` random doubles.
//
// Values are drawn from std::uniform_real_distribution<>(1.1, 100.9) using a
// Mersenne Twister seeded from std::random_device, so the contents differ from
// run to run.
std::shared_ptr<std::vector<double>> vector_generate(uint data_size) {
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<> uniform(1.1, 100.9);

  // Allocate the vector directly inside the shared_ptr, then fill every slot
  // in order with one draw each.
  auto values = std::make_shared<std::vector<double>>(data_size);
  for (double& slot : *values) {
    slot = uniform(engine);
  }
  return values;
}
std::shared_ptr<arrow::Array> arrow_from_vector(std::shared_ptr<std::vector<double>> data) {
arrow::DoubleBuilder builder;
auto _ = builder.AppendValues(data->begin(), data->end());
return builder.Finish().ValueOrDie();
}
// Invokes `func` exactly once and measures how long the call takes.
//
// Prints "Execution time (<func_name>): <seconds> seconds" to std::cout and
// returns the elapsed time in whole microseconds.
template<typename Func>
int timed(Func func, std::string func_name) {
  // steady_clock is monotonic, so the measured interval cannot be skewed by
  // system clock adjustments; high_resolution_clock gives no such guarantee.
  using Clock = std::chrono::steady_clock;

  const auto start = Clock::now();
  func();
  const auto end = Clock::now();

  const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
  std::cout << "Execution time (" << func_name << "): " << duration.count() / 1000000.0 << " seconds" << std::endl;

  // count() is a wider integer type than int; make the narrowing explicit.
  return static_cast<int>(duration.count());
}
// Runs one benchmark round on `data_size` random doubles.
//
// Generates the data, converts it to an Arrow array, times
// std::minmax_element on the vector and arrow::compute::MinMax on the array,
// and prints both timings followed by their ratio.
void work(uint data_size) {
  std::cout << "---- New experiment (size): " << data_size << std::endl;
  const auto vec = vector_generate(data_size);
  const auto arr = arrow_from_vector(vec);

  // The std:: result is stored into volatile sinks so an optimizing compiler
  // cannot discard the whole scan as dead code and report a bogus timing.
  volatile double min_sink = 0.0;
  volatile double max_sink = 0.0;

  // timed() runs the callable synchronously, so capturing by reference is safe
  // and avoids copying the shared_ptrs.
  const auto duration1 = timed(
      [&]() {
        const auto extremes = std::minmax_element(vec->begin(), vec->end());
        if (extremes.first != vec->end()) {  // empty input: nothing to dereference
          min_sink = *extremes.first;
          max_sink = *extremes.second;
        }
      },
      "std::minmax_element");
  const auto duration2 = timed([&]() { arrow::compute::MinMax(arr).ValueOrDie(); }, "arrow::compute::MinMax");

  // Timings are whole microseconds, so a very short run can measure as 0;
  // skip the ratio then instead of dividing by zero.
  if (duration2 > 0) {
    std::cout << "arrow::compute::MinMax is faster: " << static_cast<double>(duration1) / duration2 << " times" << std::endl;
  } else {
    std::cout << "arrow::compute::MinMax is faster: n/a (run shorter than 1 microsecond)" << std::endl;
  }
}
// Entry point: runs the benchmark for sizes 1000, 10000, ... growing tenfold
// each round while the size stays below 1000000000.
int main()
{
  uint size = 1000U;
  while (size < 1000000000) {
    work(size);
    size *= 10;
  }
  return 0;
}