The code benchmarks finding the minimum and maximum values of an array of doubles, for array sizes from 1,000 up to 100M elements.
The result: arrow::compute::MinMax is 3-4 times faster than std::minmax_element, without noticeable memory overhead.

This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <chrono> | |
#include <vector> | |
#include <random> | |
#include <algorithm> | |
#include <arrow/api.h> | |
#include <arrow/builder.h> | |
#include <arrow/compute/api.h> | |
using namespace std; | |
/// Creates a shared vector holding `data_size` pseudo-random doubles.
///
/// Values are drawn from a uniform real distribution constructed with the
/// bounds 1.1 and 100.9, using a Mersenne Twister engine seeded from
/// std::random_device, so the contents differ from run to run.
///
/// @param data_size  Number of elements to generate.
/// @return Shared pointer to the freshly generated vector.
std::shared_ptr<std::vector<double>> vector_generate(uint data_size) {
    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::uniform_real_distribution<double> sample(1.1, 100.9);

    // Allocate the final storage up front, then fill it in index order.
    auto values = std::make_shared<std::vector<double>>(data_size);
    for (double& slot : *values) {
        slot = sample(engine);
    }
    return values;
}
std::shared_ptr<arrow::Array> arrow_from_vector(std::shared_ptr<std::vector<double>> data) { | |
arrow::DoubleBuilder builder; | |
auto _ = builder.AppendValues(data->begin(), data->end()); | |
return builder.Finish().ValueOrDie(); | |
} | |
/// Invokes `func` once, prints how long it took, and returns the duration.
///
/// @tparam Func      Callable invocable with no arguments; any result is discarded.
/// @param func       The work to measure.
/// @param func_name  Label inserted into the printed report line.
/// @return Elapsed time in whole microseconds, narrowed to int.
template<typename Func>
int timed(Func func, std::string func_name) {
    // steady_clock is monotonic. high_resolution_clock may be an alias of
    // system_clock, which can be adjusted while a measurement is in flight.
    const auto start = std::chrono::steady_clock::now();
    func();
    const auto end = std::chrono::steady_clock::now();
    const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    std::cout << "Execution time (" << func_name << "): " << duration.count() / 1000000.0 << " seconds" << std::endl;
    // count() is wider than int; make the narrowing required by the return type explicit.
    return static_cast<int>(duration.count());
}
void work(uint data_size) { | |
std::cout << "---- New experiment (size): " << data_size << std::endl; | |
const auto vec = vector_generate(data_size); | |
const auto arr = arrow_from_vector(vec); | |
auto duration1 = timed([vec](){std::minmax_element(vec->begin(), vec->end());}, "std::min_element"); | |
auto duration2 = timed([arr](){arrow::compute::MinMax(arr).ValueOrDie();}, "arrow::compute::MinMax"); | |
std::cout << "arrow::compute::MinMax is faster: " << (double)duration1 / duration2 << " times" << std::endl; | |
} | |
int main() | |
{ | |
for (uint i = 1000U; i < 1000000000; i*=10) | |
work(i); | |
return 0; | |
} |
