Apache Arrow is blazingly fast

The code is looking for a min value of an array of doubles of 100M size.
The result is arrow::compute::MinMax is 3-4 times faster than std::minmax_element without noticeable memory overhead.

#include <iostream>
#include <chrono>
#include <vector>
#include <random>
#include <algorithm>
#include <arrow/api.h>
#include <arrow/builder.h>
#include <arrow/compute/api.h>
using namespace std;
std::shared_ptr<std::vector<double>> vector_generate(uint data_size) {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<> distrib(1.1, 100.9);
std::vector<double> data(data_size);
std::generate_n(data.begin(), data.size(), [&]() { return distrib(gen); });
return std::make_shared<std::vector<double>>(std::move(data));
}
std::shared_ptr<arrow::Array> arrow_from_vector(std::shared_ptr<std::vector<double>> data) {
arrow::DoubleBuilder builder;
auto _ = builder.AppendValues(data->begin(), data->end());
return builder.Finish().ValueOrDie();
}
template<typename Func>
int timed(Func func, std::string func_name) {
auto start = std::chrono::high_resolution_clock::now();
func();
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
std::cout << "Execution time (" << func_name << "): " << duration.count() / 1000000.0 << " seconds" << std::endl;
return duration.count();
}
void work(uint data_size) {
std::cout << "---- New experiment (size): " << data_size << std::endl;
const auto vec = vector_generate(data_size);
const auto arr = arrow_from_vector(vec);
auto duration1 = timed([vec](){std::minmax_element(vec->begin(), vec->end());}, "std::min_element");
auto duration2 = timed([arr](){arrow::compute::MinMax(arr).ValueOrDie();}, "arrow::compute::MinMax");
std::cout << "arrow::compute::MinMax is faster: " << (double)duration1 / duration2 << " times" << std::endl;
}
int main()
{
for (uint i = 1000U; i < 1000000000; i*=10)
work(i);
return 0;
}

Let’s make code SOLID: Conway’s Game of Life

Our online Coding Dojo sessions became the most powerful learning experience for me, and recently we came across Conway’s Game of Life.

I remember playing this game on paper when I was a child. Yesterday I had the pleasure to code the rules and see them working.

To make it more fun, we in our Coding Dojo club decided to separate concerns between “cells” and “grid” where the cells exist. After an exciting discussion for which I am grateful to Dennis, Saeed and Byron, I had started an all-nighter.

Here is what I got with a sunrise:

using System;
using System.Threading.Tasks;
namespace GameOfLife
{
internal class Program
{
private static void Main()
{
const int rows = 30;
const int cols = 30;
var grid = RandomCellsGrid(rows, cols, new Random());
while (!Console.KeyAvailable)
{
Console.WriteLine(grid.ToString());
grid = grid.NextGeneration();
Task.Delay(1000).Wait();
}
Console.WriteLine("Done!");
}
private static Grid RandomCellsGrid(int rows, int cols, Random random)
{
var cells = new Cell[rows, cols];
for (var row = 0; row < rows; row++)
for (var col = 0; col < cols; col++)
cells[row, col] = new Cell(RandomlyAlive(random));
return new Grid(cells);
}
private static bool RandomlyAlive(Random random)
{
const double threshold = 0.5;
return random.NextDouble() > threshold;
}
}
}
namespace GameOfLife
{
internal abstract class AbstractLifeObject
{
private protected AbstractSpace Belongs;
public void BelongsTo(AbstractSpace space)
{
Belongs = space;
}
public abstract AbstractLifeObject NextGeneration();
}
}
using System.Linq;
namespace GameOfLife
{
internal class Cell : AbstractLifeObject
{
private readonly bool _live;
public Cell(bool live)
{
_live = live;
}
public override AbstractLifeObject NextGeneration()
{
var neighbors = Belongs.GetNeighbors(this).OfType<Cell>().Count(c => c._live);
return new Cell(IsNextGenerationCellAlive(neighbors));
}
public override string ToString()
{
return _live ? "[]" : "--";
}
private bool IsNextGenerationCellAlive(int neighbors)
{
if (_live)
return neighbors is not (< 2 or > 3);
return neighbors == 3;
}
}
}
view raw Cell.cs hosted with ❤ by GitHub
using System.Collections.Generic;
namespace GameOfLife
{
internal abstract class AbstractSpace
{
public abstract IEnumerable<AbstractLifeObject> GetNeighbors(AbstractLifeObject obj);
}
}
using System;
using System.Collections.Generic;
using System.Text;
namespace GameOfLife
{
internal class Grid : AbstractSpace
{
private readonly int _cols;
private readonly AbstractLifeObject[,] _lifeObjects;
private readonly int _rows;
public Grid(AbstractLifeObject[,] objects)
{
_rows = objects.GetLength(0);
_cols = objects.GetLength(1);
BelongsToThis(objects);
_lifeObjects = objects;
}
public Grid NextGeneration()
{
var newLifeObjects = new AbstractLifeObject[_rows, _cols];
for (var row = 0; row < _rows; row++)
for (var col = 0; col < _cols; col++)
newLifeObjects[row, col] = _lifeObjects[row, col].NextGeneration();
return new Grid(newLifeObjects);
}
public override IEnumerable<AbstractLifeObject> GetNeighbors(AbstractLifeObject lifeObject)
{
var (objectRow, objectCol) = GetLocation(lifeObject);
for (var row = objectRow - 1; row <= objectRow + 1; row++)
for (var col = objectCol - 1; col <= objectCol + 1; col++)
{
if (row == objectRow && col == objectCol) continue;
if (row < 0 || col < 0) continue;
if (row >= _rows || col >= _cols) continue;
yield return _lifeObjects[row, col];
}
}
public override string ToString()
{
var strBuilder = new StringBuilder();
for (var row = 0; row < _rows; row++)
{
for (var col = 0; col < _cols; col++) strBuilder.Append(_lifeObjects[row, col]);
strBuilder.Append(Environment.NewLine);
}
return strBuilder.ToString();
}
private void BelongsToThis(AbstractLifeObject[,] objects)
{
for (var row = 0; row < _rows; row++)
for (var col = 0; col < _cols; col++)
objects[row, col].BelongsTo(this);
}
private (int row, int col) GetLocation(AbstractLifeObject lifeObject)
{
for (var row = 0; row < _rows; row++)
for (var col = 0; col < _cols; col++)
if (_lifeObjects[row, col].Equals(lifeObject))
return (row, col);
throw new ArgumentException("Grid does not contain this life object");
}
}
}
view raw Grid.cs hosted with ❤ by GitHub

Conclusion:

I can not deny that SOLID code has its price. It takes much more time (2x or maybe 3x to compare to writing a monolithic code).
Nevertheless, we get benefits such as extensibility and reusability.

Let’s say we want to run the same game in 3D space. We can easily make this modification with SOLID code.

LeetCode: Divide Two Integers (Solution)

The link to the problem https://leetcode.com/problems/divide-two-integers

public class Solution
{
public int Divide(int dividend, int divisor)
{
var negative = IsResultNegative(dividend, divisor);
var dividendPositive = ToPositive(dividend);
var divisorPositive = ToPositive(divisor);
var resultPositive = DividePositive(dividendPositive, divisorPositive);
return FromPositive(resultPositive, negative);
}
private static uint ToPositive(int input)
{
if (input == int.MinValue)
{
return int.MaxValue + 1u;
}
return (uint)Math.Abs(input);
}
private static int FromPositive(uint input, bool toNegative)
{
var limit = toNegative ? int.MaxValue + 1u : int.MaxValue;
var result = input > limit ? limit : input;
return toNegative ? -(int)result : (int)result;
}
private static uint DividePositive(uint dividend, uint divisor, uint subResult = 0)
{
if (dividend < divisor)
return subResult;
var divisorTemp = divisor;
var newSubResult = 1u;
while (dividend >= divisorTemp << 1 && IsSafeToShiftLeft(divisorTemp))
{
divisorTemp <<= 1;
newSubResult <<= 1;
}
return DividePositive(dividend - divisorTemp, divisor, subResult + newSubResult);
}
private static bool IsSafeToShiftLeft(uint divisorTemp)
{
return divisorTemp < 0x80000000;
}
private static bool IsResultNegative(int dividend, int divisor)
{
return dividend < 0 ^ divisor < 0; // XOR operation
}
}

LeetCode: Video Stitching (Solution)

The LeetCode page for this task can be found here

public class Solution
{
public int VideoStitching(int[][] clips, int T)
{
var dictionary = CreateClipDictionary(clips);
var periodStart = 0;
var periodEnd = 0;
var minClipsNeeded = 0;
do
{
var periodEndCandidates = GetEndCandidates(dictionary, periodStart, periodEnd);
if (!periodEndCandidates.Any())
return -1;
periodStart = periodEnd + 1;
periodEnd = periodEndCandidates.Max();
minClipsNeeded++;
} while (periodEnd < T);
return minClipsNeeded;
}
private int[] GetEndCandidates(Dictionary<int, int[]> dict, int start, int end)
{
var candidates = new List<int>();
for (var index = start; index <= end; index++)
{
if (dict.ContainsKey(index))
{
candidates.AddRange(dict[index]);
}
}
return candidates.ToArray();
}
private Dictionary<int, int[]> CreateClipDictionary(int[][] clips)
{
var dictionary = new Dictionary<int, List<int>>();
foreach (var clip in clips)
{
var clipStart = clip[0];
var clipEnd = clip[1];
if (dictionary.ContainsKey(clipStart))
{
dictionary[clipStart].Add(clipEnd);
}
else
{
dictionary[clipStart] = new List<int> { clipEnd };
}
}
return dictionary.ToDictionary(kvp => kvp.Key, kvp => kvp.Value.ToArray());
}
}

LeetCode: Longest Substring Without Repeating Characters (Solution)

Link to the description of the task: https://leetcode.com/problems/longest-substring-without-repeating-characters/

public class Solution
{
public int LengthOfLongestSubstring(string s)
{
var given = s.ToCharArray();
var charsPositions = new Dictionary<char, int>();
int startPosition = 0, maxLength = 0;
for (var endPosition = 0; endPosition < given.Count(); endPosition++)
{
var ch = given[endPosition];
if (!charsPositions.ContainsKey(ch))
{
charsPositions.Add(ch, endPosition);
maxLength = Math.Max(maxLength, charsPositions.Count);
}
else
{
var prevCharPosition = charsPositions[ch];
foreach (var charToRemove in given[startPosition..prevCharPosition])
{
charsPositions.Remove(charToRemove);
}
charsPositions[ch] = endPosition;
startPosition = prevCharPosition + 1;
}
}
return maxLength;
}
}

LeetCode: The Earliest Moment When Everyone Become Friends (Solution with Time Complexity Analysis)

Link to the description of the task: https://leetcode.com/problems/the-earliest-moment-when-everyone-become-friends/

Straightforward Solution:

using System;
using System.Collections.Generic;
using System.Linq;
namespace ConsoleApp1
{
class Program
{
static void Main(string[] args)
{
var log = new FrienshipRecord[] {
new FrienshipRecord() { Time = 0, Member1 = "a", Member2 = "b" },
new FrienshipRecord() { Time = 1, Member1 = "c", Member2 = "d" },
new FrienshipRecord() { Time = 2, Member1 = "d", Member2 = "e" },
new FrienshipRecord() { Time = 3, Member1 = "f", Member2 = "e" },
new FrienshipRecord() { Time = 4, Member1 = "a", Member2 = "f" },
new FrienshipRecord() { Time = 5, Member1 = "c", Member2 = "a" },
};
var socialNetwork = new List<List<string>>();
int totalMembersCount = 6;
foreach (var record in log)
{
var group1 = socialNetwork.FirstOrDefault(g => g.Contains(record.Member1)); // O(n)
var group2 = socialNetwork.FirstOrDefault(g => g.Contains(record.Member2)); // O(n)
switch (group1 is null, group2 is null)
{
case (false, false) when group1 == group2:
// do nothing
break;
case (false, false) when group1 != group2:
group1.AddRange(group2); // O(l)
socialNetwork.Remove(group2); // O(1)
break;
case (false, true):
group1.Add(record.Member2); // O(1) or O(k)
break;
case (true, false):
group2.Add(record.Member1); // O(1) or O(k)
break;
case (true, true):
socialNetwork.Add(new List<string>() { record.Member1, record.Member2 }); // O(2)
break;
}
// If the social network contains only one group
// and also the grop contains all the members
if (socialNetwork.Count == 1 && socialNetwork[0].Count == totalMembersCount) // O(1)
{
Console.WriteLine("All members were friends at " + record.Time);
return;
}
}
Console.WriteLine("Not all of the members are friends yet");
}
}
internal struct FrienshipRecord
{
internal int Time;
internal string Member1;
internal string Member2;
}
}

Time Complexity Analysis:

List.Add(newFriend) -> O(1) if Count is less than Capacity, otherwise O(k) where k is Count

List.AddRange(anotherList) -> O(l) if Capacity is less than Count plus l, otherwise O(Count + l) where l is anotherList.Count

List.Contains(member) -> O(k) where k is Count

Loop:

  1. Take a record from the log
  2. Get groups ids for each member {List.Contains(member)} -> 2 * O(n)
  3. If the ids are the same then do nothing
  4. If one of the ids is null then add the member {List.Add(newFriend) -> O(1) or O(k)
  5. If both are null then create new Group {new List{a, b}} -> O(2)
  6. If not null and different then merge groups {List.AddRange} -> O(l)
  7. If the base List.Count is 1 and List[0].Count equals n then the current record is the answer

The total complexity is m * O(n). Also, m must be equal or larger then log(n) to make all members connected. This way the solution complexity is at least O(log(n) * n)

Two problems with this method:

  1. Getting a group id for a member is too slow (O(n))
  2. Merging groups is slow (O(n)) in the worst-case scenario

Straightforward Solution with HashSet:

using System;
using System.Collections.Generic;
using System.Linq;
namespace ConsoleApp1
{
class Program
{
static void Main(string[] args)
{
var log = new FrienshipRecord[] {
new FrienshipRecord() { Time = 0, Member1 = "a", Member2 = "b" },
new FrienshipRecord() { Time = 1, Member1 = "c", Member2 = "d" },
new FrienshipRecord() { Time = 2, Member1 = "d", Member2 = "e" },
new FrienshipRecord() { Time = 3, Member1 = "f", Member2 = "e" },
new FrienshipRecord() { Time = 4, Member1 = "a", Member2 = "f" },
new FrienshipRecord() { Time = 5, Member1 = "c", Member2 = "a" },
};
var socialNetwork = new List<HashSet<string>>();
int totalMembersCount = 6;
foreach (var record in log)
{
var group1 = socialNetwork.FirstOrDefault(g => g.Contains(record.Member1));
var group2 = socialNetwork.FirstOrDefault(g => g.Contains(record.Member2));
switch (group1 is null, group2 is null)
{
case (false, false) when group1 == group2:
// do nothing
break;
case (false, false) when group1 != group2:
MergeGroups(socialNetwork, group1, group2);
break;
case (false, true):
group1.Add(record.Member2);
break;
case (true, false):
group2.Add(record.Member1);
break;
case (true, true):
socialNetwork.Add(new HashSet<string>() { record.Member1, record.Member2 });
break;
}
// If the social network contains only one group
// and also the grop contains all the members
if (socialNetwork.Count == 1 && socialNetwork[0].Count == totalMembersCount)
{
Console.WriteLine("All members were friends at " + record.Time);
return;
}
}
Console.WriteLine("Not all of the members are friends yet");
}
private static void MergeGroups(
List<HashSet<string>> socialNetwork,
HashSet<string> group1,
HashSet<string> group2)
{
if (group1.Count > group2.Count)
{
group1.UnionWith(group2);
socialNetwork.Remove(group2);
}
else
{
group2.UnionWith(group1);
socialNetwork.Remove(group1);
}
}
}
internal struct FrienshipRecord
{
internal int Time;
internal string Member1;
internal string Member2;
}
}

Time Complexity Analysis:

HashSet.Add(newFriend) -> O(1) if Count is less than Capacity, otherwise O(k) where k is Count

HashSet.UnionWith(anotherHashSet) -> O(l) if Capacity is less than Count plus l, otherwise O(Count + l) where l is anotherHashSet.Count

HashSet.Contains(member) -> O(1)

Loop:

  1. Take a record from the log
  2. Get groups ids for each member {HashSet.Contains(member)} -> 2 * O(1)
  3. If the ids are the same then do nothing
  4. If one of the ids is null then add the member {HashSet.Add(newFriend) -> O(1) or O(k)
  5. If both are null then create new Group {new HashSet{a, b}} -> O(2)
  6. If not null and different then merge groups {HashSet.UnionWith} -> O(l)
  7. If the base List.Count is 1 and List[0].Count equals n then the current record is the answer

The total complexity is m * O(l). It is possible to minimize l by comparing HashSet.Count and anotherHashSet.Count, and then adding smallest to the biggest.

This way the total complexity is about O(m * log(n)) which makes the answer acceptable.

LeetCode: Add Two Numbers (Solution)

Link to the description of the task: https://leetcode.com/problems/add-two-numbers/

public class ListNode
{
public int val;
public ListNode next;
public ListNode(int val = 0, ListNode next = null)
{
this.val = val;
this.next = next;
}
}
public class Solution
{
public ListNode AddTwoNumbers(ListNode firstListNode, ListNode secondListNode)
{
var currentNodes = (first: firstListNode, second: secondListNode);
var carry = 0;
var resultFirstNode = SumNodes(currentNodes, ref carry);
var resultLastNode = resultFirstNode;
while (TryMoveForward(ref currentNodes))
{
Extend(ref resultLastNode, SumNodes(currentNodes, ref carry));
}
if (carry > 0)
{
Extend(ref resultLastNode, new ListNode(carry));
}
return resultFirstNode;
}
private static bool HaveComeToEnd((ListNode first, ListNode second) nodes) =>
nodes.first is null && nodes.second is null;
private static void Extend(ref ListNode node, ListNode extension)
{
node.next = extension;
node = node.next;
}
private static bool TryMoveForward(ref (ListNode first, ListNode second) currentNodes)
{
currentNodes.first = currentNodes.first?.next;
currentNodes.second = currentNodes.second?.next;
return !HaveComeToEnd(currentNodes);
}
private static ListNode SumNodes((ListNode first, ListNode second) nodes, ref int carry)
{
var firstDigit = nodes.first is null ? 0 : nodes.first.val;
var secondDigit = nodes.second is null ? 0 : nodes.second.val;
var sum = SumDigits(firstDigit, secondDigit, ref carry);
return new ListNode(sum);
}
private static int SumDigits(int first, int second, ref int carry)
{
var total = first + second + carry;
var sum = total % 10;
carry = (total - sum) / 10;
return sum;
}
}

Conclusion:

It looks pretty nice!