perf(Composition): Internally switched from map -> vector

This brings a major performance improvement, as all memory is now contiguous
on the heap rather than spread across separate allocations.
This commit is contained in:
2025-12-08 11:31:46 -05:00
parent 184df676ca
commit 284e8cd10a
17 changed files with 909 additions and 475 deletions

View File

@@ -0,0 +1,123 @@
#include "benchmark_utils.h"
#include "fourdst/composition/composition.h"
#include "fourdst/atomic/species.h"
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <numeric>
#include <print>
#include <random>
#include <ranges>
#include <string>
#include <vector>
std::chrono::duration<double, std::nano> benchmark_construction(const size_t iterations, const size_t nSpecies) {
using namespace fourdst::composition;
using namespace fourdst::atomic;
// Setup random machine to get random double between 0 and 1 for molar abundances
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<> dis(0.0, 1.0);
std::vector<Species> species_to_register;
std::vector<double> molarAbundances;
size_t count = 0;
for (const auto& sp : species | std::views::values) {
if (count >= nSpecies) {
break;
}
species_to_register.push_back(sp);
molarAbundances.push_back(dis(gen));
count++;
}
const auto duration = fdst_benchmark_function([&]() {
for (size_t i = 0; i < iterations; ++i) {
fourdst::composition::Composition comp(species_to_register, molarAbundances);
}
});
return duration / static_cast<double>(iterations);
}
std::chrono::duration<double, std::nano> benchmark_access(const size_t iterations, const size_t nSpecies) {
using namespace fourdst::composition;
using namespace fourdst::atomic;
// Setup random machine to get random double between 0 and 1 for molar abundances
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<> dis(0.0, 1.0);
std::vector<Species> species_to_register;
std::vector<double> molarAbundances;
size_t count = 0;
for (const auto& sp : species | std::views::values) {
if (count >= nSpecies) {
break;
}
species_to_register.push_back(sp);
molarAbundances.push_back(dis(gen));
count++;
}
const Composition comp(species_to_register, molarAbundances);
std::uniform_int_distribution<>(0, nSpecies - 1);
std::vector<Species> random_lookup_species;
for (size_t i = 0; i < iterations; ++i) {
random_lookup_species.push_back(species_to_register[static_cast<size_t>(dis(gen))]);
}
const auto duration = fdst_benchmark_function([&]() {
for (size_t i = 0; i < iterations; ++i) {
volatile double y = comp.getMolarAbundance(random_lookup_species[i]);
do_not_optimize(y);
}
});
return duration / static_cast<double>(iterations);
}
// Runs the construction and access benchmarks and reports, for each one, the
// average / max / min per-operation time over all samples followed by an
// ASCII histogram of the samples.
int main () {
    constexpr size_t nIterations = 1000;
    constexpr size_t nSpecies = 100;

    // Collects nIterations samples from `benchmark(innerIterations, nSpecies)`
    // and prints the summary statistics and the histogram for them.
    const auto run_and_report = [&](const auto& benchmark, const size_t innerIterations,
                                    const std::string& verb, const std::string& histogramTitle) {
        std::vector<double> durations(nIterations);
        for (size_t i = 0; i < nIterations; ++i) {
            std::print("Iteration {}/{}\r", i + 1, nIterations);
            // '\r' does not trigger a flush; without this the progress line
            // may not appear until the buffer fills.
            std::fflush(stdout);
            durations[i] = benchmark(innerIterations, nSpecies).count();
        }
        std::println("");
        std::println("Average time to {} composition over {} iterations: {} ns", verb, nIterations,
                     std::accumulate(durations.begin(), durations.end(), 0.0) / nIterations);
        std::println("Max time to {} composition over {} iterations: {} ns", verb, nIterations,
                     *std::ranges::max_element(durations));
        std::println("Min time to {} composition over {} iterations: {} ns", verb, nIterations,
                     *std::ranges::min_element(durations));
        // plot_ascii_histogram only builds the text; it has to be printed here.
        std::print("{}", plot_ascii_histogram(durations, histogramTitle));
    };

    run_and_report(benchmark_construction, 10, "construct", "Composition Construction Time Histogram");
    run_and_report(benchmark_access, 1000, "access", "Composition Access Time Histogram");
}

View File

@@ -0,0 +1 @@
executable('construction_and_iteration_bench', 'benchmark_composition_construction_and_iteration.cpp', dependencies: [composition_dep], include_directories: [benchmark_utils_includes])

View File

@@ -1,58 +1,16 @@
#include "fourdst/composition/composition.h"
#include "fourdst/composition/utils/composition_hash.h"
#include "fourdst/composition/utils.h"
#include "fourdst/atomic/atomicSpecies.h"
#include "fourdst/atomic/species.h"
#include <chrono>
#include <numeric>
#include <print>
#include <string>
#include <vector>
#include <cstdint>
#include <ranges>
#include <chrono>
template <class T>
void do_not_optimize(T&& datum) {
asm volatile("" : "+r" (datum));
}
#include "benchmark_utils.h"
// Returns the number of histogram bins for `data` using Sturges' formula,
// ceil(log2(n) + 1), where n is the number of samples.
//
// An empty data set yields 1 bin: log2(0) is -infinity, and converting that
// to an unsigned integer would be undefined behaviour.
uint32_t calc_num_bins(const std::vector<double>& data) {
    const size_t n = data.size();
    if (n == 0) {
        return 1;
    }
    return static_cast<uint32_t>(std::ceil(std::log2(static_cast<double>(n)) + 1));
}
// Renders `data` as an ASCII histogram and returns it as a string (nothing is
// printed here).
//
// The output starts with `title` centred in 60 columns and a line of 60 '='
// characters. Each following line describes one bin: its half-open range, its
// sample count, and a bar of '*' scaled so that the fullest bin is 50 wide.
// The number of bins comes from calc_num_bins.
//
// Degenerate inputs are handled explicitly: empty `data` produces the header
// followed by "(no data)", and when every sample has the same value (zero bin
// width) all samples are counted in the first bin.
std::string plot_ascii_histogram(const std::vector<double>& data, const std::string& title) {
    std::string histogram;
    histogram += std::format("{:^60}\n", title);
    histogram += std::string(60, '=') + "\n";

    // min/max of an empty range would dereference an end iterator.
    if (data.empty()) {
        histogram += "(no data)\n";
        return histogram;
    }

    const uint32_t nBins = calc_num_bins(data);
    const double minVal = *std::ranges::min_element(data);
    const double maxVal = *std::ranges::max_element(data);
    const double binWidth = (maxVal - minVal) / nBins;

    std::vector<uint32_t> bins(nBins, 0);
    for (const double value : data) {
        uint32_t binIndex = 0;
        if (binWidth > 0.0) {
            const double position = (value - minVal) / binWidth;
            // The maximum lands exactly on the upper edge; fold it (and any
            // non-finite position) into the last bin instead of casting an
            // out-of-range value.
            binIndex = (position < nBins) ? static_cast<uint32_t>(position) : nBins - 1;
        }
        bins[binIndex]++;
    }

    // Non-empty data guarantees at least one populated bin, so this is >= 1.
    const uint32_t maxBinCount = *std::ranges::max_element(bins);
    for (uint32_t i = 0; i < nBins; ++i) {
        const double binStart = minVal + i * binWidth;
        const double binEnd = binStart + binWidth;
        const uint32_t barLength = static_cast<uint32_t>(std::round((static_cast<double>(bins[i]) / maxBinCount) * 50.0));
        histogram += std::format("[{:.2e}, {:.2e}): {:>15} | {:}\n",
                                 binStart, binEnd, bins[i], std::string(barLength, '*'));
    }
    return histogram;
}
std::chrono::duration<double, std::nano> build_and_hash_compositions(const size_t iter, const size_t nSpecies = 8) {
using namespace fourdst::composition;
@@ -69,15 +27,14 @@ std::chrono::duration<double, std::nano> build_and_hash_compositions(const size_
count++;
}
const auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < iter; ++i) {
uint64_t hashValue = utils::CompositionHash::hash_exact(comp);
do_not_optimize(hashValue);
}
const auto end = std::chrono::high_resolution_clock::now();
const auto duration = fdst_benchmark_function([&]() {
for (size_t i = 0; i < iter; ++i) {
uint64_t hashValue = utils::CompositionHash::hash_exact(comp);
do_not_optimize(hashValue);
}
});
const std::chrono::duration<double, std::nano> duration = (end - start)/iter;
return duration;
return duration / static_cast<double>(iter);
}
int main() {

View File

@@ -1 +1 @@
executable('hashing_bench', 'benchmark_composition_hash.cpp', dependencies: [composition_dep])
executable('hashing_bench', 'benchmark_composition_hash.cpp', dependencies: [composition_dep], include_directories: [benchmark_utils_includes])

View File

@@ -1 +1,4 @@
subdir('hashing')
benchmark_utils_includes = include_directories('utils')
subdir('hashing')
subdir('ConstructionAndIteration')

View File

@@ -0,0 +1,65 @@
#pragma once
#include <algorithm>
#include <cstdint>
#include <vector>
#include <string>
#include <cmath>
#include <format>
#include <chrono>
template <class T>
void do_not_optimize(T&& datum) {
asm volatile("" : "+r" (datum));
}
// Returns the number of histogram bins for `data` using Sturges' formula,
// ceil(log2(n) + 1), where n is the number of samples.
//
// An empty data set yields 1 bin: log2(0) is -infinity, and converting that
// to an unsigned integer would be undefined behaviour.
inline uint32_t calc_num_bins(const std::vector<double>& data) {
    const size_t n = data.size();
    if (n == 0) {
        return 1;
    }
    return static_cast<uint32_t>(std::ceil(std::log2(static_cast<double>(n)) + 1));
}
// Renders `data` as an ASCII histogram and returns it as a string (nothing is
// printed here).
//
// The output starts with `title` centred in 60 columns and a line of 60 '='
// characters. Each following line describes one bin: its half-open range, its
// sample count, and a bar of '*' scaled so that the fullest bin is 50 wide.
// The number of bins comes from calc_num_bins.
//
// Degenerate inputs are handled explicitly: empty `data` produces the header
// followed by "(no data)", and when every sample has the same value (zero bin
// width) all samples are counted in the first bin.
inline std::string plot_ascii_histogram(const std::vector<double>& data, const std::string& title) {
    std::string histogram;
    histogram += std::format("{:^60}\n", title);
    histogram += std::string(60, '=') + "\n";

    // min/max of an empty range would dereference an end iterator.
    if (data.empty()) {
        histogram += "(no data)\n";
        return histogram;
    }

    const uint32_t nBins = calc_num_bins(data);
    const double minVal = *std::ranges::min_element(data);
    const double maxVal = *std::ranges::max_element(data);
    const double binWidth = (maxVal - minVal) / nBins;

    std::vector<uint32_t> bins(nBins, 0);
    for (const double value : data) {
        uint32_t binIndex = 0;
        if (binWidth > 0.0) {
            const double position = (value - minVal) / binWidth;
            // The maximum lands exactly on the upper edge; fold it (and any
            // non-finite position) into the last bin instead of casting an
            // out-of-range value.
            binIndex = (position < nBins) ? static_cast<uint32_t>(position) : nBins - 1;
        }
        bins[binIndex]++;
    }

    // Non-empty data guarantees at least one populated bin, so this is >= 1.
    const uint32_t maxBinCount = *std::ranges::max_element(bins);
    for (uint32_t i = 0; i < nBins; ++i) {
        const double binStart = minVal + i * binWidth;
        const double binEnd = binStart + binWidth;
        const uint32_t barLength = static_cast<uint32_t>(std::round((static_cast<double>(bins[i]) / maxBinCount) * 50.0));
        histogram += std::format("[{:.2e}, {:.2e}): {:>15} | {:}\n",
                                 binStart, binEnd, bins[i], std::string(barLength, '*'));
    }
    return histogram;
}
// Invokes `func_call` once and returns the wall-clock time it took as
// std::chrono::nanoseconds.
//
// Timing uses std::chrono::steady_clock, which is monotonic, so the measured
// interval cannot be distorted by adjustments to the system clock. The
// callable is perfectly forwarded, so rvalue callables are invoked as rvalues.
// The returned duration is consumed by the caller, so no extra optimization
// barrier is applied to it here.
template <typename Func>
auto fdst_benchmark_function(Func&& func_call) {
    using clock = std::chrono::steady_clock;

    const auto start = clock::now();
    std::forward<Func>(func_call)();
    const auto end = clock::now();

    return std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
}