perf(thread safety): All Engines are now thread safe
Previously engines were not thread safe; a separate engine would be needed for every thread. This is no longer the case. This allows for much more efficient parallel execution.
This commit is contained in:
@@ -1,9 +1,12 @@
|
||||
// ReSharper disable CppUnusedIncludeDirective
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#include <format>
|
||||
|
||||
#include "gridfire/gridfire.h"
|
||||
#include <cppad/utility/thread_alloc.hpp> // Required for parallel_setup
|
||||
|
||||
#include "fourdst/composition/composition.h"
|
||||
#include "fourdst/logging/logging.h"
|
||||
@@ -17,7 +20,15 @@
|
||||
#include <clocale>
|
||||
|
||||
#include "gridfire/reaction/reaclib.h"
|
||||
#include <omp.h>
|
||||
|
||||
// Returns the calling thread's OpenMP thread number (omp_get_thread_num()),
// cast to unsigned long. Passed to CppAD::thread_alloc::parallel_setup in main
// as the thread-number callback.
unsigned long get_thread_id() {
|
||||
return static_cast<unsigned long>(omp_get_thread_num());
|
||||
}
|
||||
|
||||
// Returns true when omp_in_parallel() reports a non-zero value, i.e. the caller
// is executing inside an active OpenMP parallel region. Passed to
// CppAD::thread_alloc::parallel_setup in main as the parallel-state callback.
bool in_parallel() {
|
||||
return omp_in_parallel() != 0;
|
||||
}
|
||||
|
||||
static std::terminate_handler g_previousHandler = nullptr;
|
||||
static std::vector<std::pair<double, std::unordered_map<std::string, std::pair<double, double>>>> g_callbackHistory;
|
||||
@@ -110,14 +121,14 @@ void log_results(const gridfire::NetOut& netOut, const gridfire::NetIn& netIn) {
|
||||
std::vector<std::string> rowLabels = [&]() -> std::vector<std::string> {
|
||||
std::vector<std::string> labels;
|
||||
for (const auto& species : logSpecies) {
|
||||
labels.push_back(std::string(species.name()));
|
||||
labels.emplace_back(species.name());
|
||||
}
|
||||
labels.push_back("ε");
|
||||
labels.push_back("dε/dT");
|
||||
labels.push_back("dε/dρ");
|
||||
labels.push_back("Eν");
|
||||
labels.push_back("Fν");
|
||||
labels.push_back("<μ>");
|
||||
labels.emplace_back("ε");
|
||||
labels.emplace_back("dε/dT");
|
||||
labels.emplace_back("dε/dρ");
|
||||
labels.emplace_back("Eν");
|
||||
labels.emplace_back("Fν");
|
||||
labels.emplace_back("<μ>");
|
||||
return labels;
|
||||
}();
|
||||
|
||||
@@ -145,13 +156,13 @@ void record_abundance_history_callback(const gridfire::solver::CVODESolverStrate
|
||||
const auto& engine = ctx.engine;
|
||||
// std::unordered_map<std::string, std::pair<double, double>> abundances;
|
||||
std::vector<double> Y;
|
||||
for (const auto& species : engine.getNetworkSpecies()) {
|
||||
const size_t sid = engine.getSpeciesIndex(species);
|
||||
for (const auto& species : engine.getNetworkSpecies(ctx.state_ctx)) {
|
||||
const size_t sid = engine.getSpeciesIndex(ctx.state_ctx, species);
|
||||
double y = N_VGetArrayPointer(ctx.state)[sid];
|
||||
Y.push_back(y > 0.0 ? y : 0.0); // Regularize tiny negative abundances to zero
|
||||
}
|
||||
|
||||
fourdst::composition::Composition comp(engine.getNetworkSpecies(), Y);
|
||||
fourdst::composition::Composition comp(engine.getNetworkSpecies(ctx.state_ctx), Y);
|
||||
|
||||
|
||||
std::unordered_map<std::string, std::pair<double, double>> abundances;
|
||||
@@ -225,45 +236,116 @@ void callback_main(const gridfire::solver::CVODESolverStrategy::TimestepContext&
|
||||
record_abundance_history_callback(ctx);
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int main() {
|
||||
using namespace gridfire;
|
||||
|
||||
CLI::App app{"GridFire Sandbox Application."};
|
||||
|
||||
constexpr size_t breaks = 100;
|
||||
constexpr size_t breaks = 1;
|
||||
double temp = 1.5e7;
|
||||
double rho = 1.5e2;
|
||||
double tMax = 3.1536e+17/breaks;
|
||||
double tMax = 3.1536e+16/breaks;
|
||||
|
||||
app.add_option("-t,--temp", temp, "Temperature in K (Default 1.5e7K)");
|
||||
app.add_option("-r,--rho", rho, "Density in g/cm^3 (Default 1.5e2g/cm^3)");
|
||||
app.add_option("--tmax", tMax, "Maximum simulation time in s (Default 3.1536e17s)");
|
||||
|
||||
CLI11_PARSE(app, argc, argv);
|
||||
|
||||
NetIn netIn = init(temp, rho, tMax);
|
||||
const NetIn netIn = init(temp, rho, tMax);
|
||||
|
||||
policy::MainSequencePolicy stellarPolicy(netIn.composition);
|
||||
stellarPolicy.construct();
|
||||
engine::DynamicEngine& engine = stellarPolicy.construct();
|
||||
policy::ConstructionResults construct = stellarPolicy.construct();
|
||||
std::println("Sandbox Engine Stack: {}", stellarPolicy);
|
||||
std::println("Scratch Blob State: {}", *construct.scratch_blob);
|
||||
|
||||
solver::CVODESolverStrategy solver(engine);
|
||||
solver.set_stdout_logging_enabled(false);
|
||||
// solver.set_callback(solver::CVODESolverStrategy::TimestepCallback(callback_main));
|
||||
|
||||
fourdst::composition::Composition reinputComp = netIn.composition;
|
||||
NetOut netOut;
|
||||
const auto timer = std::chrono::high_resolution_clock::now();
|
||||
for (int i = 0; i < breaks; ++i) {
|
||||
NetIn in({.composition = reinputComp, .temperature = temp, .density = rho, .tMax = tMax, .dt0 = 1e-12});
|
||||
netOut = solver.evaluate(in, false, false);
|
||||
reinputComp = netOut.composition;
|
||||
constexpr size_t runs = 1000;
|
||||
auto startTime = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// arrays to store timings
|
||||
std::array<std::chrono::duration<double>, runs> setup_times;
|
||||
std::array<std::chrono::duration<double>, runs> eval_times;
|
||||
std::array<NetOut, runs> serial_results;
|
||||
for (size_t i = 0; i < runs; ++i) {
|
||||
auto start_setup_time = std::chrono::high_resolution_clock::now();
|
||||
std::print("Run {}/{}\r", i + 1, runs);
|
||||
solver::CVODESolverStrategy solver(construct.engine, *construct.scratch_blob);
|
||||
// solver.set_callback(solver::CVODESolverStrategy::TimestepCallback(callback_main));
|
||||
solver.set_stdout_logging_enabled(false);
|
||||
auto end_setup_time = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double> setup_elapsed = end_setup_time - start_setup_time;
|
||||
setup_times[i] = setup_elapsed;
|
||||
|
||||
auto start_eval_time = std::chrono::high_resolution_clock::now();
|
||||
const NetOut netOut = solver.evaluate(netIn);
|
||||
auto end_eval_time = std::chrono::high_resolution_clock::now();
|
||||
serial_results[i] = netOut;
|
||||
std::chrono::duration<double> eval_elapsed = end_eval_time - start_eval_time;
|
||||
eval_times[i] = eval_elapsed;
|
||||
|
||||
// log_results(netOut, netIn);
|
||||
}
|
||||
auto endTime = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double> elapsed = endTime - startTime;
|
||||
std::println("");
|
||||
|
||||
// Summarize serial timings
|
||||
double total_setup_time = 0.0;
|
||||
double total_eval_time = 0.0;
|
||||
for (size_t i = 0; i < runs; ++i) {
|
||||
total_setup_time += setup_times[i].count();
|
||||
total_eval_time += eval_times[i].count();
|
||||
}
|
||||
std::println("Average Setup Time over {} runs: {:.6f} seconds", runs, total_setup_time / runs);
|
||||
std::println("Average Evaluation Time over {} runs: {:.6f} seconds", runs, total_eval_time / runs);
|
||||
std::println("Total Time for {} runs: {:.6f} seconds", runs, elapsed.count());
|
||||
std::println("Final H-1 Abundances Serial: {}", serial_results[0].composition.getMolarAbundance(fourdst::atomic::H_1));
|
||||
|
||||
// Hand CppAD's thread allocator the OpenMP thread count and the two OpenMP-backed
// query callbacks before the parallel loop below runs.
CppAD::thread_alloc::parallel_setup(
|
||||
static_cast<size_t>(omp_get_max_threads()), // Max threads
|
||||
[]() -> bool { return in_parallel(); }, // Function to check parallel state
|
||||
[]() -> size_t { return get_thread_id(); } // Function to get thread ID
|
||||
);
|
||||
|
||||
// OPTIONAL: Prevent CppAD from returning memory to the system
|
||||
// during execution to reduce overhead (can speed up tight loops)
|
||||
CppAD::thread_alloc::hold_memory(true);
|
||||
|
||||
std::array<NetOut, runs> parallelResults;
|
||||
std::array<std::chrono::duration<double>, runs> setupTimes;
|
||||
std::array<std::chrono::duration<double>, runs> evalTimes;
|
||||
std::array<std::unique_ptr<gridfire::engine::scratch::StateBlob>, runs> workspaces;
|
||||
for (size_t i = 0; i < runs; ++i) {
|
||||
workspaces[i] = construct.scratch_blob->clone_structure();
|
||||
}
|
||||
const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - timer).count();
|
||||
std::cout << "Average execution time over run: " << duration/breaks << " ms" << std::endl;
|
||||
std::cout << "Total execution time over " << breaks << " runs: " << duration << " ms" << std::endl;
|
||||
|
||||
|
||||
log_results(netOut, netIn);
|
||||
// log_callback_data(temp);
|
||||
}
|
||||
// Parallel runs
|
||||
startTime = std::chrono::high_resolution_clock::now();
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < runs; ++i) {
|
||||
auto start_setup_time = std::chrono::high_resolution_clock::now();
|
||||
solver::CVODESolverStrategy solver(construct.engine, *workspaces[i]);
|
||||
solver.set_stdout_logging_enabled(false);
|
||||
auto end_setup_time = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double> setup_elapsed = end_setup_time - start_setup_time;
|
||||
setupTimes[i] = setup_elapsed;
|
||||
auto start_eval_time = std::chrono::high_resolution_clock::now();
|
||||
parallelResults[i] = solver.evaluate(netIn);
|
||||
auto end_eval_time = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double> eval_elapsed = end_eval_time - start_eval_time;
|
||||
evalTimes[i] = eval_elapsed;
|
||||
}
|
||||
endTime = std::chrono::high_resolution_clock::now();
|
||||
elapsed = endTime - startTime;
|
||||
std::println("");
|
||||
|
||||
// Summarize parallel timings
|
||||
total_setup_time = 0.0;
|
||||
total_eval_time = 0.0;
|
||||
for (size_t i = 0; i < runs; ++i) {
|
||||
total_setup_time += setupTimes[i].count();
|
||||
total_eval_time += evalTimes[i].count();
|
||||
}
|
||||
|
||||
std::println("Average Parallel Setup Time over {} runs: {:.6f} seconds", runs, total_setup_time / runs);
|
||||
std::println("Average Parallel Evaluation Time over {} runs: {:.6f} seconds", runs, total_eval_time / runs);
|
||||
std::println("Total Parallel Time for {} runs: {:.6f} seconds", runs, elapsed.count());
|
||||
|
||||
std::println("Final H-1 Abundances Parallel: {}", utils::iterable_to_delimited_string(parallelResults, ",", [](const auto& result) {
|
||||
return result.composition.getMolarAbundance(fourdst::atomic::H_1);
|
||||
}));
|
||||
}
|
||||
@@ -4,8 +4,8 @@ executable(
|
||||
dependencies: [gridfire_dep, cli11_dep],
|
||||
)
|
||||
|
||||
executable(
|
||||
'spectral_sandbox',
|
||||
'spectral_main.cpp',
|
||||
dependencies: [gridfire_dep, cli11_dep]
|
||||
)
|
||||
#executable(
|
||||
# 'spectral_sandbox',
|
||||
# 'spectral_main.cpp',
|
||||
# dependencies: [gridfire_dep, cli11_dep]
|
||||
#)
|
||||
|
||||
Reference in New Issue
Block a user