perf(openMP): added openMP support

Note that currently this actually slows the code down. Spinning up the threads and tearing them down is expensive
This commit is contained in:
2025-12-06 13:48:12 -05:00
parent 4e2b3cb11f
commit 67dde830af
7 changed files with 314 additions and 132 deletions

View File

@@ -31,3 +31,7 @@ else
message('enabling default visibility for C++ symbols')
add_project_arguments('-fvisibility=default', language: 'cpp')
endif
if get_option('openmp_support')
add_project_arguments('-DGRIDFIRE_USE_OPENMP', language: 'cpp')
endif

View File

@@ -9,3 +9,4 @@ option('plugin_support', type: 'boolean', value: false, description: 'Enable sup
option('python_target_version', type: 'string', value: '3.13', description: 'Target version for python compilation, only used for cross compilation')
option('build_c_api', type: 'boolean', value: true, description: 'compile the C API')
option('build_tools', type: 'boolean', value: true, description: 'build the GridFire command line tools')
option('openmp_support', type: 'boolean', value: false, description: 'Enable OpenMP support for parallelization')

View File

@@ -97,7 +97,7 @@ namespace gridfire::engine {
*
* @see engine_abstract.h
*/
class GraphEngine final : public DynamicEngine{
class GraphEngine final : public DynamicEngine {
public:
/**
* @brief Constructs a GraphEngine from a composition.
@@ -855,6 +855,12 @@ namespace gridfire::engine {
const reaction::Reaction& m_reaction;
const GraphEngine& m_engine;
};
struct PrecomputationKernelResults {
std::vector<double> dydt_vector;
double total_neutrino_energy_loss_rate{0.0};
double total_neutrino_flux{0.0};
};
private:
Config<config::GridFireConfig> m_config;
quill::Logger* m_logger = LogManager::getInstance().getLogger("log");
@@ -959,6 +965,42 @@ namespace gridfire::engine {
*/
[[nodiscard]] bool validateConservation() const;
double compute_reaction_flow(
const std::vector<double> &local_abundances,
const std::vector<double> &screening_factors,
const std::vector<double> &bare_rates,
const std::vector<double> &bare_reverse_rates,
double rho,
size_t reactionCounter,
const reaction::Reaction &reaction,
size_t reactionIndex,
const PrecomputedReaction &precomputedReaction
) const;
std::pair<double, double> compute_neutrino_fluxes(
double netFlow,
const reaction::Reaction &reaction) const;
PrecomputationKernelResults accumulate_flows_serial(
const std::vector<double>& local_abundances,
const std::vector<double>& screening_factors,
const std::vector<double>& bare_rates,
const std::vector<double>& bare_reverse_rates,
double rho,
const reaction::ReactionSet& activeReactions
) const;
#ifdef GRIDFIRE_USE_OPENMP
PrecomputationKernelResults accumulate_flows_parallel(
const std::vector<double>& local_abundances,
const std::vector<double>& screening_factors,
const std::vector<double>& bare_rates,
const std::vector<double>& bare_reverse_rates,
double rho,
const reaction::ReactionSet& activeReactions
) const;
#endif
[[nodiscard]] StepDerivatives<double> calculateAllDerivativesUsingPrecomputation(
const fourdst::composition::CompositionAbstract &comp,

View File

@@ -878,6 +878,8 @@ namespace gridfire::reaction {
[[nodiscard]] std::optional<std::unique_ptr<Reaction>> get(const std::string_view& id) const;
[[nodiscard]] std::unique_ptr<Reaction> get(size_t index) const;
/**
* @brief Removes a reaction from the set.
* @param reaction The Reaction to remove.

View File

@@ -28,6 +28,10 @@
#include "cppad/utility/sparse_rc.hpp"
#include "cppad/utility/sparse_rcv.hpp"
#ifdef GRIDFIRE_USE_OPENMP
#include <omp.h>
#endif
namespace {
enum class REACLIB_WEAK_TYPES {
@@ -403,6 +407,167 @@ namespace gridfire::engine {
return true; // All reactions passed the conservation check
}
double GraphEngine::compute_reaction_flow(
const std::vector<double> &local_abundances,
const std::vector<double> &screening_factors,
const std::vector<double> &bare_rates,
const std::vector<double> &bare_reverse_rates,
const double rho,
const size_t reactionCounter,
const reaction::Reaction &reaction,
const size_t reactionIndex,
const PrecomputedReaction &precomputedReaction
) const {
double forwardAbundanceProduct = 1.0;
for (size_t i = 0; i < precomputedReaction.unique_reactant_indices.size(); ++i) {
const size_t reactantIndex = precomputedReaction.unique_reactant_indices[i];
const int power = precomputedReaction.reactant_powers[i];
const double abundance = local_abundances[reactantIndex];
double factor;
if (power == 1) { factor = abundance; }
else if (power == 2) { factor = abundance * abundance; }
else { factor = std::pow(abundance, power); }
if (!std::isfinite(factor)) {
LOG_CRITICAL(m_logger, "Non-finite factor encountered in forward abundance product for reaction '{}'. Check input abundances for validity.", reaction.id());
throw exceptions::BadRHSEngineError("Non-finite factor encountered in forward abundance product.");
}
forwardAbundanceProduct *= factor;
}
const double bare_rate = bare_rates.at(reactionCounter);
const double screeningFactor = screening_factors[reactionCounter];
const size_t numReactants = m_reactions[reactionIndex].reactants().size();
const size_t numProducts = m_reactions[reactionIndex].products().size();
const double forwardMolarReactionFlow = screeningFactor *
bare_rate *
precomputedReaction.symmetry_factor *
forwardAbundanceProduct *
std::pow(rho, numReactants > 1 ? static_cast<double>(numReactants) - 1 : 0.0);
if (!std::isfinite(forwardMolarReactionFlow)) {
LOG_CRITICAL(m_logger, "Non-finite forward molar reaction flow computed for reaction '{}'. Check input abundances and rates for validity.", reaction.id());
throw exceptions::BadRHSEngineError("Non-finite forward molar reaction flow computed.");
}
double reverseMolarReactionFlow = 0.0;
if (precomputedReaction.reverse_symmetry_factor != 0.0 and m_useReverseReactions) {
const double bare_reverse_rate = bare_reverse_rates.at(reactionCounter);
double reverseAbundanceProduct = 1.0;
for (size_t i = 0; i < precomputedReaction.unique_product_indices.size(); ++i) {
const size_t productIndex = precomputedReaction.unique_product_indices[i];
reverseAbundanceProduct *= std::pow(local_abundances[productIndex], precomputedReaction.product_powers[i]);
}
reverseMolarReactionFlow = screeningFactor *
bare_reverse_rate *
precomputedReaction.reverse_symmetry_factor *
reverseAbundanceProduct *
std::pow(rho, numProducts > 1 ? static_cast<double>(numProducts) - 1 : 0.0);
}
return forwardMolarReactionFlow - reverseMolarReactionFlow;
}
std::pair<double, double> GraphEngine::compute_neutrino_fluxes(
const double netFlow,
const reaction::Reaction &reaction
) const {
if (reaction.type() == reaction::ReactionType::REACLIB_WEAK) {
const double q_abs = std::abs(reaction.qValue());
const REACLIB_WEAK_TYPES weakType = get_weak_reaclib_reaction_type(reaction);
double neutrino_loss_fraction = 0.0;
switch (weakType) {
case REACLIB_WEAK_TYPES::BETA_PLUS_DECAY:
[[fallthrough]];
case REACLIB_WEAK_TYPES::BETA_MINUS_DECAY:
neutrino_loss_fraction = 0.5; // Approximate 50% energy loss to neutrinos for beta decays
break;
case REACLIB_WEAK_TYPES::ELECTRON_CAPTURE:
[[fallthrough]];
case REACLIB_WEAK_TYPES::POSITRON_CAPTURE:
neutrino_loss_fraction = 1.0;
break;
default: ;
}
const double local_neutrino_loss = netFlow * q_abs * neutrino_loss_fraction * m_constants.Na * m_constants.MeV_to_erg;
const double local_neutrino_flux = netFlow * m_constants.Na;
return {local_neutrino_loss, local_neutrino_flux};
}
return {0.0, 0.0};
}
GraphEngine::PrecomputationKernelResults GraphEngine::accumulate_flows_serial(
const std::vector<double> &local_abundances,
const std::vector<double> &screening_factors,
const std::vector<double> &bare_rates,
const std::vector<double> &bare_reverse_rates,
const double rho,
const reaction::ReactionSet &activeReactions
) const {
PrecomputationKernelResults results;
results.dydt_vector.resize(m_networkSpecies.size(), 0.0);
std::vector<double> molarReactionFlows;
molarReactionFlows.reserve(m_precomputedReactions.size());
size_t reactionCounter = 0;
for (const auto& reaction : activeReactions) {
uint64_t reactionHash = utils::hash_reaction(*reaction);
const size_t reactionIndex = m_precomputedReactionIndexMap.at(reactionHash);
const PrecomputedReaction& precomputedReaction = m_precomputedReactions[reactionIndex];
double netFlow = compute_reaction_flow(
local_abundances,
screening_factors,
bare_rates,
bare_reverse_rates,
rho,
reactionCounter,
*reaction,
reactionIndex,
precomputedReaction);
molarReactionFlows.push_back(netFlow);
auto [local_neutrino_loss, local_neutrino_flux] = compute_neutrino_fluxes(netFlow, *reaction);
results.total_neutrino_energy_loss_rate += local_neutrino_loss;
results.total_neutrino_flux += local_neutrino_flux;
reactionCounter++;
}
LOG_TRACE_L3(m_logger, "Computed {} molar reaction flows for active reactions. Assembling these into RHS", molarReactionFlows.size());
reactionCounter = 0;
for (const auto& reaction: activeReactions) {
const size_t j = m_precomputedReactionIndexMap.at(utils::hash_reaction(*reaction));
const auto& precomp = m_precomputedReactions[j];
const double R_j = molarReactionFlows[reactionCounter];
for (size_t i = 0; i < precomp.affected_species_indices.size(); ++i) {
const size_t speciesIndex = precomp.affected_species_indices[i];
const int stoichiometricCoefficient = precomp.stoichiometric_coefficients[i];
const double dydt_increment = static_cast<double>(stoichiometricCoefficient) * R_j;
results.dydt_vector[speciesIndex] += dydt_increment;
}
reactionCounter++;
}
return results;
}
double GraphEngine::calculateReverseRate(
const reaction::Reaction &reaction,
const double T9,
@@ -681,137 +846,31 @@ namespace gridfire::engine {
StepDerivatives<double> result;
std::vector<double> dydt_scratch(m_networkSpecies.size(), 0.0);
// --- Optimized loop ---
std::vector<double> molarReactionFlows;
molarReactionFlows.reserve(m_precomputedReactions.size());
size_t reactionCounter = 0;
for (const auto& reaction : activeReactions) {
// --- Efficient lookup of only the active reactions ---
uint64_t reactionHash = utils::hash_reaction(*reaction);
const size_t reactionIndex = m_precomputedReactionIndexMap.at(reactionHash);
const PrecomputedReaction& precomputedReaction = m_precomputedReactions[reactionIndex];
// --- Forward abundance product ---
double forwardAbundanceProduct = 1.0;
for (size_t i = 0; i < precomputedReaction.unique_reactant_indices.size(); ++i) {
const size_t reactantIndex = precomputedReaction.unique_reactant_indices[i];
const fourdst::atomic::Species& reactant = m_networkSpecies[reactantIndex];
const int power = precomputedReaction.reactant_powers[i];
const double abundance = m_local_abundance_cache[reactantIndex];
double factor;
if (power == 1) { factor = abundance; }
else if (power == 2) { factor = abundance * abundance; }
else { factor = std::pow(abundance, power); }
if (!std::isfinite(factor)) {
LOG_CRITICAL(m_logger, "Non-finite factor encountered in forward abundance product for reaction '{}'. Check input abundances for validity.", reaction->id());
throw exceptions::BadRHSEngineError("Non-finite factor encountered in forward abundance product.");
}
forwardAbundanceProduct *= factor;
}
const double bare_rate = bare_rates.at(reactionCounter);
const double screeningFactor = screeningFactors[reactionCounter];
const size_t numReactants = m_reactions[reactionIndex].reactants().size();
const size_t numProducts = m_reactions[reactionIndex].products().size();
// --- Forward reaction flow ---
const double forwardMolarReactionFlow =
screeningFactor *
bare_rate *
precomputedReaction.symmetry_factor *
forwardAbundanceProduct *
std::pow(rho, numReactants > 1 ? static_cast<double>(numReactants) - 1 : 0.0);
if (!std::isfinite(forwardMolarReactionFlow)) {
LOG_CRITICAL(m_logger, "Non-finite forward molar reaction flow computed for reaction '{}'. Check input abundances and rates for validity.", reaction->id());
throw exceptions::BadRHSEngineError("Non-finite forward molar reaction flow computed.");
}
// --- Reverse reaction flow ---
// Only do this is the reaction has a non-zero reverse symmetry factor (i.e. is reversible)
double reverseMolarReactionFlow = 0.0;
if (precomputedReaction.reverse_symmetry_factor != 0.0 and m_useReverseReactions) {
const double bare_reverse_rate = bare_reverse_rates.at(reactionCounter);
double reverseAbundanceProduct = 1.0;
for (size_t i = 0; i < precomputedReaction.unique_product_indices.size(); ++i) {
const size_t productIndex = precomputedReaction.unique_product_indices[i];
const fourdst::atomic::Species& product = m_networkSpecies[productIndex];
reverseAbundanceProduct *= std::pow(comp.getMolarAbundance(product), precomputedReaction.product_powers[i]);
}
reverseMolarReactionFlow = screeningFactor *
bare_reverse_rate *
precomputedReaction.reverse_symmetry_factor *
reverseAbundanceProduct *
std::pow(rho, numProducts > 1 ? static_cast<double>(numProducts) - 1 : 0.0);
}
molarReactionFlows.push_back(forwardMolarReactionFlow - reverseMolarReactionFlow);
if (reaction->type() == reaction::ReactionType::REACLIB_WEAK) {
double q_abs = std::abs(reaction->qValue());
REACLIB_WEAK_TYPES weakType = get_weak_reaclib_reaction_type(*reaction);
double neutrino_loss_fraction = 0.0;
switch (weakType) {
case REACLIB_WEAK_TYPES::BETA_PLUS_DECAY:
[[fallthrough]];
case REACLIB_WEAK_TYPES::BETA_MINUS_DECAY:
neutrino_loss_fraction = 0.5; // Approximate 50% energy loss to neutrinos for beta decays
break;
case REACLIB_WEAK_TYPES::ELECTRON_CAPTURE:
[[fallthrough]];
case REACLIB_WEAK_TYPES::POSITRON_CAPTURE:
neutrino_loss_fraction = 1.0;
break;
default: ;
}
const double local_neutrino_loss = molarReactionFlows.back() * q_abs * neutrino_loss_fraction * m_constants.Na * m_constants.MeV_to_erg;
const double local_neutrino_flux = molarReactionFlows.back() * m_constants.Na;
result.totalNeutrinoFlux += local_neutrino_flux;
result.neutrinoEnergyLossRate += local_neutrino_loss;
}
reactionCounter++;
}
LOG_TRACE_L3(m_logger, "Computed {} molar reaction flows for active reactions. Assembling these into RHS", molarReactionFlows.size());
// --- Assemble molar abundance derivatives ---
for (const auto& species: m_networkSpecies) {
result.dydt[species] = 0.0; // Initialize the change in abundance for each network species to 0
}
reactionCounter = 0;
for (const auto& reaction: activeReactions) {
const size_t j = m_precomputedReactionIndexMap.at(utils::hash_reaction(*reaction));
const auto& precomp = m_precomputedReactions[j];
const double R_j = molarReactionFlows[reactionCounter];
for (size_t i = 0; i < precomp.affected_species_indices.size(); ++i) {
const size_t speciesIndex = precomp.affected_species_indices[i];
const fourdst::atomic::Species& species = m_networkSpecies[speciesIndex];
const int stoichiometricCoefficient = precomp.stoichiometric_coefficients[i];
// Update the derivative for this species
const double dydt_increment = static_cast<double>(stoichiometricCoefficient) * R_j;
// result.dydt.at(species) += dydt_increment;
dydt_scratch[speciesIndex] += dydt_increment;
if (m_store_intermediate_reaction_contributions) {
result.reactionContributions.value()[species][std::string(reaction->id())] = dydt_increment;
}
}
reactionCounter++;
}
#ifndef GRIDFIRE_USE_OPENMP
const auto [dydt_vector, total_neutrino_energy_loss_rate, total_neutrino_flux] = accumulate_flows_serial(
m_local_abundance_cache,
screeningFactors,
bare_rates,
bare_reverse_rates,
rho,
activeReactions
);
dydt_scratch = dydt_vector;
result.neutrinoEnergyLossRate = total_neutrino_energy_loss_rate;
result.totalNeutrinoFlux = total_neutrino_flux;
#else
const auto [dydt_vector, total_neutrino_energy_loss_rate, total_neutrino_flux] = accumulate_flows_parallel(
m_local_abundance_cache,
screeningFactors,
bare_rates,
bare_reverse_rates,
rho,
activeReactions
);
dydt_scratch = dydt_vector;
result.neutrinoEnergyLossRate = total_neutrino_energy_loss_rate;
result.totalNeutrinoFlux = total_neutrino_flux;
#endif
// load scratch into result.dydt
for (size_t i = 0; i < m_networkSpecies.size(); ++i) {
@@ -1520,4 +1579,69 @@ namespace gridfire::engine {
return true;
}
#ifdef GRIDFIRE_USE_OPENMP
GraphEngine::PrecomputationKernelResults GraphEngine::accumulate_flows_parallel(
const std::vector<double> &local_abundances,
const std::vector<double> &screening_factors,
const std::vector<double> &bare_rates,
const std::vector<double> &bare_reverse_rates,
const double rho,
const reaction::ReactionSet &activeReactions
) const {
int n_threads = omp_get_max_threads();
std::vector<std::vector<double>> thread_local_dydt(n_threads, std::vector<double>(m_networkSpecies.size(), 0.0));
double total_neutrino_energy_loss_rate = 0.0;
double total_neutrino_flux = 0.0;
#pragma omp parallel for schedule(static) reduction(+:total_neutrino_energy_loss_rate, total_neutrino_flux)
for (size_t k = 0; k < activeReactions.size(); ++k) {
int t_id = omp_get_thread_num();
const auto& reaction = activeReactions[k];
const size_t reactionIndex = m_precomputedReactionIndexMap.at(utils::hash_reaction(reaction));
const PrecomputedReaction& precomputedReaction = m_precomputedReactions[reactionIndex];
double netFlow = compute_reaction_flow(
local_abundances,
screening_factors,
bare_rates,
bare_reverse_rates,
rho,
reactionIndex,
reaction,
reactionIndex,
precomputedReaction
);
auto [neutrinoEnergyLossRate, neutrinoFlux] = compute_neutrino_fluxes(
netFlow,
reaction
);
total_neutrino_energy_loss_rate += neutrinoEnergyLossRate;
total_neutrino_flux += neutrinoFlux;
for (size_t i = 0; i < precomputedReaction.affected_species_indices.size(); ++i) {
thread_local_dydt[t_id][precomputedReaction.affected_species_indices[i]] +=
netFlow * precomputedReaction.stoichiometric_coefficients[i];
}
}
PrecomputationKernelResults results;
results.total_neutrino_energy_loss_rate = total_neutrino_energy_loss_rate;
results.total_neutrino_flux = total_neutrino_flux;
results.dydt_vector.resize(m_networkSpecies.size(), 0.0);
#pragma omp parallel for schedule(static)
for (size_t i = 0; i < m_networkSpecies.size(); ++i) {
double sum = 0.0;
for (int t = 0; t < n_threads; ++t) sum += thread_local_dydt[t][i];
results.dydt_vector[i] = sum;
}
return results;
}
#endif
}

View File

@@ -460,6 +460,10 @@ namespace gridfire::reaction {
return std::make_optional(m_reactions[m_reactionNameMap.at(std::string(id))]->clone());
}
std::unique_ptr<Reaction> ReactionSet::get(size_t index) const {
return m_reactions.at(index)->clone();
}
void ReactionSet::remove_reaction(const Reaction& reaction) {
const size_t rh = reaction.hash(0);
if (!m_reactionHashes.contains(rh)) {

View File

@@ -46,6 +46,11 @@ if get_option('plugin_support')
gridfire_build_dependencies += [plugin_dep]
endif
if get_option('openmp_support')
openmp_dep = dependency('openmp', required: true)
gridfire_build_dependencies += [openmp_dep]
endif
# Define the libnetwork library so it can be linked against by other parts of the build system
libgridfire = library('gridfire',
gridfire_sources,