feat(reaclib): reworked how I store reactions to avoid stack overflows

This commit is contained in:
2025-06-29 14:53:09 -04:00
parent 3c38187d3f
commit 2a410dc3fd
14 changed files with 182372 additions and 78781 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,3 +1,68 @@
"""
REACLIB Reaction Formatting and Parsing Utilities
================================================
This module provides utilities for parsing, formatting, and analyzing nuclear reaction
data in the REACLIB format. It includes tools for extracting reaction information,
computing reaction rates, and exporting data for use in C++ or binary formats.
Algorithms and Quantum Number Accounting
----------------------------------------
The core of this module is the quantum number bookkeeping performed in
`determine_reaction_type`, which ensures conservation of baryon number (A),
charge (Z), and handles leptonic and photonic processes:
- **Baryon Number Conservation**: The difference in total mass number (A) between
reactants and products must be zero. If not, an assertion error is raised.
- **Charge Conservation**: The difference in total charge (Z) is checked:
- |ΔZ| = 1: Indicates a weak process (beta decay or electron/positron capture).
The code distinguishes between electron/positron as projectile or ejectile
based on the sign of ΔZ and the Q-value.
- ΔZ = 0: If the number of nuclei changes by ±1, photons are involved
(radiative capture or photodisintegration).
- **Projectiles and Ejectiles**: The heaviest reactant is considered the target,
and the heaviest product is the residual. Other nuclei are projectiles/ejectiles.
Special handling is provided for light nuclei (p, d, t, n, a).
Usage Examples
--------------
Parse a REACLIB file and export to CSV:
python format.py path/to/reaclib/file -o output.csv -f csv
Programmatic usage:
from utils.reaclib.format import parse_reaclib_entry, extract_groups
entry = '''1
h1 he4 c12 mg24 wkb 1.234e+00
1.234e+01 2.345e+02 3.456e+03 4.567e+04 5.678e+05 6.789e+06 7.890e+07
'''
match, reverse = parse_reaclib_entry(entry)
if match:
reaction = extract_groups(match, reverse)
print(reaction)
Functions
---------
- `parse_reaclib_entry(entry)`: Parse a REACLIB entry string.
- `extract_groups(match, reverse)`: Extracts a Reaction object from a regex match.
- `determine_reaction_type(reactants, products, qValue)`: Determines projectiles,
ejectiles, and reaction type using quantum number accounting.
- `evaluate_rate(coeffs, T9)`: Computes the reaction rate at temperature T9.
- `format_cpp_identifier(name)`: Formats a species name for C++ code.
- `create_reaction_dataframe(reactions)`: Converts a list of Reaction objects to a DataFrame.
- `write_reactions_binary(reactions, output_path)`: Writes reactions to a binary file.
Classes
-------
- `Reaction`: Dataclass representing a nuclear reaction.
- `ReaclibParseError`: Exception for parsing errors.
See function/class docstrings for further details.
"""
import re
import sys
from collections import defaultdict
@@ -10,8 +75,10 @@ from serif.constants import Constants
import hashlib
from collections import Counter
import math
import argparse
import pandas as pd
import struct
#import dataclasses
from dataclasses import dataclass
@dataclass
@@ -22,10 +89,10 @@ class Reaction:
chapter: int
qValue: float
coeffs: List[float]
projectile: str # added
ejectile: str # added
rpName: str # added
reactionType: str # added (e.g. "(p,γ)")
projectile: str
ejectile: str
rpName: str
reactionType: str
reverse: bool
def format_rp_name(self) -> str:
return self.rpName
@@ -36,6 +103,26 @@ class Reaction:
def evaluate_rate(coeffs: List[float], T9: float) -> float:
"""
Evaluate the REACLIB reaction rate at a given temperature.
Parameters
----------
coeffs : list of float
The 7 REACLIB coefficients (a0..a6) for the reaction.
T9 : float
Temperature in units of 10^9 K.
Returns
-------
float
The reaction rate at the specified temperature.
Notes
-----
The rate is computed as:
rate = exp(a0 + a1/T9 + a2/T9^{1/3} + a3*T9^{1/3} + a4*T9 + a5*T9^{5/3} + a6*ln(T9))
"""
rateExponent: float = coeffs[0] + \
coeffs[1] / T9 + \
coeffs[2] / (T9 ** (1/3)) + \
@@ -46,7 +133,18 @@ def evaluate_rate(coeffs: List[float], T9: float) -> float:
return np.exp(rateExponent)
class ReaclibParseError(Exception):
"""Custom exception for parsing errors."""
"""
Exception raised for errors encountered while parsing REACLIB entries.
Parameters
----------
message : str
Description of the error.
line_num : int, optional
Line number where the error occurred.
line_content : str, optional
Content of the problematic line.
"""
def __init__(self, message, line_num=None, line_content=None):
self.line_num = line_num
self.line_content = line_content
@@ -60,16 +158,50 @@ class ReaclibParseError(Exception):
def format_cpp_identifier(name: str) -> str:
    """
    Convert a REACLIB species name to an ``Element-mass`` identifier string.

    Parameters
    ----------
    name : str
        The REACLIB species name (e.g., 'h1', 'he4', 'c12', 'p', 'a').

    Returns
    -------
    str
        The formatted identifier (e.g., 'H-1', 'He-4', 'C-12').

    Notes
    -----
    - The light-particle aliases 'p', 'd', 't', 'n' and 'a' are matched
      case-insensitively and mapped to fixed identifiers.
    - Any other name must start with letters immediately followed by digits;
      the letters are capitalized and joined to the digits with a hyphen.
    - A name that does not fit that shape is capitalized as a whole and given
      the mass number 1.
    """
    # Aliases carry no mass number in REACLIB, so they are spelled out here.
    name_map = {'p': 'H-1', 'd': 'H-2', 't': 'H-3', 'n': 'n-1', 'a': 'He-4'}
    alias = name_map.get(name.lower())
    if alias is not None:
        return alias

    match = re.match(r"([a-zA-Z]+)(\d+)", name)
    if match:
        element, mass = match.groups()
        # Use the same hyphen separator as the alias table so every
        # identifier produced by this function has one consistent format.
        return f"{element.capitalize()}-{mass}"

    # No "<letters><digits>" prefix: fall back to mass number 1.
    return f"{name.capitalize()}-1"
def parse_reaclib_entry(entry: str) -> tuple[Match[str] | None, bool]:
"""
Parse a single REACLIB entry string using a regular expression.
Parameters
----------
entry : str
The REACLIB entry as a string (typically 4 lines).
Returns
-------
match : re.Match or None
The regex match object if parsing was successful, else None.
reverse : bool
True if the entry is marked as a reverse reaction, else False.
Notes
-----
The function uses a regular expression to extract chapter, reactants/products,
label, Q-value, and coefficients. The 'reverse' flag is determined by the
character at a fixed position in the entry.
"""
pattern = re.compile(r"""^([1-9]|1[0-1])\r?\n
[ \t]*
((?:[A-Za-z0-9-*]+[ \t]+)*
@@ -93,6 +225,23 @@ def parse_reaclib_entry(entry: str) -> tuple[Match[str] | None, bool]:
def get_rp(group: str, chapter: int) -> Tuple[List[str], List[str]]:
"""
Split a REACLIB group string into reactants and products based on chapter.
Parameters
----------
group : str
The whitespace-separated list of species.
chapter : int
The REACLIB chapter number (determines reactant/product count).
Returns
-------
reactants : list of str
List of reactant species names.
products : list of str
List of product species names.
"""
rpGroupings = {
1: (1, 1), 2: (1, 2), 3: (1, 3), 4: (2, 1), 5: (2, 2),
6: (2, 3), 7: (2, 4), 8: (3, 1), 9: (3, 2), 10: (4, 2), 11: (1, 4)
@@ -104,6 +253,24 @@ def get_rp(group: str, chapter: int) -> Tuple[List[str], List[str]]:
return reactants, products
def translate_names_to_species(names: List[str]) -> List[Species]:
"""
Convert a list of REACLIB species names to Species objects.
Parameters
----------
names : list of str
List of REACLIB species names (e.g., 'h1', 'he4', 'c12', etc.).
Returns
-------
list of Species
List of Species objects corresponding to the names.
Raises
------
ReaclibParseError
If a species name cannot be found in the species database.
"""
sp = list()
split_alpha_digits = lambda inputString: re.match(r'([A-Za-z]+)[-+*]?(\d+)$', inputString).groups()
for name in names:
@@ -118,25 +285,44 @@ def translate_names_to_species(names: List[str]) -> List[Species]:
raise ReaclibParseError(f"Species '{name}' not found in species database.", line_content=name)
return sp
def determine_reaction_type(reactants: List[str],
products: List[str],
qValue: float
) -> Tuple[str, List[str], List[str], str]:
) -> Tuple[str, List[str], List[str], str, str, str]:
"""
Return (targetToken, projectiles, ejectiles, residualToken)
Analyze a reaction for quantum number conservation and classify projectiles/ejectiles.
targetToken the nucleus that appears before the parenthesis (A)
projectiles every explicit projectile that must be written inside ( )
ejectiles every explicit ejectile that must be written after the comma
residualToken the main heavy product that appears after the parenthesis (D)
Parameters
----------
reactants : list of str
List of reactant species names.
products : list of str
List of product species names.
qValue : float
Q-value of the reaction (MeV).
Photons and neutrinos are added / omitted exactly the way JINA REACLIB expects:
γ is explicit only when it is a **projectile** (photodisintegration)
ν/ν̄ are never explicit
Returns
-------
targetToken : str
Name of the heaviest reactant (target nucleus).
projectiles : list of str
List of projectile names (including leptons/photons if present).
ejectiles : list of str
List of ejectile names (including leptons/photons if present).
residualToken : str
Name of the heaviest product (residual nucleus).
reactionKey : str
Unique string key for the reaction.
rType : str
String representation of the reaction type.
Notes
-----
This function performs quantum number bookkeeping:
- Checks baryon and charge conservation.
- Identifies weak (leptonic) and photonic processes.
- Determines projectiles/ejectiles based on mass and reaction type.
"""
if abs(qValue - 4.621) < 1e-6:
print("Looking at he3(he3, 2p)he4")
# --- helper look-ups ----------------------------------------------------
@@ -197,10 +383,9 @@ def determine_reaction_type(reactants: List[str],
# 2. Photon bookkeeping (ΔZ = 0) ----------------------------------------
# -----------------------------------------------------------------------
if dZ == 0:
# Two → one nucleus and exothermic ⇒ radiative capture (γ ejectile, implicit)
# Two → one nucleus and exothermic ⇒ radiative capture (γ ejectile; it seems to normally be implicit, but it is written explicitly here)
if dN == 1 and qValue > 0:
ejectiles.append("g")
pass # γ is implicit; nothing to write
# One → two nuclei and endothermic ⇒ photodisintegration (γ projectile, explicit)
elif dN == -1 and qValue < 0:
@@ -248,6 +433,21 @@ def determine_reaction_type(reactants: List[str],
def extract_groups(match: re.Match, reverse: bool) -> Reaction:
"""
Extract a Reaction object from a regex match of a REACLIB entry.
Parameters
----------
match : re.Match
The regex match object from `parse_reaclib_entry`.
reverse : bool
Whether the reaction is a reverse reaction.
Returns
-------
Reaction
The parsed Reaction dataclass instance.
"""
groups = match.groups()
chapter = int(groups[0].strip())
rawGroup = groups[1].strip()
@@ -274,6 +474,19 @@ def extract_groups(match: re.Match, reverse: bool) -> Reaction:
return reaction
def format_emplacment(reaction: Reaction) -> str:
"""
Format a Reaction object as a C++ emplacement statement.
Parameters
----------
reaction : Reaction
The Reaction object to format.
Returns
-------
str
The C++ code string for emplacing the reaction.
"""
reactantNames = [f'{format_cpp_identifier(r)}' for r in reaction.reactants]
productNames = [f'{format_cpp_identifier(p)}' for p in reaction.products]
@@ -282,7 +495,6 @@ def format_emplacment(reaction: Reaction) -> str:
label = f"{'_'.join(reactantNames)}_to_{'_'.join(productNames)}_{reaction.label.upper()}"
reactants_str = ', '.join(reactants_cpp)
products_str = ', '.join(products_cpp)
@@ -294,101 +506,27 @@ def format_emplacment(reaction: Reaction) -> str:
return emplacment
def generate_reaclib_header(reaclib_filepath: str, culling: float, T9: float, verbose: bool) -> tuple[
LiteralString, int | Any, int | Any]:
"""
Parses a JINA REACLIB file using regular expressions and generates a C++ header file string.
Args:
reaclib_filepath: The path to the REACLIB data file.
culling: The threshold for culling reactions based on their rates at T9.
T9: The temperature in billions of Kelvin to evaluate the reaction rates for culling.
verbose: If True, prints additional information about skipped reactions.
Returns:
A string containing the complete C++ header content.
"""
with open(reaclib_filepath, 'r') as file:
content = file.read()
fileHash = hashlib.sha256(content.encode('utf-8')).hexdigest()
# split the file into blocks of 4 lines each
lines = content.split('\n')
entries = ['\n'.join(lines[i:i+4]) for i in range(0, len(lines), 4) if len(lines[i:i+4]) == 4]
reactions = list()
for entry in entries:
m, r = parse_reaclib_entry(entry)
if m is not None:
try:
reac = extract_groups(m, r)
except ReaclibParseError as e:
continue
if verbose:
print(f"Parsed reaction: {reac.format_rp_name()} ({reac.coeffs}) with label {reac.label} (reverse: {reac.reverse})")
reactions.append(reac)
# --- Generate the C++ Header String ---
cpp_lines = [
"// This file is automatically generated. Do not edit!",
"// Generated on: " + str(np.datetime64('now')),
"// REACLIB file hash (sha256): " + fileHash,
"// Generated from REACLIB data file: " + reaclib_filepath,
"// Culling threshold: rate >" + str(culling) + " at T9 = " + str(T9),
"// Note that if the culling threshold is set to 0.0, no reactions are culled.",
"// Includes %%TOTAL%% reactions.",
"// Note: Only reactions with species defined in the atomicSpecies.h header will be included at compile time.",
"#pragma once",
"#include \"fourdst/composition/atomicSpecies.h\"",
"#include \"fourdst/composition/species.h\"",
"#include \"reaclib.h\"",
"\nnamespace gridfire::reaclib {\n",
"""
inline void initializeAllReaclibReactions() {
if (s_initialized) return; // already initialized
s_initialized = true;
s_all_reaclib_reactions.clear();
s_all_reaclib_reactions.reserve(%%TOTAL%%); // reserve space for total reactions
"""
]
totalSkipped = 0
totalIncluded = 0
energy = list()
energyFile = open('energy.txt', 'w')
energyFile.write("name;maxEnergy;QValue,reactants;products;a0;a1;a2;a3;a4;a5;a6\n")
for reaction in reactions:
maxEnergy = calculate_peak_importance(reaction)
energyFile.write(f"{reaction.format_rp_name()};{maxEnergy};{reaction.qValue};{' '.join(reaction.reactants)};{' '.join(reaction.products)};{';'.join([str(x) for x in reaction.coeffs])}\n")
energy.append(maxEnergy)
reactantNames = [f'{format_cpp_identifier(r)}' for r in reaction.reactants]
productNames = [f'{format_cpp_identifier(p)}' for p in reaction.products]
reactionName = f"{'_'.join(reactantNames)}_to_{'_'.join(productNames)}_{reaction.label.upper()}"
if culling > 0.0:
rate = evaluate_rate(reaction.coeffs, T9)
if rate < culling:
if verbose:
print(f"Skipping reaction {reaction.format_rp_name()} ({reactionName}) with rate {rate:.6e} at T9={T9} (culling threshold: {culling} at T9={T9})")
totalSkipped += 1
continue
else:
totalIncluded += 1
else:
totalIncluded += 1
defines = ' && '.join(set([f"defined(SERIF_SPECIES_{name.upper().replace('-', '_min_').replace('+', '_add_').replace('*', '_mult_')})" for name in reactantNames + productNames]))
cpp_lines.append(f" #if {defines}")
emplacment = format_emplacment(reaction)
cpp_lines.append(f" {emplacment}")
cpp_lines.append(f" #endif // {defines}")
cpp_lines.append("\n }\n} // namespace gridfire::reaclib\n")
energyFile.close()
#save energy data to a file
return "\n".join(cpp_lines), totalSkipped, totalIncluded
def calculate_peak_importance(reaction: Reaction) -> float:
TGrid = np.logspace(-3, 2, 100) # Temperature grid from 0.001 to 100 T9
RhoGrid = np.logspace(0.0, 6.0, 100) # Density grid from 1e0 to 1e3 g/cm^3
"""
Estimate the peak energy importance of a reaction over a grid of T and rho.
Parameters
----------
reaction : Reaction
The Reaction object to analyze.
Returns
-------
float
The maximum energy proxy (rate * |Q|) found over the grid.
Notes
-----
The function evaluates the reaction rate over a grid of temperature (T9)
and density (rho), multiplies by |Q|, and returns the maximum value.
"""
TGrid = np.logspace(-3, 2, 100)
RhoGrid = np.logspace(0.0, 6.0, 100)
N_A: float = Constants['N_a'].value
u: float = Constants['u'].value
max_energy_proxy: float = 0.0
@@ -419,12 +557,10 @@ def calculate_peak_importance(reaction: Reaction) -> float:
reactant = {'t': 'H-3', 'a': 'He-4', 'he4': 'He-4', 'd': 'H-2', 'n': 'n-1', 'p': 'H-1'}[reactant]
else:
reactant = '-'.join(split_alpha_digits(reactant)).capitalize()
# print(f"Parsing reactant {reactant} using split_alpha_digits")
reactantMassAMU = species[reactant].mass()
reactantMassG = reactantMassAMU * u
mass_term *= (Y_ideal/ reactantMassG)
except Exception as e:
# print(f"Error: Reactant {reactant} not found in species database. (what: {e})")
return 0.0
for T9 in TGrid:
@@ -438,30 +574,129 @@ def calculate_peak_importance(reaction: Reaction) -> float:
print(f"For reaction {reaction.format_rp_name()}, max energy proxy: {max_energy_proxy:.6e} MeV")
return max_energy_proxy
# def smart_cull(reactions: List[Reaction], verbose: bool = False):
def create_reaction_dataframe(reactions: List[Reaction]) -> pd.DataFrame:
    """
    Build a pandas DataFrame holding one row per reaction.

    Parameters
    ----------
    reactions : list of Reaction
        Reactions to tabulate. Each one must expose ``reactants``,
        ``products``, ``label``, ``rpName``, ``chapter``, ``qValue``,
        ``reverse`` and at least seven entries in ``coeffs``.

    Returns
    -------
    pd.DataFrame
        Columns, in order: ``id``, ``rpName``, ``chapter``, ``reactants``,
        ``products``, ``qValue``, ``is_reverse``, ``label``, ``a0`` .. ``a6``.
        ``reactants`` and ``products`` are space-joined strings; ``id`` is
        ``<reactants joined by _>_to_<products joined by _>_<LABEL>``.
    """
    def as_row(rxn):
        lhs = '_'.join(rxn.reactants)
        rhs = '_'.join(rxn.products)
        row = {
            'id': f"{lhs}_to_{rhs}_{rxn.label.upper()}",
            'rpName': rxn.rpName,
            'chapter': rxn.chapter,
            'reactants': ' '.join(rxn.reactants),
            'products': ' '.join(rxn.products),
            'qValue': rxn.qValue,
            'is_reverse': rxn.reverse,
            'label': rxn.label,
        }
        # The seven rate coefficients become columns a0..a6, in that order.
        for index in range(7):
            row[f'a{index}'] = rxn.coeffs[index]
        return row

    return pd.DataFrame([as_row(rxn) for rxn in reactions])
def write_reactions_binary(reactions: List[Reaction], output_path: str):
    """
    Serialize reactions to a file of fixed-size binary records.

    Parameters
    ----------
    reactions : list of Reaction
        Reactions to write, one record each, in list order.
    output_path : str
        Path of the binary file to create (opened with mode ``'wb'``).

    Notes
    -----
    Every record uses the struct layout ``'<i d 7d ? 8s 64s 128s 128s'``
    (little-endian, no padding): chapter, Q-value, the seven coefficients,
    the reverse flag, then four NUL-padded UTF-8 text fields holding the
    label (8 bytes), rpName (64 bytes), reactants (128 bytes) and products
    (128 bytes). Text longer than ``width - 1`` bytes is truncated.
    Reactant and product names are converted with `format_cpp_identifier`
    and joined with single spaces before encoding.
    """
    packer = struct.Struct('<i d 7d ? 8s 64s 128s 128s')

    def fixed_width(text: str, width: int) -> bytes:
        # Keep at most width - 1 bytes so the field always ends in a NUL.
        return text.encode('utf-8')[:width - 1].ljust(width, b'\0')

    with open(output_path, 'wb') as f:
        for reaction in reactions:
            reactant_names = ' '.join(format_cpp_identifier(x) for x in reaction.reactants)
            product_names = ' '.join(format_cpp_identifier(x) for x in reaction.products)
            f.write(packer.pack(
                reaction.chapter,
                reaction.qValue,
                *reaction.coeffs,
                reaction.reverse,
                fixed_width(reaction.label, 8),
                fixed_width(reaction.rpName, 64),
                fixed_width(reactant_names, 128),
                fixed_width(product_names, 128),
            ))
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description="Generate a C++ header from a REACLIB file.")
parser = argparse.ArgumentParser(description="Generate a CSV file from a REACLIB file.")
parser.add_argument("reaclib_file", type=str, help="Path to the REACLIB data file.")
parser.add_argument("-o", "--output", type=str, default=None, help="Output file path (default: stdout).")
parser.add_argument('-c', "--culling", type=float, default=0.0, help="Culling threshold for reaction rates at T9 (when 0.0, no culling is applied).")
parser.add_argument('-T', '--T9', type=float, default=0.01, help="Temperature in billions of Kelvin (default: 0.01) to evaluate the reaction rates for culling.")
parser.add_argument("-o", "--output", type=str, default="reactions.csv", help="Output CSV file path.")
parser.add_argument('-v', '--verbose', action='store_true', help="Enable verbose output.")
parser.add_argument('-f', '--format', type=str, choices=['csv', 'bin'], default='bin', help="Output format")
args = parser.parse_args()
try:
cpp_header_string, skipped, included = generate_reaclib_header(args.reaclib_file, args.culling, args.T9, args.verbose)
cpp_header_string = cpp_header_string.replace("%%TOTAL%%", str(included))
print("--- Generated C++ Header (Success!) ---")
if args.output:
with open(args.output, 'w') as f:
f.write(cpp_header_string)
print(f"Header written to {args.output}")
print(f"Total reactions included: {included}, Total reactions skipped: {skipped}")
with open(args.reaclib_file, 'r') as file:
content = file.read()
lines = content.split('\n')
entries = ['\n'.join(lines[i:i+4]) for i in range(0, len(lines), 4) if len(lines[i:i+4]) == 4 and lines[i].strip()]
parsed_reactions = []
for i, entry in enumerate(entries):
m, r = parse_reaclib_entry(entry)
if m is not None:
try:
reac = extract_groups(m, r)
parsed_reactions.append(reac)
except ReaclibParseError as e:
if args.verbose:
print(f"Skipping entry starting at line {i*4 + 1} due to parsing error: {e}", file=sys.stderr)
continue
print(f"Successfully parsed {len(parsed_reactions)} reactions from {args.reaclib_file}")
reaction_df = create_reaction_dataframe(parsed_reactions)
if args.format == 'csv':
reaction_df.to_csv(args.output, index=False)
print("--- CSV Generation (Success!) ---")
print(f"Reaction data written to {args.output}")
else:
print(cpp_header_string)
except ReaclibParseError as e:
print(f"\n--- PARSING FAILED ---")
write_reactions_binary(parsed_reactions, args.output)
print("--- Binary File Generation (Success!) ---")
print(f"Reaction data written to {args.output}")
except FileNotFoundError:
print(f"Error: Input file not found at {args.reaclib_file}", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(f"\n--- AN UNEXPECTED ERROR OCCURRED ---")
print(e, file=sys.stderr)
sys.exit(1)

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -18,7 +18,7 @@ Assuming you download that file to your ~/Downloads directory, and it is called
`results123` then usage is as simple as
```bash
python generateEmbeddedReaclibHeader.py ~/Downloads/results123 -o reaclib.h -c 1e-8 -T 0.1
python format.py ~/Downloads/results123 -o reactions.bin -f bin
```
This will write the parsed reaction data to `reactions.bin` in your current directory (use `-f csv` to write a CSV file instead).

File diff suppressed because it is too large Load Diff