feat(composition-hash): added robust hashes for compositions

Also added a std::hash specialization so that Compositions can be used as keys in unordered_* containers. Further added a constructor to build a Composition from a const CompositionAbstract&.
This commit is contained in:
2025-11-12 15:21:33 -05:00
parent 2492b5f652
commit 408dd71eee
13 changed files with 899 additions and 5 deletions

View File

@@ -274,6 +274,8 @@ namespace fourdst::composition {
*/
Composition(const Composition& composition);
/**
 * @brief Constructs a Composition from any CompositionAbstract.
 * @param composition Source composition. Each of its registered species is
 *        registered in the new object and assigned the molar abundance the
 *        source reports for that species.
 */
explicit Composition(const CompositionAbstract& composition);
/**
* @brief Assignment operator.
* @param other The Composition to assign from.
@@ -820,4 +822,22 @@ namespace fourdst::composition {
}
};
/**
 * @brief Exact equality of two compositions.
 *
 * Two compositions are equal when they report the same size, the same set of
 * registered species, and identical (species, abundance) entries in iteration
 * order. Abundances are compared with `!=`, i.e. without any tolerance.
 *
 * @param a Left-hand composition.
 * @param b Right-hand composition.
 * @return true if every check passes, false on the first mismatch.
 */
inline bool operator==(const Composition& a, const Composition& b) noexcept {
    // Cheap rejections first: entry count, then the registered species sets.
    const bool sameShape =
        (a.size() == b.size()) &&
        !(a.getRegisteredSpecies() != b.getRegisteredSpecies());
    if (!sameShape) {
        return false;
    }

    // Walk both compositions in lock-step and compare each entry.
    auto lhs = a.begin();
    auto rhs = b.begin();
    while (lhs != a.end() && rhs != b.end()) {
        const bool entryDiffers =
            (lhs->first != rhs->first) || (lhs->second != rhs->second);
        if (entryDiffers) {
            return false;
        }
        ++lhs;
        ++rhs;
    }
    return true;
}
}; // namespace fourdst::composition

View File

@@ -0,0 +1,157 @@
#pragma once

#include <bit>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <vector>

#include "xxhash64.h"
namespace fourdst::composition::utils {
/**
 * @brief 64-bit hashing of composition-like objects.
 *
 * A composition is serialized into a byte buffer (explicit little-endian) and
 * the buffer is hashed with XXHash64. The serialized layout is:
 *   - the domain tag kTag (without its terminating NUL),
 *   - the number of registered species (u64),
 *   - the number of iterated (species, abundance) entries (u64),
 *   - for hash_quantized only: the bytes "quantized" followed by the bits of eps,
 *   - for every entry: packed species (u32) followed by a 64-bit value word.
 *
 * CompositionT must provide begin()/end() whose iterators expose
 * `->first` (species) and `->second` (abundance, convertible to double), and
 * getRegisteredSpecies() returning something with size().
 */
struct CompositionHash {
    /// Seed handed to XXHash64 by hash_exact; hash_quantized XORs it with a second constant.
    static constexpr std::uint64_t kSeed = 0xC04D5EEDBEEFull;
    /// Domain tag written at the start of every serialized buffer.
    static constexpr char kTag[] = "4DSTAR:Composition";

    /**
     * @brief Hash a composition using the exact bit patterns of its abundances.
     *
     * -0.0 is folded into +0.0 and every NaN is mapped to one canonical quiet
     * NaN, so values that differ only in those respects hash identically.
     *
     * @param comp Composition to hash.
     * @return 64-bit XXHash64 digest of the serialized composition.
     */
    template <typename CompositionT>
    static std::uint64_t hash_exact(const CompositionT& comp) {
        std::vector<std::uint8_t> buf;
        reserve_bytes(comp, buf);
        write_header(comp, buf);
        for (auto it = comp.begin(); it != comp.end(); ++it) {
            push_le32(buf, pack_species(it->first));
            const double abundance = it->second;
            push_le64(buf, normalize_double_bits(abundance));
        }
        return XXHash64::hash(buf.data(), buf.size(), kSeed);
    }

    /// @return true when v is neither infinite nor NaN.
    static inline bool is_finite(double v) noexcept {
        return std::isfinite(v);
    }

    /**
     * @brief Index of the eps-wide bucket that v falls into: round(v / eps).
     *
     * The division and rounding are done in long double; ties round away from
     * zero. Results that do not fit into a 64-bit signed integer saturate at
     * the int64 limits, and a NaN quotient (e.g. 0/0 or a NaN argument) yields
     * 0, so the conversion to an integer is always well defined.
     *
     * @param v   Value to quantize.
     * @param eps Bucket width.
     * @return The (possibly saturated) bucket index.
     */
    static inline std::int64_t quantize_index(double v, double eps) noexcept {
        // 2^63 is exactly representable in every binary floating-point format.
        constexpr long double kTwoPow63 = 9223372036854775808.0L;

        const long double scaled =
            static_cast<long double>(v) / static_cast<long double>(eps);
        if (std::isnan(scaled)) {
            return 0;
        }
        // Round in floating point first so the range check sees the final value.
        const long double rounded = std::round(scaled);
        if (rounded >= kTwoPow63) {
            return std::numeric_limits<std::int64_t>::max();
        }
        if (rounded <= -kTwoPow63) {
            return std::numeric_limits<std::int64_t>::min();
        }
        return static_cast<std::int64_t>(rounded);
    }

    /**
     * @brief Hash a composition after snapping abundances to multiples of eps.
     *
     * Finite abundances are replaced by their bucket index (see
     * quantize_index). Non-finite abundances, and all abundances when eps is
     * not strictly positive (zero, negative or NaN), are hashed by their
     * normalized bit pattern instead. eps itself is part of the hashed data,
     * and a different seed than hash_exact is used.
     *
     * Declared noexcept: a failed buffer allocation terminates the program.
     *
     * @param comp Composition to hash.
     * @param eps  Quantization step.
     * @return 64-bit XXHash64 digest of the quantized serialization.
     */
    template <typename CompositionT>
    static std::uint64_t hash_quantized(const CompositionT& comp, double eps) noexcept {
        constexpr std::size_t kTagLen = sizeof(kQuantizedTag) - 1;

        std::vector<std::uint8_t> buf;
        reserve_bytes(comp, buf, kTagLen + 8);
        write_header(comp, buf);
        push_bytes(buf, reinterpret_cast<const std::uint8_t*>(kQuantizedTag), kTagLen);
        push_le64(buf, encode_fp64(eps));

        // `eps > 0.0` is false for NaN as well, which `eps <= 0.0` failed to reject.
        const bool canQuantize = eps > 0.0;

        for (auto it = comp.begin(); it != comp.end(); ++it) {
            push_le32(buf, pack_species(it->first));
            const double abundance = it->second;
            const std::uint64_t word =
                (canQuantize && is_finite(abundance))
                    ? static_cast<std::uint64_t>(quantize_index(abundance, eps))
                    : normalize_double_bits(abundance);
            push_le64(buf, word);
        }
        return XXHash64::hash(buf.data(), buf.size(), kSeed ^ 0x7319'BEEF'1234ull);
    }

private:
    /// Marker written by hash_quantized right after the header.
    static constexpr char kQuantizedTag[] = "quantized";

    /**
     * @brief Pack a species into 32 bits: z() in the high half, a() in the low half.
     *
     * Both accessors are truncated to 16 bits. SpeciesT must provide z() and a().
     */
    template <typename SpeciesT>
    static std::uint32_t pack_species(const SpeciesT& s) noexcept {
        const auto z = static_cast<std::uint16_t>(s.z());
        const auto a = static_cast<std::uint16_t>(s.a());
        return (static_cast<std::uint32_t>(z) << 16) | static_cast<std::uint32_t>(a);
    }

    /// Bit pattern of v with -0.0 folded into +0.0 and every NaN made canonical.
    static inline std::uint64_t normalize_double_bits(double v) noexcept {
        if (v == 0.0) {
            // Matches both zeros; always emit the bits of +0.0.
            return std::bit_cast<std::uint64_t>(0.0);
        }
        if (std::isnan(v)) {
            return 0x7ff8000000000000ULL; // canonical quiet NaN
        }
        return std::bit_cast<std::uint64_t>(v);
    }

    /**
     * @brief Snap v to the nearest multiple of eps, returned as a double.
     *
     * Returns v unchanged when v is not finite or eps <= 0. A zero result is
     * always returned as +0.0. Not called by the hashing functions in this struct.
     */
    static inline double quantize(double v, double eps) noexcept {
        if (!std::isfinite(v) || eps <= 0.0) return v;
        const double q = std::nearbyint(v / eps) * eps;
        return (q == 0.0) ? 0.0 : q;
    }

    /// Raw bit pattern of v, without any normalization.
    static inline std::uint64_t encode_fp64(double v) noexcept {
        return std::bit_cast<std::uint64_t>(v);
    }

    // ---------- byte helpers (explicit little-endian) ----------

    /// Append x to b as 4 bytes, least significant byte first.
    static inline void push_le32(std::vector<std::uint8_t>& b, std::uint32_t x) {
        for (unsigned shift = 0; shift < 32; shift += 8) {
            b.push_back(static_cast<std::uint8_t>((x >> shift) & 0xFF));
        }
    }

    /// Append x to b as 8 bytes, least significant byte first.
    static inline void push_le64(std::vector<std::uint8_t>& b, std::uint64_t x) noexcept {
        for (unsigned shift = 0; shift < 64; shift += 8) {
            b.push_back(static_cast<std::uint8_t>((x >> shift) & 0xFF));
        }
    }

    /// Append the n bytes starting at p to b.
    static inline void push_bytes(std::vector<std::uint8_t>& b, const std::uint8_t* p, std::size_t n) noexcept {
        b.insert(b.end(), p, p + n);
    }

    /// Number of entries produced by iterating comp from begin() to end().
    template <typename CompositionT>
    static std::size_t count_entries(const CompositionT& comp) noexcept {
        std::size_t n = 0;
        for (auto it = comp.begin(); it != comp.end(); ++it) { ++n; }
        return n;
    }

    /// Write the domain tag followed by the registered-species and entry counts.
    template <typename CompositionT>
    static void write_header(const CompositionT& comp, std::vector<std::uint8_t>& buf) noexcept {
        push_bytes(buf, reinterpret_cast<const std::uint8_t*>(kTag), sizeof(kTag) - 1);
        const std::size_t nRegistered = comp.getRegisteredSpecies().size();
        const std::size_t nMolar = count_entries(comp);
        push_le64(buf, static_cast<std::uint64_t>(nRegistered));
        push_le64(buf, static_cast<std::uint64_t>(nMolar));
    }

    /**
     * @brief Pre-size buf for the serialized form of comp.
     * @param extra Additional bytes the caller will write beyond header and entries.
     */
    template <typename CompositionT>
    static void reserve_bytes(const CompositionT& comp, std::vector<std::uint8_t>& buf,
                              std::size_t extra = 0) noexcept {
        const std::size_t nMolar = count_entries(comp);
        // tag + two u64 counters + (u32 species + u64 value) per entry + caller extras
        const std::size_t approx = (sizeof(kTag) - 1) + 16 + nMolar * (4 + 8) + extra;
        buf.reserve(approx);
    }
};
}
namespace std {
    /**
     * @brief std::hash specialization for fourdst::composition::Composition.
     *
     * Forwards to CompositionHash::hash_exact and narrows the 64-bit digest to
     * std::size_t.
     */
    template <>
    struct hash<fourdst::composition::Composition> {
        std::size_t operator()(const fourdst::composition::Composition& c) const noexcept {
            using fourdst::composition::utils::CompositionHash;
            const auto digest = CompositionHash::hash_exact(c);
            return static_cast<std::size_t>(digest);
        }
    };
}

View File

@@ -0,0 +1,5 @@
// Project name. The placeholder below is substituted by the build system's
// configure step, which supplies the value as a double-quoted string.
#define PROJECT_NAME @PROJECT_NAME@
// Project version. Substituted the same way, also as a double-quoted string.
#define PROJECT_VERSION @PROJECT_VERSION@

View File

@@ -165,6 +165,13 @@ namespace fourdst::composition {
m_molarAbundances = composition.m_molarAbundances;
}
/**
 * @brief Builds a Composition from an arbitrary CompositionAbstract.
 *
 * Every species registered in the source is registered here and then given
 * the molar abundance the source reports for it.
 *
 * @param composition Source composition to copy from.
 */
Composition::Composition(const CompositionAbstract &composition) {
    auto&& sourceSpecies = composition.getRegisteredSpecies();
    for (const auto& sp : sourceSpecies) {
        // Register first: the abundance is set on an already-known species.
        registerSpecies(sp);
        setMolarAbundance(sp, composition.getMolarAbundance(sp));
    }
}
Composition& Composition::operator=(
const Composition &other
) {

View File

@@ -30,7 +30,8 @@ dependencies = [
species_weight_dep,
const_dep,
config_dep,
log_dep
log_dep,
xxhash_dep
]
# Define the libcomposition library so it can be linked against by other parts of the build system
@@ -69,3 +70,15 @@ composition_exception_headers = files(
'include/fourdst/composition/exceptions/exceptions_composition.h',
)
install_headers(composition_exception_headers, subdir : 'fourdst/fourdst/composition/exceptions')
# Generate config.h from the config.h.in template, filling in the project
# name and version taken from the Meson project() declaration.
v = meson.project_version()
conf_data = configuration_data()
# set_quoted wraps each value in double quotes, so the substituted macros
# expand to C string literals.
conf_data.set_quoted('PROJECT_VERSION', v)
conf_data.set_quoted('PROJECT_NAME', meson.project_name())
# The output is written to the build directory for this subdirectory.
configure_file(
input : 'include/fourdst/config.h.in',
output : 'config.h',
configuration : conf_data
)