feat(opatIO): fully updated for index vector

PreviouslyOPAT files were indexed using X and Z now they are indexed with a general index vector.

BREAKING CHANGE: all methods wch used X and Z now use std::vector<double> index (size: header.numIndex) instead. Also added a method to validate tables using checksum
This commit is contained in:
2025-02-17 13:01:34 -05:00
parent 5d51f5b5e0
commit df6335d25f
2 changed files with 171 additions and 135 deletions

View File

@@ -10,6 +10,7 @@
#include <cmath>
#include <limits>
#include <deque>
#include "picosha2.h"
// Function to check system endianness
bool is_big_endian() {
@@ -75,13 +76,14 @@ void OpatIO::unload() {
void OpatIO::readHeader(std::ifstream &file) {
file.read(reinterpret_cast<char*>(&header), sizeof(Header));
if (file.gcount() != sizeof(Header)) {
throw std::runtime_error("Error reading header from file: " + filename);
throw std::runtime_error("Error reading header from file");
}
if (is_big_endian()) {
header.version = swap_bytes(header.version);
header.numTables = swap_bytes(header.numTables);
header.indexOffset = swap_bytes(header.indexOffset);
header.numIndex = swap_bytes(header.numIndex);
}
}
@@ -89,75 +91,86 @@ void OpatIO::readHeader(std::ifstream &file) {
void OpatIO::readTableIndex(std::ifstream &file) {
file.seekg(header.indexOffset, std::ios::beg);
tableIndex.resize(header.numTables);
file.read(reinterpret_cast<char*>(tableIndex.data()), header.numTables * sizeof(TableIndex));
if (file.gcount() != static_cast<std::streamsize>(header.numTables * sizeof(TableIndex))) {
throw std::runtime_error("Error reading table index from file: " + filename);
long unsigned int indexReadBytes;
for (uint32_t i = 0; i < header.numTables; i++) {
indexReadBytes = 0;
// Read the index vector in based on numIndex
tableIndex.at(i).index.resize(header.numIndex);
file.read(reinterpret_cast<char*>(tableIndex.at(i).index.data()), header.numIndex * sizeof(double));
indexReadBytes += static_cast<int>(file.gcount());
// Read the start and end position of the table in
file.read(reinterpret_cast<char*>(&tableIndex.at(i).byteStart), sizeof(uint64_t));
indexReadBytes += static_cast<int>(file.gcount());
file.read(reinterpret_cast<char*>(&tableIndex.at(i).byteEnd), sizeof(uint64_t));
indexReadBytes += static_cast<int>(file.gcount());
// Read the checksum in
file.read(tableIndex.at(i).sha256, 32);
indexReadBytes += static_cast<int>(file.gcount());
// validate that the size of read data is correct
if (indexReadBytes != header.numIndex * sizeof(double) + 32 + 2 * sizeof(uint64_t)) {
throw std::runtime_error("Error reading table index from file");
}
}
buildTableIDToComposition();
buildTableIDToIndex();
}
void OpatIO::buildTableIDToComposition(){
tableIDToComposition.clear();
void OpatIO::buildTableIDToIndex(){
tableIDToIndex.clear();
int tableID = 0;
std::pair<double, double> comp;
for (const auto &index : tableIndex) {
comp.first = index.X;
comp.second = index.Z;
tableIDToComposition.emplace(tableID, comp);
std::vector<double> ind;
ind.resize(header.numIndex);
for (const auto &table : tableIndex) {
ind.clear();
for (const auto &index : table.index) {
ind.push_back(index);
}
tableIDToIndex.emplace(tableID, ind);
tableID++;
}
XZLookupEpsilon();
LookupEpsilon();
}
void OpatIO::XZLookupEpsilon() {
void OpatIO::LookupEpsilon() {
/*
Get 10% of the minimum spacing between XZ values
in the tableIDToComposition map. This can be used
Get 10% of the minimum spacing between index values
in the tableIDToIndex map. This can be used
to set the comparison distance when doing a reverse
lookup (composition -> tableID)
lookup (index -> tableID)
*/
std::vector<double> Xvalues, Zvalues;
double epsilonX, epsilonZ, xgap, zgap;
indexEpsilon.resize(header.numIndex);
// Start these out as larger than they will ever be
epsilonX = 1;
epsilonZ = 1;
for (const auto& pair : tableIDToComposition) {
Xvalues.push_back(pair.second.first);
Zvalues.push_back(pair.second.second);
}
// Sorting is required for this algorithm.
std::sort(Xvalues.begin(), Xvalues.end());
std::sort(Zvalues.begin(), Zvalues.end());
for (size_t i = 1; i < Xvalues.size(); ++i) {
xgap = Xvalues[i] - Xvalues[i - 1];
zgap = Zvalues[i] - Zvalues[i - 1];
if (xgap > 0 && xgap < epsilonX) {
epsilonX = xgap;
}
if (zgap > 0 && zgap < epsilonZ) {
epsilonZ = zgap;
double epsilon;
for (int i = 0; i < static_cast<int>(header.numIndex); i++) {
epsilon = std::numeric_limits<double>::max();
for (int j = 1; j < static_cast<int>(header.numTables); j++) {
epsilon = std::min(epsilon, std::fabs(tableIDToIndex.at(j).at(i) - tableIDToIndex.at(j-1).at(i)));
}
indexEpsilon.at(i) = epsilon * 0.1;
}
// 0.1 to extract 10% of min distance.
XZepsilon = {0.1*epsilonX, 0.1*epsilonZ};
}
int OpatIO::lookupTableID(double X, double Z){
bool XOkay;
bool ZOkay;
int OpatIO::lookupTableID(std::vector<double> index) {
std::vector<bool> IndexOkay;
IndexOkay.resize(header.numIndex);
int tableID = 0;
for (const auto &tableMap : tableIDToComposition){
XOkay = std::fabs(tableMap.second.first - X) < XZepsilon.first;
ZOkay = std::fabs(tableMap.second.second - Z) < XZepsilon.second;
if (XOkay and ZOkay){
for (const auto &tableMap : tableIDToIndex){
// Loop through all index values and check if they are within epsilon for that index
std::fill(IndexOkay.begin(), IndexOkay.end(), false);
for (long unsigned int i = 0; i < index.size(); i++) {
IndexOkay.at(i) = std::fabs(tableMap.second.at(i) - index.at(i)) < indexEpsilon.at(i);
}
// If all index values are within epsilon, return the table ID
if (std::all_of(IndexOkay.begin(), IndexOkay.end(), [](bool i){return i;})) {
return tableID;
}
tableID++;
}
// If no table is found, return -1 (sentinal value)
return -1;
}
@@ -201,10 +214,10 @@ uint16_t OpatIO::getOPATVersion() {
}
// Get a table for given X and Z
OPATTable OpatIO::getTable(double X, double Z) {
int tableID = lookupTableID(X, Z);
OPATTable OpatIO::getTable(std::vector<double> index) {
int tableID = lookupTableID(index);
if (tableID == -1) {
throw std::out_of_range("X Z Pair Not found!");
throw std::out_of_range("Index Not found!");
}
try {
return getTableFromQueue(tableID);
@@ -288,6 +301,7 @@ void OpatIO::printHeader() {
std::cout << "Creation Date: " << header.creationDate << std::endl;
std::cout << "Source Info: " << header.sourceInfo << std::endl;
std::cout << "Comment: " << header.comment << std::endl;
std::cout << "Number of Indices: " << header.numIndex << std::endl;
}
// Print the table index
@@ -298,9 +312,11 @@ void OpatIO::printTableIndex() {
}
// Print table header
std::cout << std::left << std::setw(10) << "X"
<< std::setw(10) << "Z"
<< std::setw(15) << "Byte Start"
std::cout << std::left << std::setw(10);
for (int i = 0; i < header.numIndex; i++) {
std::cout << "Index " << i << std::setw(10);
}
std::cout << std::setw(15) << "Byte Start"
<< std::setw(15) << "Byte End"
<< "Checksum (SHA-256)" << std::endl;
@@ -308,10 +324,11 @@ void OpatIO::printTableIndex() {
// Print each entry in the table
for (const auto &index : tableIndex) {
std::cout << std::fixed << std::setprecision(4)
<< std::setw(10) << index.X
<< std::setw(10) << index.Z
<< std::setw(15) << index.byteStart
std::cout << std::fixed << std::setprecision(4) << std::setw(10);
for (int i = 0; i < header.numIndex; i++) {
std::cout << index.index[i] << std::setw(10);
}
std::cout << std::setw(5) << index.byteStart
<< std::setw(15) << index.byteEnd
<< std::hex; // Switch to hex mode for checksum
@@ -422,8 +439,8 @@ void OpatIO::printTable(OPATTable table, uint32_t truncateDigits) {
std::cout << "]" << std::endl;
}
void OpatIO::printTable(double X, double Z, uint32_t truncateDigits) {
int tableID = lookupTableID(X, Z);
void OpatIO::printTable(std::vector<double> index, uint32_t truncateDigits) {
int tableID = lookupTableID(index);
OPATTable table = getTable(tableID);
printTable(table, truncateDigits);
}
@@ -438,23 +455,45 @@ Header OpatIO::getHeader() {
return header;
}
// // Get the closest X tables
// std::vector<OPATTable> OpatIO::getClosestXTables(double X, double ZExact, int numTables) {
// std::vector<OPATTable> closestTables;
// // Implement logic to find closest X tables
// return closestTables;
// }
// Get the size of the index vector used
uint16_t OpatIO::getNumIndex() {
return header.numIndex;
}
// // Get the closest Z tables
// std::vector<OPATTable> OpatIO::getClosestZTables(double XExact, double Z, int numTables) {
// std::vector<OPATTable> closestTables;
// // Implement logic to find closest Z tables
// return closestTables;
// }
TableIndex OpatIO::getTableIndex(std::vector<double> index) {
int tableID = lookupTableID(index);
return tableIndex.at(tableID);
}
// // Get the closest tables
// std::vector<OPATTable> OpatIO::getClosestTables(double X, double Z, int numTables) {
// std::vector<OPATTable> closestTables;
// // Implement logic to find closest tables
// return closestTables;
// }
std::vector<unsigned char> OpatIO::computeChecksum(int tableID) {
OPATTable table = getTable(tableID);
std::vector<std::vector<double>> logKappa = table.logKappa;
std::vector<double> flatData(logKappa.size() * logKappa.size());
size_t offset = 0;
for (const auto& row : logKappa) {
for (const auto& val : row) {
flatData[offset++] = val;
}
}
std::vector<unsigned char> flatDataBytes(flatData.size() * sizeof(double));
std::memcpy(flatDataBytes.data(), flatData.data(), flatDataBytes.size());
std::vector<unsigned char> hash(picosha2::k_digest_size);
picosha2::hash256(flatDataBytes.begin(), flatDataBytes.end(), hash.begin(), hash.end());
return hash;
}
std::vector<unsigned char> OpatIO::computeChecksum(std::vector<double> index) {
int tableID = lookupTableID(index);
return computeChecksum(tableID);
}
bool OpatIO::validateAll() {
for (const auto &table : tableIDToIndex) {
std::vector<unsigned char> hash = computeChecksum(table.first);
std::vector<unsigned char> storedHash(tableIndex.at(table.first).sha256, tableIndex.at(table.first).sha256 + 32);
if (hash != storedHash) {
return false;
}
}
return true;
}

View File

@@ -22,7 +22,8 @@ struct Header {
char creationDate[16]; ///< Creation date of the file
char sourceInfo[64]; ///< Source information
char comment[128]; ///< Comment section
char reserved[26]; ///< Reserved for future use
uint16_t numIndex; ///< Size of index vector per table
char reserved[24]; ///< Reserved for future use
};
#pragma pack()
@@ -30,8 +31,7 @@ struct Header {
* @brief Structure to hold the index information of a table in an OPAT file.
*/
struct TableIndex {
double X; ///< X composition value
double Z; ///< Z composition value
std::vector<double> index; ///< Index vector for associated table
uint64_t byteStart; ///< Byte start position of the table
uint64_t byteEnd; ///< Byte end position of the table
char sha256[32]; ///< SHA-256 hash of the table data
@@ -58,9 +58,9 @@ private:
Header header; ///< Header information of the OPAT file
std::vector<TableIndex> tableIndex; ///< Index information of the tables
std::deque<std::pair<int, OPATTable>> tableQueue; ///< Queue to manage table caching
std::map<int, std::pair<double, double>> tableIDToComposition; ///< Map to store table ID to composition mapping
std::pair<double, double> XZepsilon; ///< Epsilon values for X and Z
int maxQDepth = 10; ///< Maximum depth of the table queue
std::map<int, std::vector<double>> tableIDToIndex; ///< Map to store table ID to indexing
std::vector<double> indexEpsilon; ///< Epsilon values for each index
int maxQDepth = 20; ///< Maximum depth of the table queue
std::string filename; ///< Filename of the OPAT file
bool loaded = false; ///< Flag to indicate if the file is loaded
@@ -115,14 +115,14 @@ private:
void printTable(OPATTable table, uint32_t truncateDigits=5);
/**
* @brief Lookup epsilon values for X and Z.
* @brief Lookup epsilon values for Index.
*/
void XZLookupEpsilon();
void LookupEpsilon();
/**
* @brief Build the table ID to composition mapping.
* @brief Build the table ID to Index mapping.
*/
void buildTableIDToComposition();
void buildTableIDToIndex();
public:
/**
@@ -142,12 +142,12 @@ public:
~OpatIO();
/**
* @brief Get a table by X and Z values.
* @param X The X composition value.
* @param Z The Z composition value.
* @brief Get a table by index vector
* @param index The index vector associated with the table to retrieve.
* @throw std::out_of_range if the index is not found.
* @return The OPAT table.
*/
OPATTable getTable(double X, double Z);
OPATTable getTable(std::vector<double> index);
/**
* @brief Set the maximum depth of the table queue.
@@ -195,11 +195,10 @@ public:
/**
* @brief Print a table by X and Z values.
* @param X The X composition value.
* @param Z The Z composition value.
* @param index The index vector associated with the table to print.
* @param truncateDigits Number of digits to truncate.
*/
void printTable(double X, double Z, uint32_t truncateDigits=5);
void printTable(std::vector<double> index, uint32_t truncateDigits=5);
/**
* @brief Get the table index.
@@ -213,60 +212,58 @@ public:
*/
Header getHeader();
/**
* @brief Get the closest tables by X value.
* @param X The X composition value.
* @param ZExact The exact Z composition value.
* @param C The C composition value (default is 0).
* @param O The O composition value (default is 0).
* @param numTables The number of closest tables to retrieve (default is 1).
* @return A vector of OPAT tables.
*/
std::vector<OPATTable> getClosestXTables(double X, double ZExact, double C=0, double O=0, int numTables=1);
/**
* @brief Get the closest tables by Z value.
* @param XExact The exact X composition value.
* @param Z The Z composition value.
* @param C The C composition value (default is 0).
* @param O The O composition value (default is 0).
* @param numTables The number of closest tables to retrieve (default is 1).
* @return A vector of OPAT tables.
*/
std::vector<OPATTable> getClosestZTables(double XExact, double Z, double C=0, double O=0, int numTables=1);
/**
* @brief Get the closest tables by X and Z values.
* @param X The X composition value.
* @param Z The Z composition value.
* @param C The C composition value (default is 0).
* @param O The O composition value (default is 0).
* @param numTables The number of closest tables to retrieve (default is 1).
* @return A vector of OPAT tables.
*/
std::vector<OPATTable> getClosestTables(double X, double Z, double C=0, double O=0, int numTables=1);
/**
* @brief Lookup the table ID by X and Z values.
* @param X The X composition value.
* @param Z The Z composition value.
* @return The table ID.
* @param index The index vector associated with the table to lookup.
* @return The table ID if index is found, otherwise -1.
*/
int lookupTableID(double X, double Z);
int lookupTableID(std::vector<double> index);
/**
* @brief Lookup the closest table ID by X and Z values.
* @param X The X composition value.
* @param Z The Z composition value.
* @param index The index vector associated with the table to lookup.
* @return The closest table ID.
*/
int lookupClosestTableID(double X, double Z);
int lookupClosestTableID(std::vector<double> index);
/**
* @brief Get the version of the OPAT file format.
* @return The version of the OPAT file format.
*/
uint16_t getOPATVersion();
/**
* @brief Get size of the index vector per table in the OPAT file.
* @return The size of the index vector per table.
*/
uint16_t getNumIndex();
/**
* @brief Get the index vector for a given table ID.
* @param index The index vector associated with the table to retrieve.
* @return The full TableIndex entry for the table
*/
TableIndex getTableIndex(std::vector<double> index);
/**
* @brief Get the checksum (sha256) for a given table ID.
* @param tableID The ID of the table.
* @return The checksum vector for the table.
*/
std::vector<unsigned char> computeChecksum(int tableID);
/**
* @brief Get the checksum (sha256) for a given index vector.
* @param index The index vector associated with the table to retrieve.
* @return The checksum vector for the table.
*/
std::vector<unsigned char> computeChecksum(std::vector<double> index);
/**
* @brief Validate the checksum of all tables.
* @return True if all checksum are valid, false otherwise.
*/
bool validateAll();
};
#endif