feat(opatIO): fully updated for index vector

PreviouslyOPAT files were indexed using X and Z now they are indexed with a general index vector.

BREAKING CHANGE: all methods wch used X and Z now use std::vector<double> index (size: header.numIndex) instead. Also added a method to validate tables using checksum
This commit is contained in:
2025-02-17 13:01:34 -05:00
parent 5d51f5b5e0
commit df6335d25f
2 changed files with 171 additions and 135 deletions

View File

@@ -10,6 +10,7 @@
#include <cmath> #include <cmath>
#include <limits> #include <limits>
#include <deque> #include <deque>
#include "picosha2.h"
// Function to check system endianness // Function to check system endianness
bool is_big_endian() { bool is_big_endian() {
@@ -75,13 +76,14 @@ void OpatIO::unload() {
void OpatIO::readHeader(std::ifstream &file) { void OpatIO::readHeader(std::ifstream &file) {
file.read(reinterpret_cast<char*>(&header), sizeof(Header)); file.read(reinterpret_cast<char*>(&header), sizeof(Header));
if (file.gcount() != sizeof(Header)) { if (file.gcount() != sizeof(Header)) {
throw std::runtime_error("Error reading header from file: " + filename); throw std::runtime_error("Error reading header from file");
} }
if (is_big_endian()) { if (is_big_endian()) {
header.version = swap_bytes(header.version); header.version = swap_bytes(header.version);
header.numTables = swap_bytes(header.numTables); header.numTables = swap_bytes(header.numTables);
header.indexOffset = swap_bytes(header.indexOffset); header.indexOffset = swap_bytes(header.indexOffset);
header.numIndex = swap_bytes(header.numIndex);
} }
} }
@@ -89,75 +91,86 @@ void OpatIO::readHeader(std::ifstream &file) {
void OpatIO::readTableIndex(std::ifstream &file) { void OpatIO::readTableIndex(std::ifstream &file) {
file.seekg(header.indexOffset, std::ios::beg); file.seekg(header.indexOffset, std::ios::beg);
tableIndex.resize(header.numTables); tableIndex.resize(header.numTables);
file.read(reinterpret_cast<char*>(tableIndex.data()), header.numTables * sizeof(TableIndex)); long unsigned int indexReadBytes;
if (file.gcount() != static_cast<std::streamsize>(header.numTables * sizeof(TableIndex))) { for (uint32_t i = 0; i < header.numTables; i++) {
throw std::runtime_error("Error reading table index from file: " + filename); indexReadBytes = 0;
// Read the index vector in based on numIndex
tableIndex.at(i).index.resize(header.numIndex);
file.read(reinterpret_cast<char*>(tableIndex.at(i).index.data()), header.numIndex * sizeof(double));
indexReadBytes += static_cast<int>(file.gcount());
// Read the start and end position of the table in
file.read(reinterpret_cast<char*>(&tableIndex.at(i).byteStart), sizeof(uint64_t));
indexReadBytes += static_cast<int>(file.gcount());
file.read(reinterpret_cast<char*>(&tableIndex.at(i).byteEnd), sizeof(uint64_t));
indexReadBytes += static_cast<int>(file.gcount());
// Read the checksum in
file.read(tableIndex.at(i).sha256, 32);
indexReadBytes += static_cast<int>(file.gcount());
// validate that the size of read data is correct
if (indexReadBytes != header.numIndex * sizeof(double) + 32 + 2 * sizeof(uint64_t)) {
throw std::runtime_error("Error reading table index from file");
}
} }
buildTableIDToComposition();
buildTableIDToIndex();
} }
void OpatIO::buildTableIDToComposition(){ void OpatIO::buildTableIDToIndex(){
tableIDToComposition.clear(); tableIDToIndex.clear();
int tableID = 0; int tableID = 0;
std::pair<double, double> comp; std::vector<double> ind;
for (const auto &index : tableIndex) { ind.resize(header.numIndex);
comp.first = index.X; for (const auto &table : tableIndex) {
comp.second = index.Z; ind.clear();
tableIDToComposition.emplace(tableID, comp); for (const auto &index : table.index) {
ind.push_back(index);
}
tableIDToIndex.emplace(tableID, ind);
tableID++; tableID++;
} }
XZLookupEpsilon(); LookupEpsilon();
} }
void OpatIO::XZLookupEpsilon() { void OpatIO::LookupEpsilon() {
/* /*
Get 10% of the minimum spacing between XZ values Get 10% of the minimum spacing between index values
in the tableIDToComposition map. This can be used in the tableIDToIndex map. This can be used
to set the comparison distance when doing a reverse to set the comparison distance when doing a reverse
lookup (composition -> tableID) lookup (index -> tableID)
*/ */
std::vector<double> Xvalues, Zvalues; indexEpsilon.resize(header.numIndex);
double epsilonX, epsilonZ, xgap, zgap;
// Start these out as larger than they will ever be double epsilon;
epsilonX = 1; for (int i = 0; i < static_cast<int>(header.numIndex); i++) {
epsilonZ = 1; epsilon = std::numeric_limits<double>::max();
for (const auto& pair : tableIDToComposition) { for (int j = 1; j < static_cast<int>(header.numTables); j++) {
Xvalues.push_back(pair.second.first); epsilon = std::min(epsilon, std::fabs(tableIDToIndex.at(j).at(i) - tableIDToIndex.at(j-1).at(i)));
Zvalues.push_back(pair.second.second);
}
// Sorting is required for this algorithm.
std::sort(Xvalues.begin(), Xvalues.end());
std::sort(Zvalues.begin(), Zvalues.end());
for (size_t i = 1; i < Xvalues.size(); ++i) {
xgap = Xvalues[i] - Xvalues[i - 1];
zgap = Zvalues[i] - Zvalues[i - 1];
if (xgap > 0 && xgap < epsilonX) {
epsilonX = xgap;
}
if (zgap > 0 && zgap < epsilonZ) {
epsilonZ = zgap;
} }
indexEpsilon.at(i) = epsilon * 0.1;
} }
// 0.1 to extract 10% of min distance.
XZepsilon = {0.1*epsilonX, 0.1*epsilonZ};
} }
int OpatIO::lookupTableID(double X, double Z){ int OpatIO::lookupTableID(std::vector<double> index) {
bool XOkay; std::vector<bool> IndexOkay;
bool ZOkay; IndexOkay.resize(header.numIndex);
int tableID = 0; int tableID = 0;
for (const auto &tableMap : tableIDToComposition){ for (const auto &tableMap : tableIDToIndex){
XOkay = std::fabs(tableMap.second.first - X) < XZepsilon.first; // Loop through all index values and check if they are within epsilon for that index
ZOkay = std::fabs(tableMap.second.second - Z) < XZepsilon.second; std::fill(IndexOkay.begin(), IndexOkay.end(), false);
if (XOkay and ZOkay){ for (long unsigned int i = 0; i < index.size(); i++) {
IndexOkay.at(i) = std::fabs(tableMap.second.at(i) - index.at(i)) < indexEpsilon.at(i);
}
// If all index values are within epsilon, return the table ID
if (std::all_of(IndexOkay.begin(), IndexOkay.end(), [](bool i){return i;})) {
return tableID; return tableID;
} }
tableID++; tableID++;
} }
// If no table is found, return -1 (sentinal value)
return -1; return -1;
} }
@@ -201,10 +214,10 @@ uint16_t OpatIO::getOPATVersion() {
} }
// Get a table for given X and Z // Get a table for given X and Z
OPATTable OpatIO::getTable(double X, double Z) { OPATTable OpatIO::getTable(std::vector<double> index) {
int tableID = lookupTableID(X, Z); int tableID = lookupTableID(index);
if (tableID == -1) { if (tableID == -1) {
throw std::out_of_range("X Z Pair Not found!"); throw std::out_of_range("Index Not found!");
} }
try { try {
return getTableFromQueue(tableID); return getTableFromQueue(tableID);
@@ -288,6 +301,7 @@ void OpatIO::printHeader() {
std::cout << "Creation Date: " << header.creationDate << std::endl; std::cout << "Creation Date: " << header.creationDate << std::endl;
std::cout << "Source Info: " << header.sourceInfo << std::endl; std::cout << "Source Info: " << header.sourceInfo << std::endl;
std::cout << "Comment: " << header.comment << std::endl; std::cout << "Comment: " << header.comment << std::endl;
std::cout << "Number of Indices: " << header.numIndex << std::endl;
} }
// Print the table index // Print the table index
@@ -298,9 +312,11 @@ void OpatIO::printTableIndex() {
} }
// Print table header // Print table header
std::cout << std::left << std::setw(10) << "X" std::cout << std::left << std::setw(10);
<< std::setw(10) << "Z" for (int i = 0; i < header.numIndex; i++) {
<< std::setw(15) << "Byte Start" std::cout << "Index " << i << std::setw(10);
}
std::cout << std::setw(15) << "Byte Start"
<< std::setw(15) << "Byte End" << std::setw(15) << "Byte End"
<< "Checksum (SHA-256)" << std::endl; << "Checksum (SHA-256)" << std::endl;
@@ -308,10 +324,11 @@ void OpatIO::printTableIndex() {
// Print each entry in the table // Print each entry in the table
for (const auto &index : tableIndex) { for (const auto &index : tableIndex) {
std::cout << std::fixed << std::setprecision(4) std::cout << std::fixed << std::setprecision(4) << std::setw(10);
<< std::setw(10) << index.X for (int i = 0; i < header.numIndex; i++) {
<< std::setw(10) << index.Z std::cout << index.index[i] << std::setw(10);
<< std::setw(15) << index.byteStart }
std::cout << std::setw(5) << index.byteStart
<< std::setw(15) << index.byteEnd << std::setw(15) << index.byteEnd
<< std::hex; // Switch to hex mode for checksum << std::hex; // Switch to hex mode for checksum
@@ -422,8 +439,8 @@ void OpatIO::printTable(OPATTable table, uint32_t truncateDigits) {
std::cout << "]" << std::endl; std::cout << "]" << std::endl;
} }
void OpatIO::printTable(double X, double Z, uint32_t truncateDigits) { void OpatIO::printTable(std::vector<double> index, uint32_t truncateDigits) {
int tableID = lookupTableID(X, Z); int tableID = lookupTableID(index);
OPATTable table = getTable(tableID); OPATTable table = getTable(tableID);
printTable(table, truncateDigits); printTable(table, truncateDigits);
} }
@@ -438,23 +455,45 @@ Header OpatIO::getHeader() {
return header; return header;
} }
// // Get the closest X tables // Get the size of the index vector used
// std::vector<OPATTable> OpatIO::getClosestXTables(double X, double ZExact, int numTables) { uint16_t OpatIO::getNumIndex() {
// std::vector<OPATTable> closestTables; return header.numIndex;
// // Implement logic to find closest X tables }
// return closestTables;
// }
// // Get the closest Z tables TableIndex OpatIO::getTableIndex(std::vector<double> index) {
// std::vector<OPATTable> OpatIO::getClosestZTables(double XExact, double Z, int numTables) { int tableID = lookupTableID(index);
// std::vector<OPATTable> closestTables; return tableIndex.at(tableID);
// // Implement logic to find closest Z tables }
// return closestTables;
// }
// // Get the closest tables std::vector<unsigned char> OpatIO::computeChecksum(int tableID) {
// std::vector<OPATTable> OpatIO::getClosestTables(double X, double Z, int numTables) { OPATTable table = getTable(tableID);
// std::vector<OPATTable> closestTables; std::vector<std::vector<double>> logKappa = table.logKappa;
// // Implement logic to find closest tables std::vector<double> flatData(logKappa.size() * logKappa.size());
// return closestTables; size_t offset = 0;
// } for (const auto& row : logKappa) {
for (const auto& val : row) {
flatData[offset++] = val;
}
}
std::vector<unsigned char> flatDataBytes(flatData.size() * sizeof(double));
std::memcpy(flatDataBytes.data(), flatData.data(), flatDataBytes.size());
std::vector<unsigned char> hash(picosha2::k_digest_size);
picosha2::hash256(flatDataBytes.begin(), flatDataBytes.end(), hash.begin(), hash.end());
return hash;
}
std::vector<unsigned char> OpatIO::computeChecksum(std::vector<double> index) {
int tableID = lookupTableID(index);
return computeChecksum(tableID);
}
bool OpatIO::validateAll() {
for (const auto &table : tableIDToIndex) {
std::vector<unsigned char> hash = computeChecksum(table.first);
std::vector<unsigned char> storedHash(tableIndex.at(table.first).sha256, tableIndex.at(table.first).sha256 + 32);
if (hash != storedHash) {
return false;
}
}
return true;
}

View File

@@ -22,7 +22,8 @@ struct Header {
char creationDate[16]; ///< Creation date of the file char creationDate[16]; ///< Creation date of the file
char sourceInfo[64]; ///< Source information char sourceInfo[64]; ///< Source information
char comment[128]; ///< Comment section char comment[128]; ///< Comment section
char reserved[26]; ///< Reserved for future use uint16_t numIndex; ///< Size of index vector per table
char reserved[24]; ///< Reserved for future use
}; };
#pragma pack() #pragma pack()
@@ -30,8 +31,7 @@ struct Header {
* @brief Structure to hold the index information of a table in an OPAT file. * @brief Structure to hold the index information of a table in an OPAT file.
*/ */
struct TableIndex { struct TableIndex {
double X; ///< X composition value std::vector<double> index; ///< Index vector for associated table
double Z; ///< Z composition value
uint64_t byteStart; ///< Byte start position of the table uint64_t byteStart; ///< Byte start position of the table
uint64_t byteEnd; ///< Byte end position of the table uint64_t byteEnd; ///< Byte end position of the table
char sha256[32]; ///< SHA-256 hash of the table data char sha256[32]; ///< SHA-256 hash of the table data
@@ -58,9 +58,9 @@ private:
Header header; ///< Header information of the OPAT file Header header; ///< Header information of the OPAT file
std::vector<TableIndex> tableIndex; ///< Index information of the tables std::vector<TableIndex> tableIndex; ///< Index information of the tables
std::deque<std::pair<int, OPATTable>> tableQueue; ///< Queue to manage table caching std::deque<std::pair<int, OPATTable>> tableQueue; ///< Queue to manage table caching
std::map<int, std::pair<double, double>> tableIDToComposition; ///< Map to store table ID to composition mapping std::map<int, std::vector<double>> tableIDToIndex; ///< Map to store table ID to indexing
std::pair<double, double> XZepsilon; ///< Epsilon values for X and Z std::vector<double> indexEpsilon; ///< Epsilon values for each index
int maxQDepth = 10; ///< Maximum depth of the table queue int maxQDepth = 20; ///< Maximum depth of the table queue
std::string filename; ///< Filename of the OPAT file std::string filename; ///< Filename of the OPAT file
bool loaded = false; ///< Flag to indicate if the file is loaded bool loaded = false; ///< Flag to indicate if the file is loaded
@@ -115,14 +115,14 @@ private:
void printTable(OPATTable table, uint32_t truncateDigits=5); void printTable(OPATTable table, uint32_t truncateDigits=5);
/** /**
* @brief Lookup epsilon values for X and Z. * @brief Lookup epsilon values for Index.
*/ */
void XZLookupEpsilon(); void LookupEpsilon();
/** /**
* @brief Build the table ID to composition mapping. * @brief Build the table ID to Index mapping.
*/ */
void buildTableIDToComposition(); void buildTableIDToIndex();
public: public:
/** /**
@@ -142,12 +142,12 @@ public:
~OpatIO(); ~OpatIO();
/** /**
* @brief Get a table by X and Z values. * @brief Get a table by index vector
* @param X The X composition value. * @param index The index vector associated with the table to retrieve.
* @param Z The Z composition value. * @throw std::out_of_range if the index is not found.
* @return The OPAT table. * @return The OPAT table.
*/ */
OPATTable getTable(double X, double Z); OPATTable getTable(std::vector<double> index);
/** /**
* @brief Set the maximum depth of the table queue. * @brief Set the maximum depth of the table queue.
@@ -195,11 +195,10 @@ public:
/** /**
* @brief Print a table by X and Z values. * @brief Print a table by X and Z values.
* @param X The X composition value. * @param index The index vector associated with the table to print.
* @param Z The Z composition value.
* @param truncateDigits Number of digits to truncate. * @param truncateDigits Number of digits to truncate.
*/ */
void printTable(double X, double Z, uint32_t truncateDigits=5); void printTable(std::vector<double> index, uint32_t truncateDigits=5);
/** /**
* @brief Get the table index. * @brief Get the table index.
@@ -213,60 +212,58 @@ public:
*/ */
Header getHeader(); Header getHeader();
/**
* @brief Get the closest tables by X value.
* @param X The X composition value.
* @param ZExact The exact Z composition value.
* @param C The C composition value (default is 0).
* @param O The O composition value (default is 0).
* @param numTables The number of closest tables to retrieve (default is 1).
* @return A vector of OPAT tables.
*/
std::vector<OPATTable> getClosestXTables(double X, double ZExact, double C=0, double O=0, int numTables=1);
/**
* @brief Get the closest tables by Z value.
* @param XExact The exact X composition value.
* @param Z The Z composition value.
* @param C The C composition value (default is 0).
* @param O The O composition value (default is 0).
* @param numTables The number of closest tables to retrieve (default is 1).
* @return A vector of OPAT tables.
*/
std::vector<OPATTable> getClosestZTables(double XExact, double Z, double C=0, double O=0, int numTables=1);
/**
* @brief Get the closest tables by X and Z values.
* @param X The X composition value.
* @param Z The Z composition value.
* @param C The C composition value (default is 0).
* @param O The O composition value (default is 0).
* @param numTables The number of closest tables to retrieve (default is 1).
* @return A vector of OPAT tables.
*/
std::vector<OPATTable> getClosestTables(double X, double Z, double C=0, double O=0, int numTables=1);
/** /**
* @brief Lookup the table ID by X and Z values. * @brief Lookup the table ID by X and Z values.
* @param X The X composition value. * @param index The index vector associated with the table to lookup.
* @param Z The Z composition value. * @return The table ID if index is found, otherwise -1.
* @return The table ID.
*/ */
int lookupTableID(double X, double Z); int lookupTableID(std::vector<double> index);
/** /**
* @brief Lookup the closest table ID by X and Z values. * @brief Lookup the closest table ID by X and Z values.
* @param X The X composition value. * @param index The index vector associated with the table to lookup.
* @param Z The Z composition value.
* @return The closest table ID. * @return The closest table ID.
*/ */
int lookupClosestTableID(double X, double Z); int lookupClosestTableID(std::vector<double> index);
/** /**
* @brief Get the version of the OPAT file format. * @brief Get the version of the OPAT file format.
* @return The version of the OPAT file format. * @return The version of the OPAT file format.
*/ */
uint16_t getOPATVersion(); uint16_t getOPATVersion();
/**
* @brief Get size of the index vector per table in the OPAT file.
* @return The size of the index vector per table.
*/
uint16_t getNumIndex();
/**
* @brief Get the index vector for a given table ID.
* @param index The index vector associated with the table to retrieve.
* @return The full TableIndex entry for the table
*/
TableIndex getTableIndex(std::vector<double> index);
/**
* @brief Get the checksum (sha256) for a given table ID.
* @param tableID The ID of the table.
* @return The checksum vector for the table.
*/
std::vector<unsigned char> computeChecksum(int tableID);
/**
* @brief Get the checksum (sha256) for a given index vector.
* @param index The index vector associated with the table to retrieve.
* @return The checksum vector for the table.
*/
std::vector<unsigned char> computeChecksum(std::vector<double> index);
/**
* @brief Validate the checksum of all tables.
* @return True if all checksum are valid, false otherwise.
*/
bool validateAll();
}; };
#endif #endif