From df6335d25fba4e9f3252fdacce118c8c5df38be0 Mon Sep 17 00:00:00 2001 From: Emily Boudreaux Date: Mon, 17 Feb 2025 13:01:34 -0500 Subject: [PATCH] feat(opatIO): fully updated for index vector Previously OPAT files were indexed using X and Z; now they are indexed with a general index vector. BREAKING CHANGE: all methods which used X and Z now use std::vector index (size: header.numIndex) instead. Also added a method to validate tables using their checksums --- src/opatIO/private/opatIO.cpp | 195 ++++++++++++++++++++-------------- src/opatIO/public/opatIO.h | 111 ++++++++++--------- 2 files changed, 171 insertions(+), 135 deletions(-) diff --git a/src/opatIO/private/opatIO.cpp b/src/opatIO/private/opatIO.cpp index b637d38..9c3b043 100644 --- a/src/opatIO/private/opatIO.cpp +++ b/src/opatIO/private/opatIO.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "picosha2.h" // Function to check system endianness bool is_big_endian() { @@ -75,13 +76,14 @@ void OpatIO::unload() { void OpatIO::readHeader(std::ifstream &file) { file.read(reinterpret_cast(&header), sizeof(Header)); if (file.gcount() != sizeof(Header)) { - throw std::runtime_error("Error reading header from file: " + filename); + throw std::runtime_error("Error reading header from file"); } if (is_big_endian()) { header.version = swap_bytes(header.version); header.numTables = swap_bytes(header.numTables); header.indexOffset = swap_bytes(header.indexOffset); + header.numIndex = swap_bytes(header.numIndex); } } @@ -89,75 +91,86 @@ void OpatIO::readHeader(std::ifstream &file) { void OpatIO::readTableIndex(std::ifstream &file) { file.seekg(header.indexOffset, std::ios::beg); tableIndex.resize(header.numTables); - file.read(reinterpret_cast(tableIndex.data()), header.numTables * sizeof(TableIndex)); - if (file.gcount() != static_cast(header.numTables * sizeof(TableIndex))) { - throw std::runtime_error("Error reading table index from file: " + filename); + long unsigned int indexReadBytes; + for (uint32_t i = 0; i < 
header.numTables; i++) { + indexReadBytes = 0; + // Read the index vector in based on numIndex + tableIndex.at(i).index.resize(header.numIndex); + file.read(reinterpret_cast(tableIndex.at(i).index.data()), header.numIndex * sizeof(double)); + indexReadBytes += static_cast(file.gcount()); + + // Read the start and end position of the table in + file.read(reinterpret_cast(&tableIndex.at(i).byteStart), sizeof(uint64_t)); + indexReadBytes += static_cast(file.gcount()); + file.read(reinterpret_cast(&tableIndex.at(i).byteEnd), sizeof(uint64_t)); + indexReadBytes += static_cast(file.gcount()); + + // Read the checksum in + file.read(tableIndex.at(i).sha256, 32); + indexReadBytes += static_cast(file.gcount()); + + // validate that the size of read data is correct + if (indexReadBytes != header.numIndex * sizeof(double) + 32 + 2 * sizeof(uint64_t)) { + throw std::runtime_error("Error reading table index from file"); + } } - buildTableIDToComposition(); + + buildTableIDToIndex(); } -void OpatIO::buildTableIDToComposition(){ - tableIDToComposition.clear(); +void OpatIO::buildTableIDToIndex(){ + tableIDToIndex.clear(); int tableID = 0; - std::pair comp; - for (const auto &index : tableIndex) { - comp.first = index.X; - comp.second = index.Z; - tableIDToComposition.emplace(tableID, comp); + std::vector ind; + ind.resize(header.numIndex); + for (const auto &table : tableIndex) { + ind.clear(); + for (const auto &index : table.index) { + ind.push_back(index); + } + tableIDToIndex.emplace(tableID, ind); tableID++; } - XZLookupEpsilon(); + LookupEpsilon(); } -void OpatIO::XZLookupEpsilon() { +void OpatIO::LookupEpsilon() { /* - Get 10% of the minimum spacing between XZ values - in the tableIDToComposition map. This can be used + Get 10% of the minimum spacing between index values + in the tableIDToIndex map. 
This can be used to set the comparison distance when doing a reverse - lookup (composition -> tableID) + lookup (index -> tableID) */ - std::vector Xvalues, Zvalues; - double epsilonX, epsilonZ, xgap, zgap; + indexEpsilon.resize(header.numIndex); - // Start these out as larger than they will ever be - epsilonX = 1; - epsilonZ = 1; - for (const auto& pair : tableIDToComposition) { - Xvalues.push_back(pair.second.first); - Zvalues.push_back(pair.second.second); - } - - // Sorting is required for this algorithm. - std::sort(Xvalues.begin(), Xvalues.end()); - std::sort(Zvalues.begin(), Zvalues.end()); - - for (size_t i = 1; i < Xvalues.size(); ++i) { - xgap = Xvalues[i] - Xvalues[i - 1]; - zgap = Zvalues[i] - Zvalues[i - 1]; - if (xgap > 0 && xgap < epsilonX) { - epsilonX = xgap; - } - if (zgap > 0 && zgap < epsilonZ) { - epsilonZ = zgap; + double epsilon; + for (int i = 0; i < static_cast(header.numIndex); i++) { + epsilon = std::numeric_limits::max(); + for (int j = 1; j < static_cast(header.numTables); j++) { + epsilon = std::min(epsilon, std::fabs(tableIDToIndex.at(j).at(i) - tableIDToIndex.at(j-1).at(i))); } + indexEpsilon.at(i) = epsilon * 0.1; } - // 0.1 to extract 10% of min distance. 
- XZepsilon = {0.1*epsilonX, 0.1*epsilonZ}; } -int OpatIO::lookupTableID(double X, double Z){ - bool XOkay; - bool ZOkay; +int OpatIO::lookupTableID(std::vector index) { + std::vector IndexOkay; + IndexOkay.resize(header.numIndex); int tableID = 0; - for (const auto &tableMap : tableIDToComposition){ - XOkay = std::fabs(tableMap.second.first - X) < XZepsilon.first; - ZOkay = std::fabs(tableMap.second.second - Z) < XZepsilon.second; - if (XOkay and ZOkay){ + for (const auto &tableMap : tableIDToIndex){ + // Loop through all index values and check if they are within epsilon for that index + std::fill(IndexOkay.begin(), IndexOkay.end(), false); + for (long unsigned int i = 0; i < index.size(); i++) { + IndexOkay.at(i) = std::fabs(tableMap.second.at(i) - index.at(i)) < indexEpsilon.at(i); + } + // If all index values are within epsilon, return the table ID + if (std::all_of(IndexOkay.begin(), IndexOkay.end(), [](bool i){return i;})) { return tableID; } tableID++; } + // If no table is found, return -1 (sentinel value) return -1; } @@ -201,10 +214,10 @@ uint16_t OpatIO::getOPATVersion() { } // Get a table for given X and Z -OPATTable OpatIO::getTable(double X, double Z) { - int tableID = lookupTableID(X, Z); +OPATTable OpatIO::getTable(std::vector index) { + int tableID = lookupTableID(index); if (tableID == -1) { - throw std::out_of_range("X Z Pair Not found!"); + throw std::out_of_range("Index Not found!"); } try { return getTableFromQueue(tableID); @@ -288,6 +301,7 @@ void OpatIO::printHeader() { std::cout << "Creation Date: " << header.creationDate << std::endl; std::cout << "Source Info: " << header.sourceInfo << std::endl; std::cout << "Comment: " << header.comment << std::endl; + std::cout << "Number of Indices: " << header.numIndex << std::endl; } // Print the table index @@ -298,9 +312,11 @@ void OpatIO::printTableIndex() { } // Print table header - std::cout << std::left << std::setw(10) << "X" - << std::setw(10) << "Z" - << std::setw(15) << "Byte Start" + 
std::cout << std::left << std::setw(10); + for (int i = 0; i < header.numIndex; i++) { + std::cout << "Index " << i << std::setw(10); + } + std::cout << std::setw(15) << "Byte Start" << std::setw(15) << "Byte End" << "Checksum (SHA-256)" << std::endl; @@ -308,10 +324,11 @@ void OpatIO::printTableIndex() { // Print each entry in the table for (const auto &index : tableIndex) { - std::cout << std::fixed << std::setprecision(4) - << std::setw(10) << index.X - << std::setw(10) << index.Z - << std::setw(15) << index.byteStart + std::cout << std::fixed << std::setprecision(4) << std::setw(10); + for (int i = 0; i < header.numIndex; i++) { + std::cout << index.index[i] << std::setw(10); + } + std::cout << std::setw(5) << index.byteStart << std::setw(15) << index.byteEnd << std::hex; // Switch to hex mode for checksum @@ -422,8 +439,8 @@ void OpatIO::printTable(OPATTable table, uint32_t truncateDigits) { std::cout << "]" << std::endl; } -void OpatIO::printTable(double X, double Z, uint32_t truncateDigits) { - int tableID = lookupTableID(X, Z); +void OpatIO::printTable(std::vector index, uint32_t truncateDigits) { + int tableID = lookupTableID(index); OPATTable table = getTable(tableID); printTable(table, truncateDigits); } @@ -438,23 +455,45 @@ Header OpatIO::getHeader() { return header; } -// // Get the closest X tables -// std::vector OpatIO::getClosestXTables(double X, double ZExact, int numTables) { -// std::vector closestTables; -// // Implement logic to find closest X tables -// return closestTables; -// } +// Get the size of the index vector used +uint16_t OpatIO::getNumIndex() { + return header.numIndex; +} -// // Get the closest Z tables -// std::vector OpatIO::getClosestZTables(double XExact, double Z, int numTables) { -// std::vector closestTables; -// // Implement logic to find closest Z tables -// return closestTables; -// } +TableIndex OpatIO::getTableIndex(std::vector index) { + int tableID = lookupTableID(index); + return tableIndex.at(tableID); +} -// // 
Get the closest tables -// std::vector OpatIO::getClosestTables(double X, double Z, int numTables) { -// std::vector closestTables; -// // Implement logic to find closest tables -// return closestTables; -// } \ No newline at end of file +std::vector OpatIO::computeChecksum(int tableID) { + OPATTable table = getTable(tableID); + std::vector> logKappa = table.logKappa; + std::vector flatData(logKappa.size() * logKappa.size()); + size_t offset = 0; + for (const auto& row : logKappa) { + for (const auto& val : row) { + flatData[offset++] = val; + } + } + std::vector flatDataBytes(flatData.size() * sizeof(double)); + std::memcpy(flatDataBytes.data(), flatData.data(), flatDataBytes.size()); + std::vector hash(picosha2::k_digest_size); + picosha2::hash256(flatDataBytes.begin(), flatDataBytes.end(), hash.begin(), hash.end()); + return hash; +} + +std::vector OpatIO::computeChecksum(std::vector index) { + int tableID = lookupTableID(index); + return computeChecksum(tableID); +} + +bool OpatIO::validateAll() { + for (const auto &table : tableIDToIndex) { + std::vector hash = computeChecksum(table.first); + std::vector storedHash(tableIndex.at(table.first).sha256, tableIndex.at(table.first).sha256 + 32); + if (hash != storedHash) { + return false; + } + } + return true; +} \ No newline at end of file diff --git a/src/opatIO/public/opatIO.h b/src/opatIO/public/opatIO.h index 62ffe0f..5fdad3f 100644 --- a/src/opatIO/public/opatIO.h +++ b/src/opatIO/public/opatIO.h @@ -22,7 +22,8 @@ struct Header { char creationDate[16]; ///< Creation date of the file char sourceInfo[64]; ///< Source information char comment[128]; ///< Comment section - char reserved[26]; ///< Reserved for future use + uint16_t numIndex; ///< Size of index vector per table + char reserved[24]; ///< Reserved for future use }; #pragma pack() @@ -30,8 +31,7 @@ struct Header { * @brief Structure to hold the index information of a table in an OPAT file. 
*/ struct TableIndex { - double X; ///< X composition value - double Z; ///< Z composition value + std::vector index; ///< Index vector for associated table uint64_t byteStart; ///< Byte start position of the table uint64_t byteEnd; ///< Byte end position of the table char sha256[32]; ///< SHA-256 hash of the table data @@ -58,9 +58,9 @@ private: Header header; ///< Header information of the OPAT file std::vector tableIndex; ///< Index information of the tables std::deque> tableQueue; ///< Queue to manage table caching - std::map> tableIDToComposition; ///< Map to store table ID to composition mapping - std::pair XZepsilon; ///< Epsilon values for X and Z - int maxQDepth = 10; ///< Maximum depth of the table queue + std::map> tableIDToIndex; ///< Map to store table ID to indexing + std::vector indexEpsilon; ///< Epsilon values for each index + int maxQDepth = 20; ///< Maximum depth of the table queue std::string filename; ///< Filename of the OPAT file bool loaded = false; ///< Flag to indicate if the file is loaded @@ -115,14 +115,14 @@ private: void printTable(OPATTable table, uint32_t truncateDigits=5); /** - * @brief Lookup epsilon values for X and Z. + * @brief Lookup epsilon values for Index. */ - void XZLookupEpsilon(); + void LookupEpsilon(); /** - * @brief Build the table ID to composition mapping. + * @brief Build the table ID to Index mapping. */ - void buildTableIDToComposition(); + void buildTableIDToIndex(); public: /** @@ -142,12 +142,12 @@ public: ~OpatIO(); /** - * @brief Get a table by X and Z values. - * @param X The X composition value. - * @param Z The Z composition value. + * @brief Get a table by index vector + * @param index The index vector associated with the table to retrieve. + * @throw std::out_of_range if the index is not found. * @return The OPAT table. */ - OPATTable getTable(double X, double Z); + OPATTable getTable(std::vector index); /** * @brief Set the maximum depth of the table queue. 
@@ -195,11 +195,10 @@ public: /** * @brief Print a table by X and Z values. - * @param X The X composition value. - * @param Z The Z composition value. + * @param index The index vector associated with the table to print. * @param truncateDigits Number of digits to truncate. */ - void printTable(double X, double Z, uint32_t truncateDigits=5); + void printTable(std::vector index, uint32_t truncateDigits=5); /** * @brief Get the table index. @@ -213,60 +212,58 @@ public: */ Header getHeader(); - /** - * @brief Get the closest tables by X value. - * @param X The X composition value. - * @param ZExact The exact Z composition value. - * @param C The C composition value (default is 0). - * @param O The O composition value (default is 0). - * @param numTables The number of closest tables to retrieve (default is 1). - * @return A vector of OPAT tables. - */ - std::vector getClosestXTables(double X, double ZExact, double C=0, double O=0, int numTables=1); - - /** - * @brief Get the closest tables by Z value. - * @param XExact The exact X composition value. - * @param Z The Z composition value. - * @param C The C composition value (default is 0). - * @param O The O composition value (default is 0). - * @param numTables The number of closest tables to retrieve (default is 1). - * @return A vector of OPAT tables. - */ - std::vector getClosestZTables(double XExact, double Z, double C=0, double O=0, int numTables=1); - - /** - * @brief Get the closest tables by X and Z values. - * @param X The X composition value. - * @param Z The Z composition value. - * @param C The C composition value (default is 0). - * @param O The O composition value (default is 0). - * @param numTables The number of closest tables to retrieve (default is 1). - * @return A vector of OPAT tables. - */ - std::vector getClosestTables(double X, double Z, double C=0, double O=0, int numTables=1); - /** * @brief Lookup the table ID by X and Z values. - * @param X The X composition value. 
- * @param Z The Z composition value. - * @return The table ID. + * @param index The index vector associated with the table to look up. + * @return The table ID if index is found, otherwise -1. */ - int lookupTableID(double X, double Z); + int lookupTableID(std::vector index); /** * @brief Lookup the closest table ID by X and Z values. - * @param X The X composition value. - * @param Z The Z composition value. + * @param index The index vector associated with the table to look up. * @return The closest table ID. */ - int lookupClosestTableID(double X, double Z); + int lookupClosestTableID(std::vector index); /** * @brief Get the version of the OPAT file format. * @return The version of the OPAT file format. */ uint16_t getOPATVersion(); + + /** + * @brief Get the size of the index vector per table in the OPAT file. + * @return The size of the index vector per table. + */ + uint16_t getNumIndex(); + + /** + * @brief Get the table index entry for a given index vector. + * @param index The index vector associated with the table to retrieve. + * @return The full TableIndex entry for the table + */ + TableIndex getTableIndex(std::vector index); + + /** + * @brief Get the checksum (sha256) for a given table ID. + * @param tableID The ID of the table. + * @return The checksum vector for the table. + */ + std::vector computeChecksum(int tableID); + + /** + * @brief Get the checksum (sha256) for a given index vector. + * @param index The index vector associated with the table to retrieve. + * @return The checksum vector for the table. + */ + std::vector computeChecksum(std::vector index); + + /** + * @brief Validate the checksum of all tables. + * @return True if all checksums are valid, false otherwise. + */ + bool validateAll(); }; #endif \ No newline at end of file