feat(debug-utils): added framework for shared debug util tools

2025-04-10 09:05:30 -04:00
parent 08b68c22de
commit 41460acacf
21 changed files with 465 additions and 1799 deletions
--- a/utils/debugUtils/MFEMAnalysisUtils/MFEMAnalysis-cpp/meson.build
+++ b/utils/debugUtils/MFEMAnalysisUtils/MFEMAnalysis-cpp/meson.build
@@ -0,0 +1 @@
+# Header-only dependency object: adds src/include (where mfem_smout.h lives) to the include path of anything that uses it.
+mfemanalysis_dep = declare_dependency(include_directories: 'src/include')
--- a/utils/debugUtils/MFEMAnalysisUtils/MFEMAnalysis-cpp/src/include/mfem_smout.h
+++ b/utils/debugUtils/MFEMAnalysisUtils/MFEMAnalysis-cpp/src/include/mfem_smout.h
@@ -0,0 +1,166 @@
+//
+// Created by Emily Boudreaux on 4/10/25.
+//
+
+#ifndef MFEM_SMOUT_H
+#define MFEM_SMOUT_H
+
+#include "mfem.hpp"
+
+#include <cstdint>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+/**
+ * @brief Saves an mfem::SparseMatrix to a custom compact binary file (.csrbin).
+ *
+ * The matrix is read through GetI()/GetJ()/GetData(). A matrix whose row-pointer
+ * array is null (or that reports non-zeros without column/value arrays) is
+ * rejected before the output file is opened, so an existing file is not
+ * truncated for an input that cannot be serialised.
+ *
+ * All multi-byte fields are written as raw memory, i.e. in the byte order of
+ * the machine running this code.
+ *
+ * Declared inline because the definition lives in a header.
+ *
+ * @param mat The mfem::SparseMatrix to save (assumed to be in CSR format).
+ * @param filename The path to the output file (truncated if it already exists).
+ * @return true if saving was successful, false otherwise (a message is printed to std::cerr).
+ *
+ * File Format (.csrbin):
+ * - Magic (4 bytes): 'C','S','R','B'
+ * - Version (1 byte): 1
+ * - IntSize (1 byte): 8 (using int64_t for indices/dims)
+ * - FltSize (1 byte): 8 (using double for data)
+ * - Reserved (1 byte): 0
+ * - Height (uint64_t): Number of rows
+ * - Width (uint64_t): Number of columns
+ * - NNZ (uint64_t): Number of non-zeros
+ * - I array (int64_t * (Height + 1)): CSR Row Pointers
+ * - J array (int64_t * NNZ): CSR Column Indices
+ * - Data array (double * NNZ): CSR Non-zero values
+ */
+inline bool saveSparseMatrixBinary(const mfem::SparseMatrix& mat, const std::string& filename) {
+    // --- Get Data Pointers and Dimensions from MFEM Matrix ---
+    const int* mfem_I = mat.GetI();
+    const int* mfem_J = mat.GetJ();
+    const double* mfem_data = mat.GetData();
+
+    // Without a row-pointer array there is nothing CSR-shaped to serialise.
+    // Checked before the truncating open below so an existing file survives.
+    if (mfem_I == nullptr) {
+        std::cerr << "Error: Matrix has no CSR row pointers (not finalized?); nothing written to: "
+                  << filename << std::endl;
+        return false;
+    }
+
+    const auto height = static_cast<std::uint64_t>(mat.Height());
+    const auto width = static_cast<std::uint64_t>(mat.Width());
+    const auto nnz = static_cast<std::uint64_t>(mat.NumNonZeroElems());
+    const std::uint64_t i_count = height + 1;
+
+    if (nnz > 0 && (mfem_J == nullptr || mfem_data == nullptr)) {
+        std::cerr << "Error: Matrix reports non-zeros but has no column/value arrays; nothing written to: "
+                  << filename << std::endl;
+        return false;
+    }
+
+    std::ofstream outfile(filename, std::ios::binary | std::ios::trunc);
+    if (!outfile) {
+        std::cerr << "Error: Cannot open file for writing: " << filename << std::endl;
+        return false;
+    }
+
+    // Writes `byte_count` raw bytes from `src`; throws if the stream is bad afterwards.
+    // `what` names the section for the error message.
+    const auto writeRaw = [&outfile](const void* src, std::uint64_t byte_count, const char* what) {
+        outfile.write(static_cast<const char*>(src), static_cast<std::streamsize>(byte_count));
+        if (!outfile) {
+            throw std::runtime_error(std::string("Error writing ") + what + ".");
+        }
+    };
+
+    try {
+        // --- Write Header ---
+        const char magic[4] = {'C', 'S', 'R', 'B'};
+        const std::uint8_t format_bytes[4] = {1, 8, 8, 0}; // version, IntSize, FltSize, reserved
+
+        writeRaw(magic, sizeof(magic), "header");
+        writeRaw(format_bytes, sizeof(format_bytes), "header");
+        writeRaw(&height, sizeof(height), "header");
+        writeRaw(&width, sizeof(width), "header");
+        writeRaw(&nnz, sizeof(nnz), "header");
+
+        // --- Write Arrays (widening int to int64_t for I and J) ---
+        // Each temporary buffer lives in its own scope so it is released
+        // before the next one is allocated.
+        {
+            const std::vector<std::int64_t> i_buffer(mfem_I, mfem_I + i_count);
+            writeRaw(i_buffer.data(), i_count * sizeof(std::int64_t), "I array");
+        }
+
+        if (nnz > 0) {
+            {
+                const std::vector<std::int64_t> j_buffer(mfem_J, mfem_J + nnz);
+                writeRaw(j_buffer.data(), nnz * sizeof(std::int64_t), "J array");
+            }
+            // Values are already double, so they are written straight from MFEM's storage.
+            writeRaw(mfem_data, nnz * sizeof(double), "Data array");
+        }
+    } catch (const std::exception& e) {
+        // Covers both stream failures (thrown by writeRaw) and allocation failures.
+        std::cerr << "Error during binary matrix save: " << e.what() << std::endl;
+        outfile.close();
+        return false;
+    }
+
+    // close() flushes; a failure here means the data may not have reached disk.
+    outfile.close();
+    if (!outfile) {
+        std::cerr << "Error closing file after writing: " << filename << std::endl;
+        return false;
+    }
+    return true;
+}
+
+/**
+ * @brief Writes an mfem::DenseMatrix to a CSV file.
+ *
+ * The first line is a header row holding the column indices 0..width-1.
+ * Each following line holds one matrix row, values separated by commas and
+ * printed in fixed notation with `precision` digits after the decimal point.
+ *
+ * Declared inline because the definition lives in a header.
+ *
+ * @param filename The name of the output CSV file.
+ * @param precision Number of decimal places for floating-point values.
+ * @param mat Non-owning pointer to the matrix to write; must not be null.
+ * @throws std::runtime_error if `mat` is null, the file cannot be opened, or writing fails.
+ */
+inline void writeDenseMatrixToCSV(const std::string &filename, int precision, const mfem::DenseMatrix *mat) {
+    if (!mat) {
+        throw std::runtime_error("The DenseMatrix pointer is null.");
+    }
+
+    std::ofstream outfile(filename);
+    if (!outfile.is_open()) {
+        throw std::runtime_error("Failed to open file: " + filename);
+    }
+
+    const int height = mat->Height();
+    const int width = mat->Width();
+
+    // Set precision for floating-point output
+    outfile << std::fixed << std::setprecision(precision);
+
+    // Header row: column indices, newline after the last one.
+    for (int col = 0; col < width; ++col) {
+        outfile << col << (col < width - 1 ? "," : "\n");
+    }
+
+    // One CSV line per matrix row. '\n' instead of std::endl avoids a flush per row.
+    for (int row = 0; row < height; ++row) {
+        for (int col = 0; col < width; ++col) {
+            outfile << mat->Elem(row, col);
+            if (col < width - 1) {
+                outfile << ',';
+            }
+        }
+        outfile << '\n';
+    }
+
+    // close() flushes the buffer; report a failed write instead of silently losing data.
+    outfile.close();
+    if (!outfile) {
+        throw std::runtime_error("Failed to write file: " + filename);
+    }
+}
+
+/**
+ * @brief Writes the dense representation of an MFEM Operator (if it's a SparseMatrix) to a CSV file.
+ *
+ * The operator is converted with SparseMatrix::ToDenseMatrix(), so the full
+ * dense matrix is materialised in memory before it is written.
+ *
+ * Declared inline because the definition lives in a header.
+ *
+ * @param op The MFEM Operator to write.
+ * @param filename The name of the output CSV file.
+ * @param precision Number of decimal places for floating-point values.
+ * @throws std::runtime_error if `op` is not an mfem::SparseMatrix; exceptions
+ *         raised by writeDenseMatrixToCSV propagate unchanged.
+ */
+inline void writeOperatorToCSV(const mfem::Operator &op,
+    const std::string &filename,
+    int precision = 6)
+{
+    // Attempt to cast the Operator to a SparseMatrix
+    const auto *sparse_mat = dynamic_cast<const mfem::SparseMatrix*>(&op);
+    if (!sparse_mat) {
+        throw std::runtime_error("The operator is not a SparseMatrix.");
+    }
+
+    // ToDenseMatrix() returns a heap-allocated matrix owned by the caller.
+    // unique_ptr frees it on every exit path, including when the writer throws.
+    const std::unique_ptr<const mfem::DenseMatrix> dense(sparse_mat->ToDenseMatrix());
+    writeDenseMatrixToCSV(filename, precision, dense.get());
+}
+
+#endif //MFEM_SMOUT_H
--- a/utils/debugUtils/MFEMAnalysisUtils/SSEDebug/pyproject.toml
+++ b/utils/debugUtils/MFEMAnalysisUtils/SSEDebug/pyproject.toml
@@ -0,0 +1,36 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "SSEDebug"
+version = "0.1.0"
+description = "A python module for general 4DSSE debugging"
+readme = "readme.md"
+authors = [
+    {name = "Emily M. Boudreaux", email = "emily.boudreaux@dartmouth.edu"},
+    {name = "4D-STAR Collaboration"},
+]
+
+maintainers = [
+    {name = "Emily M. Boudreaux", email="emily.boudreaux@dartmouth.edu"}
+]
+
+keywords = ["astrophysics", "MFEM"]
+requires-python = ">=3.8"
+dependencies = ["numpy >= 1.21.1", "scipy>=1.13.1"]
+
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Science/Research",
+    "Programming Language :: Python :: 3",
+    "Topic :: Scientific/Engineering :: Astronomy",
+    "Operating System :: OS Independent"
+]
+
+
+[tool.setuptools]
+package-dir = {"" = "src"}
+
+[tool.setuptools.packages.find]
+where = ["src"]
--- a/utils/debugUtils/MFEMAnalysisUtils/SSEDebug/src/SSEDebug/init.py
+++ b/utils/debugUtils/MFEMAnalysisUtils/SSEDebug/src/SSEDebug/init.py
@@ -0,0 +1 @@
+__version__="0.1.0"
--- a/utils/debugUtils/MFEMAnalysisUtils/SSEDebug/src/SSEDebug/smRead/init.py
+++ b/utils/debugUtils/MFEMAnalysisUtils/SSEDebug/src/SSEDebug/smRead/init.py
@@ -0,0 +1 @@
+from .smread import loadSparseMatrixBinary, analyze_sparse_matrix, load_and_analyze_sparse_matrix
--- a/utils/debugUtils/MFEMAnalysisUtils/SSEDebug/src/SSEDebug/smRead/smread.py
+++ b/utils/debugUtils/MFEMAnalysisUtils/SSEDebug/src/SSEDebug/smRead/smread.py
@@ -0,0 +1,234 @@
+import argparse
+
+import numpy as np
+import scipy.sparse as sp
+import struct
+import scipy.sparse.linalg as spla # For matrix norm
+import time
+import os
+
+def loadSparseMatrixBinary(filename):
+    """
+    Loads a sparse matrix from the custom binary format (.csrbin).
+
+    Args:
+        filename (str): The path to the .csrbin file.
+
+    Returns:
+        scipy.sparse.csr_matrix: The loaded sparse matrix.
+
+    Raises:
+        ValueError: If the file format is incorrect or sizes don't match.
+        IOError: If the file cannot be read.
+    """
+    INT_SIZE = 8  # Expecting int64_t from the C++ writer
+    FLT_SIZE = 8  # Expecting double from the C++ writer
+    EXPECTED_MAGIC = b'CSRB'
+    EXPECTED_VERSION = 1
+
+    try:
+        with open(filename, 'rb') as f:
+            # --- Read Header ---
+            magic = f.read(4)
+            if magic != EXPECTED_MAGIC:
+                raise ValueError(f"Invalid magic number. Expected {EXPECTED_MAGIC}, got {magic}")
+
+            version, int_size_file, flt_size_file, reserved = struct.unpack('<BBBB', f.read(4))
+            # '<' means little-endian, 'B' means unsigned char (1 byte)
+
+            if version != EXPECTED_VERSION:
+                print(f"Warning: File version {version} differs from expected {EXPECTED_VERSION}.")
+            if int_size_file != INT_SIZE:
+                raise ValueError(f"Integer size mismatch. Expected {INT_SIZE}, file has {int_size_file}")
+            if flt_size_file != FLT_SIZE:
+                raise ValueError(f"Float size mismatch. Expected {FLT_SIZE}, file has {flt_size_file}")
+
+            height, width, nnz = struct.unpack('<QQQ', f.read(24))
+            # '<' means little-endian, 'Q' means unsigned long long (8 bytes)
+
+            i_count = height + 1
+            j_count = nnz
+            data_count = nnz
+
+            if nnz == 0: # Handle empty matrix case
+                print("Warning: Matrix file contains zero non-zero elements.")
+                # Return an empty matrix with correct shape
+                return sp.csr_matrix((height, width), dtype=np.float64)
+
+
+            # --- Read Arrays ---
+
+            # Read I array (Row Pointers)
+            expected_i_bytes = i_count * INT_SIZE
+            I_array = np.fromfile(f, dtype=np.int64, count=i_count) # Read as int64
+            if I_array.size != i_count:
+                raise ValueError(f"Error reading I array. Expected {i_count} elements, read {I_array.size}. File truncated or corrupt?")
+
+            # Read J array (Column Indices)
+            expected_j_bytes = j_count * INT_SIZE
+            J_array = np.fromfile(f, dtype=np.int64, count=j_count) # Read as int64
+            if J_array.size != j_count:
+                raise ValueError(f"Error reading J array. Expected {j_count} elements, read {J_array.size}. File truncated or corrupt?")
+
+            # Read Data array (Values)
+            expected_data_bytes = data_count * FLT_SIZE
+            Data_array = np.fromfile(f, dtype=np.float64, count=data_count) # Read as float64
+            if Data_array.size != data_count:
+                raise ValueError(f"Error reading Data array. Expected {data_count} elements, read {Data_array.size}. File truncated or corrupt?")
+
+            # --- Check for extra data ---
+            extra_data = f.read()
+            if extra_data:
+                print(f"Warning: {len(extra_data)} extra bytes found at the end of the file.")
+
+
+            # --- Construct SciPy CSR Matrix ---
+            sparse_matrix = sp.csr_matrix((Data_array, J_array, I_array), shape=(height, width))
+
+            if sparse_matrix.nnz != nnz:
+                print(f"Warning: NNZ mismatch after loading. Header NNZ: {nnz}, Scipy NNZ: {sparse_matrix.nnz}")
+
+
+            return sparse_matrix
+
+    except FileNotFoundError:
+        raise IOError(f"Error: File not found at {filename}")
+    except Exception as e:
+        raise RuntimeError(f"An error occurred while reading {filename}: {e}")
+
+
+def analyze_sparse_matrix(sp_mat):
+    """
+    Analyzes a SciPy sparse matrix and prints various statistics.
+
+    Args:
+        sp_mat (scipy.sparse.spmatrix): The sparse matrix to analyze.
+                                         (e.g., csr_matrix, csc_matrix).
+    """
+    print("-" * 50)
+    print("Sparse Matrix Analysis Report")
+    print("-" * 50)
+
+    if not isinstance(sp_mat, sp.spmatrix):
+        print("Error: Input is not a SciPy sparse matrix.")
+        return
+
+    rows, cols = sp_mat.shape
+    print(f"Size (Shape):           {rows} rows x {cols} columns")
+
+    if rows == 0 or cols == 0:
+        print("\nMatrix is empty. No further analysis possible.")
+        print("-" * 50)
+        return
+
+    nnz = sp_mat.nnz
+    total_elements = rows * cols
+    sparsity = 0.0
+    if total_elements > 0:
+        sparsity = 1.0 - (nnz / total_elements)
+    else:
+        sparsity = 1.0
+
+    print(f"Non-zero elements (NNZ): {nnz}")
+    print(f"Total elements:         {total_elements}")
+    print(f"Sparsity:               {sparsity:.6%} (percentage of zeros)")
+
+    if nnz == 0:
+        print("\nMatrix contains only zero elements.")
+        diag_elements = sp_mat.diagonal()
+        print(f"\nDiagonal Mean:          {np.mean(diag_elements):.6e}")
+        print(f"Diagonal Max:           {np.max(diag_elements):.6e}")
+        print(f"Diagonal Min:           {np.min(diag_elements):.6e}")
+        print(f"Value Range (Min):      N/A (no non-zero values)")
+        print(f"Value Range (Max):      N/A (no non-zero values)")
+        print(f"Mean Non-Zero Value:    N/A (no non-zero values)")
+        print(f"Relative Diagonal Norm: N/A (matrix norm is zero)")
+        print("-" * 50)
+        return
+
+    all_values = sp_mat.data # Access non-zero values directly
+    min_val = np.min(all_values)
+    max_val = np.max(all_values)
+    mean_val = np.mean(all_values)
+    print(f"\nValue Range (Min):      {min_val:.6e}")
+    print(f"Value Range (Max):      {max_val:.6e}")
+    print(f"Mean Non-Zero Value:    {mean_val:.6e}")
+
+
+    print("\n--- Diagonal Properties ---")
+    start_diag = time.time()
+    diag_elements = sp_mat.diagonal()
+    end_diag = time.time()
+    print(f"(Diagonal extraction time: {end_diag - start_diag:.4f}s)")
+
+    if diag_elements.size > 0: # Should always be true unless rows=0 (handled above)
+        mean_diag = np.mean(diag_elements)
+        max_diag = np.max(diag_elements)
+        min_diag = np.min(diag_elements)
+
+        diag_nonzero = diag_elements[diag_elements != 0]
+        if diag_nonzero.size > 0:
+            mean_diag_nz = np.mean(diag_nonzero)
+            print(f"Mean Diagonal (all):    {mean_diag:.6e}")
+            print(f"Mean Diagonal (non-zero):{mean_diag_nz:.6e} ({diag_nonzero.size} elements)")
+        else:
+            print(f"Mean Diagonal (all):    {mean_diag:.6e}")
+            print(f"Mean Diagonal (non-zero): N/A (all diagonal elements are zero)")
+
+        print(f"Max Diagonal:           {max_diag:.6e}")
+        print(f"Min Diagonal:           {min_diag:.6e}")
+
+        # 5. "Diagonality" - Relative Diagonal Norm (using Frobenius norm)
+        # The Frobenius norm is sqrt(sum(abs(A_ij)^2))
+        start_norm = time.time()
+        norm_diag = np.linalg.norm(diag_elements)
+        norm_matrix = spla.norm(sp_mat, ord='fro')
+        end_norm = time.time()
+        print(f"(Norm calculation time: {end_norm - start_norm:.4f}s)")
+
+        if norm_matrix > 1e-15: # Avoid division by zero
+            diagonality_ratio = norm_diag / norm_matrix
+            print(f"\nRelative Diagonal Norm (Frobenius): {diagonality_ratio:.6f}")
+            print(f"  (Ratio of ||diag(A)||_F / ||A||_F)")
+            print(f"  (Diagonal Norm = {norm_diag:.6e}, Matrix Norm = {norm_matrix:.6e})")
+            if diagonality_ratio > 0.99:
+                print("  -> Matrix is strongly diagonal dominant by norm.")
+            elif diagonality_ratio < 0.1:
+                print("  -> Matrix norm is dominated by off-diagonal elements.")
+        else:
+            print("\nRelative Diagonal Norm: N/A (matrix Frobenius norm is zero)")
+
+    else: # Should not happen if rows > 0
+        print("\nCould not extract diagonal (matrix has zero rows?).")
+
+
+    # 6. Other Useful Stats
+    print("\n--- Other Properties ---")
+    is_square = rows == cols
+    print(f"Is Square:              {is_square}")
+    if is_square:
+        try:
+            diff_norm = spla.norm(sp_mat - sp_mat.T, ord='fro')
+            if diff_norm < 1e-10 * norm_matrix : # Check relative difference norm
+                print(f"Is Symmetric (approx):  True (||A - A.T||_F / ||A||_F < 1e-10)")
+            else:
+                print(f"Is Symmetric (approx):  False (||A - A.T||_F = {diff_norm:.2e})")
+        except Exception as e:
+            print(f"Is Symmetric (approx):  Check failed ({e})")
+    else:
+        print(f"Is Symmetric (approx):  False (not square)")
+
+
+    print("-" * 50)
+
+def load_and_analyze_sparse_matrix(filename: str):
+    sm = loadSparseMatrixBinary(filename)
+    analyze_sparse_matrix(sm)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Simple tool to get some statistics about a sparse matrix from mfem")
+    parser.add_argument("path", help="path to the output file", type=str)
+
+    args = parser.parse_args()
+
+    load_and_analyze_sparse_matrix(args.filename)
--- a/utils/debugUtils/MFEMAnalysisUtils/SSEDebug/src/init.py
+++ b/utils/debugUtils/MFEMAnalysisUtils/SSEDebug/src/init.py
--- a/utils/debugUtils/MFEMAnalysisUtils/meson.build
+++ b/utils/debugUtils/MFEMAnalysisUtils/meson.build
@@ -0,0 +1 @@
+# Process MFEMAnalysis-cpp/meson.build, which declares mfemanalysis_dep.
+subdir('MFEMAnalysis-cpp')
--- a/utils/debugUtils/MFEMAnalysisUtils/readme.md
+++ b/utils/debugUtils/MFEMAnalysisUtils/readme.md
@@ -0,0 +1,12 @@
+# Tools for analyzing MFEM Sparse Matrices (among other things)
+MFEM does a lot of work with sparse matrices but does not provide simple tools for inspecting them. Here I include some basic utilities to analyze these matrices.
+
+## Python
+There is a Python script to perform the actual analysis.
+
+## C++
+There is a small, header-only C++ library which provides an interface to write MFEM sparse matrices out to disk.
+
+The C++ utility writes MFEM sparse matrices in a custom binary format that was designed to be simple. The Python script
+only understands this format.
+
				`@@ -0,0 +1 @@`
				`mfemanalysis_dep = declare_dependency(include_directories: 'src/include')`
				`@@ -0,0 +1 @@`
				`from .smread import loadSparseMatrixBinary, analyze_sparse_matrix, load_and_analyze_sparse_matrix`