Add reading of energy cal from PHD files, and fit for polynomial coefficients.

wcjohns · wcjohns · commit d629ca1c4fc4 · 2025-11-24T12:58:45.000-08:00
Only tested on a single file so far.
diff --git a/SpecUtils/EnergyCalibration.h b/SpecUtils/EnergyCalibration.h
@@ -679,22 +679,59 @@ namespace SpecUtils
   
   
   /** Writes the given energy calibration object as a CALp file.
-   
+
    If a spectrum file has multiple detectors, you may write out each calibration, with the detectors name, to a single file
-   
+
    @param output The stream to write the output to.
    @param The energy calibration to write.
    @param detector_name The name of the detector - an InterSpec/SpecUtils specific extension of the CALp file format.  If blank,
           wont be written.
    @returns if CALp file was successfully written.
-   
+
    Note, if the energy calibration is Full Range Fraction, then it will be converted to polynomial, and those coefficients written out, but also
    the original FRF coefficients will be written out after the other content - this is a InterSpec/SpecUtils specific extension of CALp file
    format.
    */
   bool write_CALp_file( std::ostream &output,
                         const std::shared_ptr<const EnergyCalibration> &cal,
                         const std::string &detector_name );
+
+
+  /** Fits polynomial energy calibration coefficients from channel-energy pairs using unweighted least squares.
+
+   Uses a simple solution method that may be numerically unstable for ill-conditioned input (Gaussian elimination with partial pivoting on the normal equations, not SVD-style fitting).
+   All data points are treated with equal weight (unweighted fit).
+
+   This function solves the normal equations for polynomial fitting:
+   Energy = c0 + c1*channel + c2*channel^2 + ... + c(n-1)*channel^(n-1)
+
+   where n is the number of coefficients (max_orders).
+
+   @param channel_energy_pairs Vector of (channel, energy) pairs to fit.  The channel values may be fractional.
+   @param max_orders Maximum number of polynomial coefficients to fit (e.g., 2 for linear with offset, 3 for quadratic).
+          Must be >= 1 and <= number of data points.
+
+   @returns Vector of polynomial coefficients [c0, c1, c2, ...] ordered from constant term to highest order term.
+           For max_orders=1, only the gain is fit (offset fixed to 0), but two coefficients, {0, gain}, are returned.
+           For max_orders>=2, returns offset, gain, and higher order terms.
+
+   @throws std::runtime_error if:
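+           - channel_energy_pairs is empty, or contains a NaN or Inf channel or energy value
+           - max_orders is 0 or greater than the number of data points
+           - The system of equations is singular or nearly singular (a pivot magnitude, or for max_orders=1 the sum of channel^2, is < 1e-10)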
+           - Any resulting coefficient is NaN or Inf
+
+   Example usage:
+   \code
+   std::vector<std::pair<float,float>> pairs = {{0.0f, 0.0f}, {100.0f, 300.0f}, {200.0f, 600.0f}};
+   std::vector<float> coeffs = fit_poly_energy_cal_from_points( pairs, 2 ); // Linear fit
+   // coeffs[0] = offset, coeffs[1] = gain
+   \endcode
+
+   Note: For best numerical stability, consider normalizing channel numbers before fitting if they span a very large range.
+   */
+  std::vector<float> fit_poly_energy_cal_from_points( const std::vector<std::pair<float,float>> &channel_energy_pairs,
+                                                       const size_t max_orders );
 }//namespace SpecUtils
 
 #endif
diff --git a/src/EnergyCalibration.cpp b/src/EnergyCalibration.cpp
@@ -2166,7 +2166,165 @@ bool write_CALp_file( std::ostream &output, const shared_ptr<const EnergyCalibra
   }//if( cal->type() == FullRangeFraction )
   
   output << "#END" << eol_char << eol_char;
-  
+
   return output.good();
 }//void write_CALp_file(...)
+
+
+std::vector<float> fit_poly_energy_cal_from_points( const std::vector<std::pair<float,float>> &channel_energy_pairs,
+                                                     const size_t max_orders )
+{ // Unweighted least-squares fit of polynomial coefficients to (channel, energy) pairs; throws std::runtime_error on bad input or a failed fit
+  using namespace std;
+
+  // Validate input: need at least one point, at least one coefficient, and no more coefficients than points
+  if( channel_energy_pairs.empty() )
+    throw runtime_error( "fit_poly_energy_cal_from_points: No data points provided" );
+
+  if( max_orders == 0 )
+    throw runtime_error( "fit_poly_energy_cal_from_points: max_orders must be at least 1" );
+
+  if( max_orders > channel_energy_pairs.size() )
+    throw runtime_error( "fit_poly_energy_cal_from_points: max_orders cannot exceed number of data points" );
+
+  // Validate that all input channel and energy values are finite (the index of the first offender is reported)
+  for( size_t i = 0; i < channel_energy_pairs.size(); ++i )
+  {
+    const float ch = channel_energy_pairs[i].first;
+    const float en = channel_energy_pairs[i].second;
+
+    if( isnan( ch ) || isinf( ch ) )
+      throw runtime_error( "fit_poly_energy_cal_from_points: Channel value is NaN or Inf at index " + std::to_string(i) );
+
+    if( isnan( en ) || isinf( en ) )
+      throw runtime_error( "fit_poly_energy_cal_from_points: Energy value is NaN or Inf at index " + std::to_string(i) );
+  }
+
+  const size_t n = channel_energy_pairs.size();
+  const int num_coefficients = static_cast<int>( max_orders );  // narrowed to int to match the signed loop indices below
+
+  vector<float> coeffs;
+
+  if( num_coefficients == 1 )
+  {
+    // Single coefficient: fit only gain (no offset)
+    // E = 0 + gain * channel
+    // Solve: gain = sum(channel_i * energy_i) / sum(channel_i^2)
+
+    double sum_ch_e = 0.0;
+    double sum_ch2 = 0.0;
+
+    for( const auto &pair : channel_energy_pairs )
+    {
+      const double ch = pair.first;
+      const double en = pair.second;
+      sum_ch_e += ch * en;
+      sum_ch2 += ch * ch;
+    }
+
+    if( sum_ch2 < 1e-10 )  // absolute threshold guarding the division below
+      throw runtime_error( "fit_poly_energy_cal_from_points: All channel values are zero or near-zero" );
+
+    const float gain = static_cast<float>( sum_ch_e / sum_ch2 );
+
+    if( isnan( gain ) || isinf( gain ) )
+      throw runtime_error( "fit_poly_energy_cal_from_points: Computed gain is NaN or Inf" );
+
+    // Return two coefficients, [offset=0, gain], even though only the gain was fit, to match polynomial format
+    coeffs.push_back( 0.0f );  // offset
+    coeffs.push_back( gain );  // gain
+  }
+  else
+  {
+    // Multiple coefficients: use least squares fitting
+    // Solve normal equations for polynomial: E = c0 + c1*ch + c2*ch^2 + ...
+
+    // Build normal equations matrix: (X^T * X) * coefs = X^T * Y
+    // For unweighted least squares; sums are accumulated in double precision
+    vector<vector<double>> matrix( num_coefficients, vector<double>( num_coefficients, 0.0 ) );
+    vector<double> rhs( num_coefficients, 0.0 );
+
+    for( size_t i = 0; i < n; ++i )
+    {
+      const double ch = channel_energy_pairs[i].first;
+      const double en = channel_energy_pairs[i].second;
+
+      // Build X^T * X and X^T * Y; entry (row,col) accumulates ch^(row+col) over all points
+      for( int row = 0; row < num_coefficients; ++row )
+      {
+        const double ch_pow_row = pow( ch, row );
+
+        // Right hand side: X^T * Y
+        rhs[row] += ch_pow_row * en;
+
+        // Matrix: X^T * X
+        for( int col = 0; col < num_coefficients; ++col )
+        {
+          const double ch_pow_col = pow( ch, col );
+          matrix[row][col] += ch_pow_row * ch_pow_col;
+        }
+      }
+    }
+
+    // Solve using Gaussian elimination with partial pivoting (forward elimination; back substitution follows)
+    for( int k = 0; k < num_coefficients; ++k )
+    {
+      // Find pivot: the row at or below k with the largest magnitude entry in column k
+      int pivot_row = k;
+      double max_val = fabs( matrix[k][k] );
+      for( int i = k + 1; i < num_coefficients; ++i )
+      {
+        if( fabs( matrix[i][k] ) > max_val )
+        {
+          max_val = fabs( matrix[i][k] );
+          pivot_row = i;
+        }
+      }
+
+      if( max_val < 1e-10 )  // absolute (not matrix-scaled) threshold on the pivot magnitude
+        throw runtime_error( "fit_poly_energy_cal_from_points: Singular matrix (determinant near zero)" );
+
+      // Swap rows if needed (matrix row and matching right-hand-side entry together)
+      if( pivot_row != k )
+      {
+        swap( matrix[k], matrix[pivot_row] );
+        swap( rhs[k], rhs[pivot_row] );
+      }
+
+      // Eliminate column k from every row below the pivot row
+      for( int i = k + 1; i < num_coefficients; ++i )
+      {
+        const double factor = matrix[i][k] / matrix[k][k];
+        for( int j = k; j < num_coefficients; ++j )
+          matrix[i][j] -= factor * matrix[k][j];
+        rhs[i] -= factor * rhs[k];
+      }
+    }
+
+    // Back substitution; each coefficient is narrowed to float as it is stored, so earlier rows use the rounded values
+    coeffs.resize( num_coefficients );
+    for( int i = num_coefficients - 1; i >= 0; --i )
+    {
+      double sum = rhs[i];
+      for( int j = i + 1; j < num_coefficients; ++j )
+        sum -= matrix[i][j] * coeffs[j];
+      coeffs[i] = static_cast<float>( sum / matrix[i][i] );
+    }
+
+    // Validate coefficients (reject any NaN or Inf result of the solve)
+    for( size_t i = 0; i < coeffs.size(); ++i )
+    {
+      if( isnan( coeffs[i] ) || isinf( coeffs[i] ) )
+        throw runtime_error( "fit_poly_energy_cal_from_points: Computed coefficient is NaN or Inf at index " + std::to_string(i) );
+    }
+  }
+
+  // Final validation of all coefficients before returning (repeats the NaN/Inf checks already made in both branches above)
+  for( size_t i = 0; i < coeffs.size(); ++i )
+  {
+    if( isnan( coeffs[i] ) || isinf( coeffs[i] ) )
+      throw runtime_error( "fit_poly_energy_cal_from_points: Final coefficient is NaN or Inf at index " + std::to_string(i) );
+  }
+
+  return coeffs;
+}//fit_poly_energy_cal_from_points(...)
 }//namespace SpecUtils
diff --git a/src/SpecFile_phd.cpp b/src/SpecFile_phd.cpp
@@ -240,6 +240,108 @@ bool SpecFile::load_from_phd( std::istream &input )
         //165.860          491.7100         0.02968
         //...
         //1836.060         5448.4400        0.02968
+
+        try
+        {
+          vector<pair<float,float>> channel_energy_pairs;
+
+          // Read energy-channel data lines until next section or STOP
+          while( SpecUtils::safe_get_line( input, line ) )
+          {
+            trim( line );
+            if( line.empty() )
+              continue;
+
+            // Stop at next section marker or STOP keyword
+            if( istarts_with( line, "#" ) || iequals_ascii( line, "STOP" ) )
+              break;
+
+            // Parse the three columns: energy, channel, uncertainty
+            vector<float> fields;
+            const bool split_success = SpecUtils::split_to_floats( line.c_str(), line.size(), fields );
+
+            if( !split_success || fields.size() < 2 )
+              continue;  // Skip malformed lines
+
+            const float energy = fields[0];
+            const float channel = fields[1];
+            // fields[2] is uncertainty - ignoring for now
+
+            // Basic sanity check
+            if( energy > 0.0f && channel >= 0.0f )
+              channel_energy_pairs.push_back( make_pair( channel, energy ) );
+          }//while( reading energy-channel pairs )
+
+          // Check if we got usable data
+          if( channel_energy_pairs.empty() )
+            throw runtime_error( "No valid energy-channel pairs found" );
+
+          // Determine polynomial order based on number of points
+          size_t num_coefficients = 2;  // Default: linear with offset
+
+          if( channel_energy_pairs.size() == 1 )
+          {
+            // Single point: linear through origin if energy > 100 keV
+            if( channel_energy_pairs[0].second <= 100.0f )
+              throw runtime_error( "Single calibration point with energy <= 100 keV" );
+            num_coefficients = 1;  // Just gain, no offset
+          }else if( channel_energy_pairs.size() >= 5 )
+          {
+            num_coefficients = 3;  // Quadratic
+          }
+          // else 2-4 points: use 2 coefficients (linear with offset)
+
+          // Fit polynomial coefficients using the utility function
+          vector<float> coeffs = SpecUtils::fit_poly_energy_cal_from_points( channel_energy_pairs, num_coefficients );
+
+          // Validate coefficients are reasonable
+          if( coeffs.size() < 2 )
+            throw runtime_error( "Failed to compute calibration coefficients" );
+
+          // Check gain is positive
+          if( coeffs.size() >= 2 && coeffs[1] <= 0.0f )
+            throw runtime_error( "Negative or zero gain in energy calibration" );
+
+          // Create energy calibration and validate energy range
+          if( !meas->gamma_counts_ || meas->gamma_counts_->empty() )
+            throw runtime_error( "No gamma spectrum data available for energy calibration" );
+
+          const size_t num_channels = meas->gamma_counts_->size();
+          auto newcal = make_shared<EnergyCalibration>();
+          newcal->set_polynomial( num_channels, coeffs, {} );
+
+          const float lower_energy = newcal->lower_energy();
+          const float upper_energy_cal = newcal->upper_energy();
+
+          // Reject if energy range is unreasonable
+          if( upper_energy_cal < 300.0f )
+            throw runtime_error( "Upper energy < 300 keV" );
+
+          if( upper_energy_cal > 15000.0f )
+            throw runtime_error( "Upper energy > 15 MeV" );
+
+          // If we already have a calibration from the simpler method (lines 213-226),
+          // check that this one is roughly compatible before replacing it
+          if( meas->energy_calibration_ && meas->energy_calibration_->valid() )
+          {
+            const float existing_upper = meas->energy_calibration_->upper_energy();
+            const float diff_percent = fabs( upper_energy_cal - existing_upper ) / existing_upper * 100.0f;
+
+            // Allow up to 10% difference
+            if( diff_percent > 10.0f )
+            {
+              meas->parse_warnings_.push_back( "Energy calibration from #g_Energy section differs significantly from upper energy value" );
+            }
+          }
+
+          // Set the new calibration
+          meas->energy_calibration_ = newcal;
+
+        }catch( std::exception &e )
+        {
+          // Failed to parse or create energy calibration - just continue without it
+          meas->parse_warnings_.push_back( "Failed to parse #g_Energy section: " + string(e.what()) );
+        }
       }//if( "#g_Energy" )
       
       if( SpecUtils::istarts_with( line, "#g_Resolution") )
diff --git a/src/SpecFile_spc.cpp b/src/SpecFile_spc.cpp
@@ -784,11 +784,14 @@ bool SpecFile::load_from_iaea_spc( std::istream &input )
         if( SpecUtils::istarts_with(line, "TSA,") )
           throw runtime_error( "This is probably a TSA file, not a Ascii Spc" );
         
+        if( SpecUtils::istarts_with(line, "RADDATA://G0/") )
+          throw runtime_error( "This is probably a URI file, not a Ascii Spc" );
+        
         ++nnotrecognized;
         if( nnotrecognized > 15 && nnotrecognized >= linenum )
           throw runtime_error( "To many unregognized begining lines" );
         
-#if(PERFORM_DEVELOPER_CHECKS && !SpecUtils_BUILD_FUZZING_TESTS)
+#if(PERFORM_DEVELOPER_CHECKS && !SpecUtils_BUILD_FUZZING_TESTS && !SpecUtils_BUILD_UNIT_TESTS )
         cerr << "Warning: SpecFile::load_from_iaea_spc(...):  I didnt recognize line: '"
         << line << "'" << endl;
 #endif
diff --git a/unit_tests/test_energy_calibration.cpp b/unit_tests/test_energy_calibration.cpp