From c47a8b6f2e52cf4c441ec055dffb18c853656cd3 Mon Sep 17 00:00:00 2001 From: GeEom Date: Mon, 16 Mar 2026 19:14:31 +0000 Subject: [PATCH 1/2] Replace CORDIC rotation with polynomial evaluation Breaking: removes rotation kernels and from_i2f62; bump to 2.0.0. --- Cargo.toml | 2 +- README.md | 32 +-- src/kernel/cordic.rs | 161 +------------ src/kernel/mod.rs | 16 +- src/ops/circular.rs | 57 ++++- src/ops/exponential.rs | 95 ++++---- src/ops/hyperbolic.rs | 127 +++++------ src/tables/chebyshev.rs | 87 +++++++ src/tables/circular.rs | 3 - src/tables/hyperbolic.rs | 6 - src/tables/mod.rs | 14 +- src/traits.rs | 34 --- tests/unit/kernel/cordic.rs | 40 +--- tests/unit/ops/circular.rs | 6 +- tests/unit/ops/exponential.rs | 64 +++--- tests/unit/tables/chebyshev.rs | 78 +++++++ tests/unit/tables/circular.rs | 9 +- tests/unit/tables/hyperbolic.rs | 18 +- tests/unit/tables/mod.rs | 1 + tests/unit/traits.rs | 23 -- tools/accuracy-bench/baseline.json | 350 ++++++++++++++--------------- tools/accuracy-bench/src/readme.rs | 4 +- 22 files changed, 578 insertions(+), 649 deletions(-) create mode 100644 src/tables/chebyshev.rs create mode 100644 tests/unit/tables/chebyshev.rs diff --git a/Cargo.toml b/Cargo.toml index 48f38f1..9ec4d9a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fixed_analytics" -version = "1.0.0" +version = "2.0.0" edition = "2024" rust-version = "1.88" authors = ["David Gathercole"] diff --git a/README.md b/README.md index 2586f9f..c649a45 100644 --- a/README.md +++ b/README.md @@ -30,14 +30,14 @@ Requires Rust 1.88 or later. ```toml [dependencies] -fixed_analytics = "1.0.0" +fixed_analytics = "2.0.0" ``` For `no_std` environments: ```toml [dependencies] -fixed_analytics = { version = "1.0.0", default-features = false } +fixed_analytics = { version = "2.0.0", default-features = false } ``` ## Available Functions @@ -54,7 +54,7 @@ fixed_analytics = { version = "1.0.0", default-features = false } | Exponential | `exp`, `pow2` | `ln`, `log2`, `log10` | | Algebraic | — | `sqrt` | -Functions are calculated via CORDIC, Newton-Raphson, and Taylor series techniques. Complete absence of panic is verified at the linker level via the [`no-panic`](https://github.com/dtolnay/no-panic) crate. +Functions are calculated via polynomial evaluation, CORDIC, and Newton-Raphson techniques. Complete absence of panic is verified at the linker level via the [`no-panic`](https://github.com/dtolnay/no-panic) crate. ### Saturation Behavior @@ -62,13 +62,13 @@ The following total functions saturate, clamping to the representable range near | Function | I16F16 Threshold | I32F32 Threshold | Result | |----------|------------------|------------------|--------| -| `exp` | x ≥ 22.2 | x ≥ 44.4 | `T::MAX` | -| `exp` | x ≤ -9.2 | x ≤ -16.2 | Zero | +| `exp` | x ≥ 10.4 | x ≥ 21.5 | `T::MAX` | +| `exp` | x ≤ -11.1 | x ≤ -22.2 | Zero | | `pow2` | x ≥ 15.0 | x ≥ 31.0 | `T::MAX` | -| `pow2` | x ≤ -13.2 | x ≤ -23.3 | Zero | +| `pow2` | x ≤ -16.1 | x ≤ -32.1 | Zero | | `sinh` | \|x\| ≥ 11.1 | \|x\| ≥ 22.2 | `T::MAX` or `T::MIN` | | `cosh` | \|x\| ≥ 11.1 | \|x\| ≥ 22.2 | `T::MAX` | -| `tan` | \|x - pole\| < 8e-5 | \|x - pole\| < 5e-10 | `T::MAX` or `T::MIN` | +| `tan` | \|x - pole\| < 4e-5 | \|x - pole\| < 5e-10 | `T::MAX` or `T::MIN` | Where for `tan`, "pole" refers to ±π/2, ±3π/2, ±5π/2, ... @@ -79,24 +79,24 @@ Relative error statistics measured against MPFR reference implementations. Accur | Function | I16F16 Mean | I16F16 Median | I16F16 P95 | I32F32 Mean | I32F32 Median | I32F32 P95 | |----------|-------------|---------------|------------|-------------|---------------|------------| -| sin | 6.19e-4 | 9.34e-5 | 1.30e-3 | 1.22e-8 | 1.79e-9 | 2.52e-8 | -| cos | 6.82e-4 | 1.02e-4 | 1.46e-3 | 1.30e-8 | 1.91e-9 | 2.83e-8 | -| tan | 2.47e-4 | 9.84e-5 | 6.70e-4 | 6.00e-9 | 1.89e-9 | 1.40e-8 | +| sin | 6.06e-4 | 8.78e-5 | 1.28e-3 | 1.16e-8 | 1.68e-9 | 2.43e-8 | +| cos | 6.45e-4 | 9.03e-5 | 1.38e-3 | 1.22e-8 | 1.72e-9 | 2.64e-8 | +| tan | 7.20e-5 | 3.57e-5 | 2.20e-4 | 1.28e-9 | 3.98e-10 | 3.03e-9 | | asin | 2.87e-4 | 5.93e-5 | 6.46e-4 | 5.34e-9 | 8.82e-10 | 1.03e-8 | | acos | 3.61e-5 | 2.18e-5 | 1.14e-4 | 5.37e-10 | 3.19e-10 | 1.71e-9 | | atan | 2.71e-5 | 2.21e-5 | 6.29e-5 | 3.69e-10 | 2.92e-10 | 8.74e-10 | -| sinh | 1.82e-4 | 1.34e-4 | 5.03e-4 | 1.05e-8 | 2.35e-9 | 9.30e-9 | -| cosh | 1.73e-4 | 1.23e-4 | 5.00e-4 | 1.02e-8 | 2.07e-9 | 9.16e-9 | -| tanh | 2.08e-5 | 1.38e-5 | 5.89e-5 | 1.64e-9 | 1.31e-10 | 1.23e-9 | -| coth | 1.20e-5 | 4.83e-6 | 5.57e-5 | 4.00e-10 | 1.39e-10 | 1.30e-9 | +| sinh | 9.80e-5 | 6.23e-5 | 2.79e-4 | 1.52e-9 | 9.64e-10 | 4.29e-9 | +| cosh | 9.40e-5 | 5.75e-5 | 2.77e-4 | 1.44e-9 | 8.90e-10 | 4.25e-9 | +| tanh | 1.60e-5 | 1.32e-5 | 2.56e-5 | 2.25e-10 | 1.22e-10 | 3.90e-10 | +| coth | 6.68e-6 | 3.54e-6 | 1.80e-5 | 1.41e-10 | 1.16e-10 | 2.74e-10 | | asinh | 6.44e-4 | 4.83e-4 | 1.75e-3 | 1.03e-8 | 7.59e-9 | 2.85e-8 | | acosh | 6.74e-4 | 5.21e-4 | 1.80e-3 | 1.05e-8 | 7.96e-9 | 2.88e-8 | | atanh | 3.01e-4 | 5.90e-5 | 6.25e-4 | 6.68e-9 | 1.32e-9 | 1.44e-8 | | acoth | 2.10e-3 | 1.33e-3 | 6.67e-3 | 4.26e-8 | 2.62e-8 | 1.39e-7 | -| exp | 1.14e-2 | 6.67e-5 | 7.87e-2 | 1.91e-7 | 2.24e-9 | 1.30e-6 | +| exp | 1.14e-2 | 2.32e-5 | 7.88e-2 | 1.91e-7 | 1.73e-9 | 1.30e-6 | | ln | 1.35e-5 | 8.76e-6 | 2.97e-5 | 4.50e-10 | 3.48e-10 | 9.17e-10 | | log2 | 1.33e-5 | 8.48e-6 | 2.92e-5 | 3.46e-10 | 2.24e-10 | 7.21e-10 | | log10 | 1.44e-5 | 9.28e-6 | 3.14e-5 | 4.49e-10 | 3.27e-10 | 9.07e-10 | -| pow2 | 7.28e-4 | 4.74e-5 | 4.71e-3 | 1.15e-8 | 1.06e-9 | 7.38e-8 | +| pow2 | 7.21e-4 | 2.58e-5 | 4.72e-3 | 1.13e-8 | 4.93e-10 | 7.43e-8 | | sqrt | 1.77e-7 | 1.16e-7 | 4.74e-7 | 2.70e-12 | 1.78e-12 | 7.16e-12 | \ No newline at end of file diff --git a/src/kernel/cordic.rs b/src/kernel/cordic.rs index 1a6dd43..fedfd3f 100644 --- a/src/kernel/cordic.rs +++ b/src/kernel/cordic.rs @@ -1,11 +1,11 @@ //! Core CORDIC iteration implementations. //! -//! The CORDIC algorithm operates in two modes, each with two directions: +//! CORDIC vectoring mode drives y toward zero while accumulating angles: //! -//! | Mode | Rotation (z → 0) | Vectoring (y → 0) | -//! |------|------------------|-------------------| -//! | Circular | sin, cos | atan | -//! | Hyperbolic | sinh, cosh | atanh, ln | +//! | Mode | Vectoring (y → 0) | +//! |------|-------------------| +//! | Circular | atan | +//! | Hyperbolic | atanh, ln | //! //! # Algorithm //! @@ -22,9 +22,7 @@ //! - angle[i] = atan(2^-i) for circular, atanh(2^-i) for hyperbolic use crate::tables::hyperbolic::needs_repeat; -use crate::tables::{ - ATAN_TABLE, ATANH_TABLE, CIRCULAR_GAIN_INV, HYPERBOLIC_GAIN, HYPERBOLIC_GAIN_INV, -}; +use crate::tables::{ATAN_TABLE, ATANH_TABLE}; use crate::traits::CordicNumber; /// Table lookup for CORDIC iteration. @@ -43,83 +41,6 @@ const fn table_lookup(table: &[i64; 64], index: u32) -> i64 { table[index as usize] } -/// Returns the CORDIC scale factor (1/K ≈ 0.6073). -/// -/// Pre-multiply initial vectors by this to compensate for CORDIC gain. -#[inline] -#[must_use] -pub fn cordic_scale_factor() -> T { - T::from_i1f63(CIRCULAR_GAIN_INV) -} - -/// Returns the hyperbolic gain factor (`K_h` ≈ 0.8282). -/// -/// After hyperbolic CORDIC iterations, results are scaled by `1/K_h`. -/// To compensate, divide by `K_h` (or multiply by `1/K_h`). -#[inline] -#[must_use] -pub fn hyperbolic_gain() -> T { - T::from_i1f63(HYPERBOLIC_GAIN) -} - -/// Returns the inverse hyperbolic gain factor (`1/K_h` ≈ 1.2075). -/// -/// Pre-multiply initial vectors by this to compensate for hyperbolic CORDIC gain. -/// This uses a precomputed constant, avoiding runtime division. -#[inline] -#[must_use] -pub fn hyperbolic_gain_inv() -> T { - T::from_i2f62(HYPERBOLIC_GAIN_INV) -} - -/// Performs circular CORDIC in rotation mode. -/// -/// Given an initial vector (x, y) and angle z, rotates the vector by angle z. -/// After iteration: -/// - x ≈ K * (x₀ * cos(z₀) - y₀ * sin(z₀)) -/// - y ≈ K * (y₀ * cos(z₀) + x₀ * sin(z₀)) -/// - z ≈ 0 -/// -/// Where K is the circular gain factor (~1.6468). -/// -/// # Arguments -/// -/// * `x` - Initial x coordinate -/// * `y` - Initial y coordinate -/// * `z` - Angle to rotate by (in radians) -/// -/// # Returns -/// -/// Tuple of (x, y, z) after CORDIC iterations. -/// -/// # Note -/// -/// The input angle should be in the range [-1.74, 1.74] radians for -/// convergence. Use argument reduction for larger angles. -#[must_use] -pub fn circular_rotation(mut x: T, mut y: T, mut z: T) -> (T, T, T) { - let zero = T::zero(); - let iterations = T::frac_bits().min(62); - - for i in 0..iterations { - let angle = T::from_i1f63(table_lookup(&ATAN_TABLE, i)); - - if z >= zero { - let x_new = x.saturating_sub(y >> i); - y = y.saturating_add(x >> i); - x = x_new; - z -= angle; - } else { - let x_new = x.saturating_add(y >> i); - y = y.saturating_sub(x >> i); - x = x_new; - z += angle; - } - } - - (x, y, z) -} - /// Performs circular CORDIC in vectoring mode. /// /// Given an initial vector (x, y), rotates it until y ≈ 0. @@ -167,76 +88,6 @@ pub fn circular_vectoring(mut x: T, mut y: T, mut z: T) -> (T, (x, y, z) } -/// Performs hyperbolic CORDIC in rotation mode. -/// -/// Given initial values (x, y, z), performs hyperbolic pseudo-rotations -/// to drive z toward zero. -/// -/// After iteration: -/// - x ≈ `K_h` * (x₀ * cosh(z₀) + y₀ * sinh(z₀)) -/// - y ≈ `K_h` * (y₀ * cosh(z₀) + x₀ * sinh(z₀)) -/// - z ≈ 0 -/// -/// Where `K_h` is the hyperbolic gain factor (~1.2075). -/// -/// # Arguments -/// -/// * `x` - Initial x value -/// * `y` - Initial y value -/// * `z` - Hyperbolic angle to "rotate" by -/// -/// # Returns -/// -/// Tuple of (x, y, z) after CORDIC iterations. -/// -/// # Note -/// -/// - Hyperbolic CORDIC starts at i=1 (not i=0) -/// - Certain iterations must be repeated for convergence -/// - Input z should be in range [-1.12, 1.12] for convergence -#[must_use] -pub fn hyperbolic_rotation(mut x: T, mut y: T, mut z: T) -> (T, T, T) { - let zero = T::zero(); - // Use frac_bits iterations, capped at 54 for table bounds. - let max_iterations = T::frac_bits().min(54); - - let mut i: u32 = 1; // Hyperbolic starts at i=1 - let mut iteration_count: u32 = 0; - let mut repeated = false; - - while iteration_count < max_iterations && i < 64 { - let table_index = i.saturating_sub(1); - let angle = T::from_i1f63(table_lookup(&ATANH_TABLE, table_index)); - - if z >= zero { - // "Rotate" in positive direction - let x_new = x.saturating_add(y >> i); - y = y.saturating_add(x >> i); - x = x_new; - z -= angle; - } else { - // "Rotate" in negative direction - let x_new = x.saturating_sub(y >> i); - y = y.saturating_sub(x >> i); - x = x_new; - z += angle; - } - - iteration_count += 1; - - // Handle repetition for convergence - if needs_repeat(i) && !repeated { - repeated = true; - // Don't increment i, repeat this iteration - } else { - repeated = false; - i += 1; - } - } - - (x, y, z) -} - /// Performs hyperbolic CORDIC in vectoring mode. /// /// Drives y toward zero while accumulating the hyperbolic angle. diff --git a/src/kernel/mod.rs b/src/kernel/mod.rs index 79bd4d4..9fc237c 100644 --- a/src/kernel/mod.rs +++ b/src/kernel/mod.rs @@ -10,22 +10,18 @@ //! z' = z - σ·atan(2^-i) //! ``` //! -//! **Rotation mode** (z→0): computes sin/cos from angle. -//! **Vectoring mode** (y→0): computes atan from coordinates. +//! **Vectoring mode** (y→0): computes atan/atanh from coordinates. //! //! Hyperbolic mode uses `atanh(2^-i)` tables and requires iteration repeats //! at indices 4, 13, 40, ... for convergence. //! -//! | Mode | Rotation (z → 0) | Vectoring (y → 0) | -//! |------|------------------|-------------------| -//! | Circular | sin, cos | atan | -//! | Hyperbolic | sinh, cosh | atanh, ln | +//! | Mode | Vectoring (y → 0) | +//! |------|-------------------| +//! | Circular | atan | +//! | Hyperbolic | atanh, ln | //! //! Users should call functions in [`crate::ops`] rather than kernels directly. mod cordic; -pub use crate::kernel::cordic::{circular_rotation, circular_vectoring, cordic_scale_factor}; -pub use crate::kernel::cordic::{ - hyperbolic_gain, hyperbolic_gain_inv, hyperbolic_rotation, hyperbolic_vectoring, -}; +pub use crate::kernel::cordic::{circular_vectoring, hyperbolic_vectoring}; diff --git a/src/ops/circular.rs b/src/ops/circular.rs index a8c5b76..1e99232 100644 --- a/src/ops/circular.rs +++ b/src/ops/circular.rs @@ -2,8 +2,9 @@ use crate::bounded::{NonNegative, UnitInterval}; use crate::error::{Error, Result}; -use crate::kernel::{circular_rotation, circular_vectoring, cordic_scale_factor}; +use crate::kernel::circular_vectoring; use crate::ops::algebraic::sqrt_nonneg; +use crate::tables::chebyshev::{horner, COS_Q_HI, COS_Q_LO, SIN_P_HI, SIN_P_LO}; use crate::traits::CordicNumber; /// Sine and cosine. More efficient than separate calls. Accepts any angle. @@ -12,7 +13,6 @@ use crate::traits::CordicNumber; pub fn sin_cos(angle: T) -> (T, T) { let pi = T::pi(); let frac_pi_2 = T::frac_pi_2(); - let zero = T::zero(); let two_pi = pi + pi; // Reduce angle to [-π, π] using direct quotient computation. @@ -45,9 +45,56 @@ pub fn sin_cos(angle: T) -> (T, T) { (reduced, false) }; - // Run CORDIC with unit vector scaled by inverse gain - let inv_gain = cordic_scale_factor(); - let (cos_val, sin_val, _) = circular_rotation(inv_gain, zero, reduced); + // Polynomial evaluation via factored Horner form. + // To avoid catastrophic cancellation near π/2, reduce to [0, π/4]: + // For |x| ∈ [0, π/4]: sin(x) = sin_poly(x), cos(x) = cos_poly(x) + // For |x| ∈ (π/4, π/2]: sin(x) = cos_poly(π/2-|x|), cos(x) = sin_poly(π/2-|x|) + let one = T::one(); + let frac_pi_4 = T::frac_pi_4(); + let abs_reduced = reduced.abs(); + let (poly_arg, swapped) = if abs_reduced >= frac_pi_4 { + (frac_pi_2.saturating_sub(abs_reduced), true) + } else { + (abs_reduced, false) + }; + let u = poly_arg.saturating_mul(poly_arg); + + // Evaluate sin and cos polynomials over [0, π/4] using minimax + // (Chebyshev) coefficients. Uses multiply-by-constant instead of + // division, avoiding cumulative rounding error from per-step divides. + // + // sin(x) = x + x³·P(x²) where P = minimax poly of (sin(x)-x)/x³ + // cos(x) = 1 + x²·Q(x²) where Q = minimax poly of (cos(x)-1)/x² + let (sp_val, cp_val) = if T::frac_bits() >= 24 { + // High precision: degree 15 sin, degree 14 cos + let sp = horner(&SIN_P_HI, u); + let sin_approx = poly_arg.saturating_add( + poly_arg.saturating_mul(u).saturating_mul(sp), + ); + let cp = horner(&COS_Q_HI, u); + (sin_approx, one.saturating_add(u.saturating_mul(cp))) + } else { + // Low precision: degree 9 sin, degree 8 cos + let sp = horner(&SIN_P_LO, u); + let sin_approx = poly_arg.saturating_add( + poly_arg.saturating_mul(u).saturating_mul(sp), + ); + let cp = horner(&COS_Q_LO, u); + (sin_approx, one.saturating_add(u.saturating_mul(cp))) + }; + + // Map back: if we swapped, sin(x) = cos_poly, cos(x) = sin_poly + // Also restore sign of sin for negative angles. + let (sin_unsigned, cos_val) = if swapped { + (cp_val, sp_val) + } else { + (sp_val, cp_val) + }; + let sin_val = if reduced < T::zero() { + -sin_unsigned + } else { + sin_unsigned + }; if negate { (-sin_val, -cos_val) diff --git a/src/ops/exponential.rs b/src/ops/exponential.rs index 47655fe..904a211 100644 --- a/src/ops/exponential.rs +++ b/src/ops/exponential.rs @@ -2,7 +2,7 @@ use crate::bounded::{NormalizedLnArg, OpenUnitInterval}; use crate::error::{Error, Result}; -use crate::ops::hyperbolic::{atanh_open, sinh_cosh}; +use crate::ops::hyperbolic::atanh_open; use crate::traits::CordicNumber; /// Exponential function (e^x). @@ -14,11 +14,11 @@ use crate::traits::CordicNumber; /// | Condition | Result | Example (I16F16) | /// |-----------|--------|------------------| /// | x > `ln(T::MAX)` | `T::MAX` | x > ~10.4 → 32767.99 | -/// | x < `ln(T::MIN_POSITIVE)` | `T::ZERO` | x < ~-10.4 → 0 | +/// | x < `ln(T::MIN_POSITIVE)` | `T::ZERO` | x < ~-11.1 → 0 | /// /// The exact thresholds depend on the type's range: -/// - **I16F16:** Saturates for x > ~10.4 or x < ~-20 -/// - **I32F32:** Saturates for x > ~21.5 or x < ~-45 +/// - **I16F16:** Saturates to MAX for x > ~10.4, to zero for x < ~-11.1 +/// - **I32F32:** Saturates to MAX for x > ~21.5, to zero for x < ~-22.2 /// /// Saturation is silent and deterministic. If you need to detect overflow, /// check the input range before calling: @@ -48,51 +48,62 @@ pub fn exp(x: T) -> T { return one; } - // For large |x|, use argument reduction: exp(x) = 2^k * exp(r) - // where r is reduced to (-ln2, ln2) range - let mut reduced = x; - let mut scale: i32 = 0; + // Argument reduction: exp(x) = 2^k * exp(r), where r ∈ (-ln2, ln2). + // Compute k = trunc(x / ln2) in one step, then r = x - k*ln2 in one + // subtraction. Truncation toward zero matches the old iterative + // subtraction behavior while avoiding error accumulation. + #[allow(clippy::cast_possible_wrap, reason = "total_bits bounded by type size")] + let max_shift = (T::total_bits() - 1) as i32; + let scale = x.div(ln2).to_i32(); - // Reduce positive values - let mut i = 0; - while reduced > ln2 && i < 64 { - reduced = reduced.saturating_sub(ln2); - scale += 1; - i += 1; + // Early exit for values that will saturate after scaling + if scale > max_shift { + return T::max_value(); } - - // Reduce negative values - i = 0; - while reduced < -ln2 && i < 64 { - reduced = reduced.saturating_add(ln2); - scale -= 1; - i += 1; + if scale < -max_shift { + return zero; } - // Compute exp(reduced) = cosh(reduced) + sinh(reduced) - let (sinh_r, cosh_r) = sinh_cosh(reduced); - let exp_r = cosh_r.saturating_add(sinh_r); - - // Scale by 2^scale using bit shifts - #[allow(clippy::cast_possible_wrap, reason = "total_bits bounded by type size")] - let max_shift = (T::total_bits() - 1) as i32; - + let r = x.saturating_sub(T::from_num(scale).saturating_mul(ln2)); + + // Factored Taylor: exp(r) = 1 + r*(1 + r/2*(1 + r/3*(1 + ... r/n))) + let mut p = one; + if T::frac_bits() >= 24 { + // High precision: degree 12 Taylor + // Truncation error: |r^13/13!| ≤ (ln2)^13/13! ≈ 3.4e-15 + p = one.saturating_add(r.div(T::from_num(12)).saturating_mul(p)); + p = one.saturating_add(r.div(T::from_num(11)).saturating_mul(p)); + p = one.saturating_add(r.div(T::from_num(10)).saturating_mul(p)); + p = one.saturating_add(r.div(T::from_num(9)).saturating_mul(p)); + p = one.saturating_add(r.div(T::from_num(8)).saturating_mul(p)); + } + // Common terms (degree 7 base) + // Low-precision truncation error: |r^8/8!| ≤ (ln2)^8/8! ≈ 8.9e-7 + p = one.saturating_add(r.div(T::from_num(7)).saturating_mul(p)); + p = one.saturating_add(r.div(T::from_num(6)).saturating_mul(p)); + p = one.saturating_add(r.div(T::from_num(5)).saturating_mul(p)); + p = one.saturating_add(r.div(T::from_num(4)).saturating_mul(p)); + p = one.saturating_add(r.div(T::from_num(3)).saturating_mul(p)); + p = one.saturating_add(r.div(T::from_num(2)).saturating_mul(p)); + let exp_r = one.saturating_add(r.saturating_mul(p)); + + // Scale by 2^scale using bit shifts. + // scale is already bounded to [-max_shift, max_shift] by the early exits above. #[allow(clippy::cast_sign_loss, reason = "scale >= 0 checked before cast")] - if scale >= 0 { - if scale > max_shift { - T::max_value() - } else { + match scale.cmp(&0) { + core::cmp::Ordering::Greater => { let shift = scale as u32; - exp_r << shift - } - } else { - let neg_scale = -scale; - if neg_scale > max_shift { - zero - } else { - let shift = neg_scale as u32; - exp_r >> shift + // Detect overflow before shifting: if exp_r > MAX >> shift, + // the left shift would wrap, so saturate to MAX instead. + let headroom = T::max_value() >> shift; + if exp_r > headroom { + T::max_value() + } else { + exp_r << shift + } } + core::cmp::Ordering::Less => exp_r >> ((-scale) as u32), + core::cmp::Ordering::Equal => exp_r, } } diff --git a/src/ops/hyperbolic.rs b/src/ops/hyperbolic.rs index 788e47e..fe175e1 100644 --- a/src/ops/hyperbolic.rs +++ b/src/ops/hyperbolic.rs @@ -2,7 +2,7 @@ use crate::bounded::{AtLeastOne, NonNegative, OpenUnitInterval}; use crate::error::{Error, Result}; -use crate::kernel::{hyperbolic_gain_inv, hyperbolic_rotation, hyperbolic_vectoring}; +use crate::kernel::hyperbolic_vectoring; use crate::ops::algebraic::sqrt_nonneg; use crate::traits::CordicNumber; @@ -11,28 +11,6 @@ use crate::traits::CordicNumber; /// Full limit = 1 + this value. const HYPERBOLIC_CONVERGENCE_LIMIT_FRAC_I1F63: i64 = 0x0F22_3D70_A3D7_0A3D; -/// Taylor series threshold for sinh/cosh with high-precision types (≥24 frac bits). -/// -/// Below this threshold, 6th-order Taylor series is used instead of CORDIC: -/// ```text -/// sinh(x) ≈ x + x³/6 + x⁵/120 -/// cosh(x) ≈ 1 + x²/2 + x⁴/24 + x⁶/720 -/// ``` -/// -/// Optimal value: 0.1366 (tuned via golden section search). -const TAYLOR_THRESHOLD_HIGH_PREC_I1F63: i64 = 0x117B_9236_B6A0_8400; - -/// Taylor series threshold for sinh/cosh with low-precision types (<24 frac bits). -/// -/// Below this threshold, 4th-order Taylor series is used instead of CORDIC: -/// ```text -/// sinh(x) ≈ x + x³/6 -/// cosh(x) ≈ 1 + x²/2 + x⁴/24 -/// ``` -/// -/// Optimal value: 0.2776 (tuned via golden section search). -const TAYLOR_THRESHOLD_LOW_PREC_I1F63: i64 = 0x2389_AF6C_4171_EC00; - /// Argument reduction threshold for atanh. /// /// For |x| > this threshold, atanh uses the identity: @@ -60,7 +38,6 @@ const ATANH_REDUCTION_THRESHOLD_I1F63: i64 = 0x6000_0000_0000_0000; #[must_use] #[cfg_attr(feature = "verify-no-panic", no_panic::no_panic)] pub fn sinh_cosh(x: T) -> (T, T) { - let zero = T::zero(); let one = T::one(); // Compute limit as 1 + fractional_part (~1.1182) let limit = one.saturating_add(T::from_i1f63(HYPERBOLIC_CONVERGENCE_LIMIT_FRAC_I1F63)); @@ -76,44 +53,54 @@ pub fn sinh_cosh(x: T) -> (T, T) { depth += 1; } - // For very small reduced, use Taylor series approximation to avoid CORDIC - // overshoot on the first iteration (where atanh(0.5) ≈ 0.549 is larger than x). - // Use precision-dependent threshold and order. - let threshold_bits = if T::frac_bits() >= 24 { - TAYLOR_THRESHOLD_HIGH_PREC_I1F63 - } else { - TAYLOR_THRESHOLD_LOW_PREC_I1F63 - }; - let small_threshold = T::from_i1f63(threshold_bits); - let (mut sh, mut ch) = if reduced.abs() < small_threshold { - let x_sq = reduced.saturating_mul(reduced); - let x_cu = x_sq.saturating_mul(reduced); - let x_qu = x_sq.saturating_mul(x_sq); - - // Higher precision benefits from higher-order Taylor terms - if T::frac_bits() >= 24 { - // sinh(x) ≈ x + x³/6 + x⁵/120, cosh(x) ≈ 1 + x²/2 + x⁴/24 + x⁶/720 - let x_5 = x_qu.saturating_mul(reduced); - let x_6 = x_qu.saturating_mul(x_sq); - let cosh_base = one - .saturating_add(x_sq >> 1) - .saturating_add(x_qu.div(T::from_num(24))); - let cosh_approx = cosh_base.saturating_add(x_6.div(T::from_num(720))); - let sinh_base = reduced.saturating_add(x_cu.div(T::from_num(6))); - let sinh_approx = sinh_base.saturating_add(x_5.div(T::from_num(120))); - (sinh_approx, cosh_approx) - } else { - // sinh(x) ≈ x + x³/6, cosh(x) ≈ 1 + x²/2 + x⁴/24 - let c = one.saturating_add(x_sq >> 1); - let cosh_approx = c.saturating_add(x_qu.div(T::from_num(24))); - let sinh_approx = reduced.saturating_add(x_cu.div(T::from_num(6))); - (sinh_approx, cosh_approx) - } + // Factored Taylor evaluation. After argument reduction, |reduced| ≤ 1.1182. + // + // sinh(x) = x * (1 + u/6 * (1 + u/20 * (1 + u/42 * ...))) + // cosh(x) = 1 + u/2 * (1 + u/12 * (1 + u/30 * ...)) + // where u = x². + // + // Integer division (u/K) is used because it computes the quotient in one + // step without pre-rounding a reciprocal, and the factored form keeps each + // step's multiplicand u/K below 1 for K ≥ 2. + let u = reduced.saturating_mul(reduced); + + // High-precision path requires enough integer bits for divisors up to 182. + let mut sp = one; + let (mut sh, mut ch) = if T::frac_bits() >= 24 && T::total_bits() >= T::frac_bits() + 9 { + // High precision: degree 13 sinh, degree 14 cosh + sp = one.saturating_add(u.div(T::from_num(156)).saturating_mul(sp)); + sp = one.saturating_add(u.div(T::from_num(110)).saturating_mul(sp)); + sp = one.saturating_add(u.div(T::from_num(72)).saturating_mul(sp)); + sp = one.saturating_add(u.div(T::from_num(42)).saturating_mul(sp)); + sp = one.saturating_add(u.div(T::from_num(20)).saturating_mul(sp)); + sp = one.saturating_add(u.div(T::from_num(6)).saturating_mul(sp)); + let sinh_approx = reduced.saturating_mul(sp); + + let mut cp = one; + cp = one.saturating_add(u.div(T::from_num(182)).saturating_mul(cp)); + cp = one.saturating_add(u.div(T::from_num(132)).saturating_mul(cp)); + cp = one.saturating_add(u.div(T::from_num(90)).saturating_mul(cp)); + cp = one.saturating_add(u.div(T::from_num(56)).saturating_mul(cp)); + cp = one.saturating_add(u.div(T::from_num(30)).saturating_mul(cp)); + cp = one.saturating_add(u.div(T::from_num(12)).saturating_mul(cp)); + cp = one.saturating_add(u.div(T::from_num(2)).saturating_mul(cp)); + (sinh_approx, cp) } else { - // For moderate x, use CORDIC directly. - let inv_gain = hyperbolic_gain_inv(); - let (cosh_val, sinh_val, _) = hyperbolic_rotation(inv_gain, zero, reduced); - (sinh_val, cosh_val) + // Low precision: degree 9 sinh, degree 10 cosh + sp = one; + sp = one.saturating_add(u.div(T::from_num(72)).saturating_mul(sp)); + sp = one.saturating_add(u.div(T::from_num(42)).saturating_mul(sp)); + sp = one.saturating_add(u.div(T::from_num(20)).saturating_mul(sp)); + sp = one.saturating_add(u.div(T::from_num(6)).saturating_mul(sp)); + let sinh_approx = reduced.saturating_mul(sp); + + let mut cp = one; + cp = one.saturating_add(u.div(T::from_num(90)).saturating_mul(cp)); + cp = one.saturating_add(u.div(T::from_num(56)).saturating_mul(cp)); + cp = one.saturating_add(u.div(T::from_num(30)).saturating_mul(cp)); + cp = one.saturating_add(u.div(T::from_num(12)).saturating_mul(cp)); + cp = one.saturating_add(u.div(T::from_num(2)).saturating_mul(cp)); + (sinh_approx, cp) }; // Reconstruct via doubling: sinh(2x) = 2·sinh(x)·cosh(x), @@ -137,15 +124,15 @@ pub fn sinh_cosh(x: T) -> (T, T) { /// /// | Condition | Result | Example (I16F16) | /// |-----------|--------|------------------| -/// | x > `asinh(T::MAX)` | `T::MAX` | x > ~10.4 → 32767.99 | -/// | x < `-asinh(T::MAX)` | `T::MIN` | x < ~-10.4 → -32768.0 | +/// | x > `asinh(T::MAX)` | `T::MAX` | x > ~11.1 → 32767.99 | +/// | x < `-asinh(T::MAX)` | `T::MIN` | x < ~-11.1 → -32768.0 | /// /// The exact thresholds: -/// - **I16F16:** Saturates for |x| > ~10.4 -/// - **I32F32:** Saturates for |x| > ~21.5 +/// - **I16F16:** Saturates for |x| > ~11.1 +/// - **I32F32:** Saturates for |x| > ~22.2 /// -/// Within the non-saturating range, sinh is computed accurately via CORDIC -/// with argument reduction for |x| > 1.118. +/// Within the non-saturating range, sinh is computed via polynomial +/// evaluation with argument reduction (halving/doubling) for |x| > 1.118. #[inline] #[must_use] #[cfg_attr(feature = "verify-no-panic", no_panic::no_panic)] @@ -162,13 +149,13 @@ pub fn sinh(x: T) -> T { /// /// | Condition | Result | Example (I16F16) | /// |-----------|--------|------------------| -/// | \|x\| > `acosh(T::MAX)` | `T::MAX` | \|x\| > ~10.4 → 32767.99 | +/// | \|x\| > `acosh(T::MAX)` | `T::MAX` | \|x\| > ~11.1 → 32767.99 | /// /// Unlike sinh, cosh is always positive, so it only saturates to `T::MAX`. /// /// The exact thresholds: -/// - **I16F16:** Saturates for |x| > ~10.4 -/// - **I32F32:** Saturates for |x| > ~21.5 +/// - **I16F16:** Saturates for |x| > ~11.1 +/// - **I32F32:** Saturates for |x| > ~22.2 #[inline] #[must_use] #[cfg_attr(feature = "verify-no-panic", no_panic::no_panic)] diff --git a/src/tables/chebyshev.rs b/src/tables/chebyshev.rs new file mode 100644 index 0000000..1a4fd12 --- /dev/null +++ b/src/tables/chebyshev.rs @@ -0,0 +1,87 @@ +//! Minimax (Chebyshev) polynomial coefficients for sin/cos evaluation. +//! +//! Generated via the Remez algorithm (`mpmath.chebyfit`). Each coefficient +//! approximates a reduced-form remainder, +//! factored so all values fit in I1F63 (magnitude < 1). +//! +//! Stored highest-degree first: `[cₙ, cₙ₋₁, …, c₀]`. +//! Evaluated via Horner's method: +//! +//! ```text +//! P(x) = cₙ·xⁿ + ··· + c₁·x + c₀ = c₀ + x·(c₁ + x·(c₂ + ··· + x·cₙ)) +//! ``` +//! +//! # Reconstruction +//! +//! | Function | Factored form | Variable | Domain | +//! |----------|---------------|----------|--------| +//! | sin | `x + x³·P(x²)` | u = x² | \[0, (π/4)²\] | +//! | cos | `1 + x²·Q(x²)` | u = x² | \[0, (π/4)²\] | + +use crate::traits::CordicNumber; + +/// Evaluate polynomial via Horner's method using precomputed I1F63 coefficients. +/// +/// Stored highest-degree first: `[cₙ, cₙ₋₁, …, c₀]`. +/// Evaluates P(x) = cₙ·xⁿ + cₙ₋₁·xⁿ⁻¹ + ··· + c₀ via Horner's method. +#[inline] +pub fn horner(coeffs: &[i64; N], x: T) -> T { + let mut iter = coeffs.iter(); + // First element is the highest-degree coefficient (N ≥ 3 for all tables). + let mut result = T::from_i1f63(*iter.next().unwrap_or(&0)); + for &coeff in iter { + result = T::from_i1f63(coeff).saturating_add(x.saturating_mul(result)); + } + result +} + +// ============================================================================= +// Sin: P(u) = (sin(x) - x) / x³, u = x², domain [0, (π/4)²] +// ============================================================================= + +/// Minimax coeffs for (sin(x)-x)/x³ in u=x² on [0,(π/4)²]. Low precision (I16F16-class). +#[rustfmt::skip] +pub const SIN_P_LO: [i64; 4] = [ + 0x0000_16DB_E083_8A07, // +2.725e-06 + -0x0006_804E_9E4E_E633, // -1.984e-04 + 0x0111_110D_EF2E_1C96, // +8.333e-03 + -0x1555_5555_45E0_ABF9, // -1.667e-01 +]; + +/// Minimax coeffs for (sin(x)-x)/x³ in u=x² on [0,(π/4)²]. High precision (I32F32-class). +#[rustfmt::skip] +pub const SIN_P_HI: [i64; 7] = [ + -0x0000_0000_006A_C5F5, // -7.587e-13 + 0x0000_0000_5848_5FC3, // +1.606e-10 + -0x0000_0035_CC8A_8259, // -2.505e-08 + 0x0000_171D_E3A5_4609, // +2.756e-06 + -0x0006_8068_0680_664E, // -1.984e-04 + 0x0111_1111_1111_1100, // +8.333e-03 + -0x1555_5555_5555_5555, // -1.667e-01 +]; + +// ============================================================================= +// Cos: Q(u) = (cos(x) - 1) / x², u = x², domain [0, (π/4)²] +// ============================================================================= + +/// Minimax coeffs for (cos(x)-1)/x² in u=x² on [0,(π/4)²]. Low precision. +#[rustfmt::skip] +pub const COS_Q_LO: [i64; 4] = [ + 0x0000_CD37_95D8_0CFA, // +2.446e-05 + -0x002D_81C1_0FEA_FDD5, // -1.389e-03 + 0x0555_5532_ED10_3C5F, // +4.167e-02 + -0x3FFF_FFFF_563B_4B19, // -5.000e-01 +]; + +/// Minimax coeffs for (cos(x)-1)/x² in u=x² on [0,(π/4)²]. High precision. +#[rustfmt::skip] +pub const COS_Q_HI: [i64; 7] = [ + -0x0000_0000_063F_E751, // -1.137e-11 + 0x0000_0004_7BA9_FC96, // +2.088e-09 + -0x0000_024F_C9F1_C946, // -2.756e-07 + 0x0000_D00D_00CE_F099, // +2.480e-05 + -0x002D_82D8_2D82_BAF2, // -1.389e-03 + 0x0555_5555_5555_5435, // +4.167e-02 + -0x3FFF_FFFF_FFFF_FFFE, // -5.000e-01 +]; + diff --git a/src/tables/circular.rs b/src/tables/circular.rs index 8d70abe..9e5b651 100644 --- a/src/tables/circular.rs +++ b/src/tables/circular.rs @@ -72,6 +72,3 @@ pub const ATAN_TABLE: [i64; 64] = [ 0x0000_0000_0000_0002, 0x0000_0000_0000_0001, ]; - -/// 1/K ≈ 0.6073 (I1F63). Pre-multiply initial vector to compensate for CORDIC gain. -pub const CIRCULAR_GAIN_INV: i64 = 0x4DBA_76D4_21AF_2D34; diff --git a/src/tables/hyperbolic.rs b/src/tables/hyperbolic.rs index 144874d..0a93074 100644 --- a/src/tables/hyperbolic.rs +++ b/src/tables/hyperbolic.rs @@ -73,9 +73,6 @@ pub const ATANH_TABLE: [i64; 64] = [ 0x0000_0000_0000_0001, // rounds to 1 LSB ]; -/// `K_h` ≈ 0.828 (I1F63). Hyperbolic gain factor. -pub const HYPERBOLIC_GAIN: i64 = 0x6A01_203D_99A6_3986; - /// Returns true if iteration `i` must be repeated for hyperbolic CORDIC convergence. /// /// The repeat sequence is 4, 13, 40, 121, 364, ... (each term is 3×previous + 1). @@ -85,8 +82,5 @@ pub const fn needs_repeat(index: u32) -> bool { matches!(index, 4 | 13 | 40 | 121 | 364) } -/// `1/K_h` ≈ 1.2075 (I2F62). Pre-multiply to compensate for hyperbolic gain. -pub const HYPERBOLIC_GAIN_INV: i64 = 0x4D47_A1C8_03BB_08CA; - /// atanh(0.5) ≈ 0.549 (I1F63). Used for argument reduction. pub const ATANH_HALF: i64 = 0x464F_A9EA_B40C_2A5E; diff --git a/src/tables/mod.rs b/src/tables/mod.rs index 06d5450..dc1806f 100644 --- a/src/tables/mod.rs +++ b/src/tables/mod.rs @@ -1,20 +1,18 @@ -//! Precomputed lookup tables for CORDIC algorithms. +//! Precomputed lookup tables for CORDIC algorithms and polynomial evaluation. //! //! Tables are stored as `i64` values representing signed I1F63 fixed-point //! numbers (1 sign bit, 63 fractional bits), which are then converted to -//! the target type at runtime. Some constants (like `HYPERBOLIC_GAIN_INV`) -//! use I2F62 format for values >= 1. +//! the target type at runtime. //! //! # Table Contents //! //! - [`ATAN_TABLE`]: `atan(2^-i)` values for circular CORDIC mode //! - [`ATANH_TABLE`]: `atanh(2^-i)` values for hyperbolic CORDIC mode -//! - [`CIRCULAR_GAIN_INV`]: Inverse of circular gain factor (1/K ≈ 0.6073) -//! - [`HYPERBOLIC_GAIN`]: Hyperbolic gain factor (`K_h` ≈ 0.8282) -//! - [`HYPERBOLIC_GAIN_INV`]: Inverse hyperbolic gain factor (`1/K_h` ≈ 1.2075) +//! - [`chebyshev`]: Minimax polynomial coefficients for sin/cos evaluation +pub mod chebyshev; pub mod circular; pub mod hyperbolic; -pub use circular::{ATAN_TABLE, CIRCULAR_GAIN_INV}; -pub use hyperbolic::{ATANH_TABLE, HYPERBOLIC_GAIN, HYPERBOLIC_GAIN_INV}; +pub use circular::ATAN_TABLE; +pub use hyperbolic::ATANH_TABLE; diff --git a/src/traits.rs b/src/traits.rs index 0bbe64f..e138679 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -75,9 +75,6 @@ pub trait CordicNumber: /// Converts from a raw I1F63 representation (1 sign bit, 63 fractional bits). /// For constants in (-1, 1). fn from_i1f63(bits: i64) -> Self; - /// Converts from a raw I2F62 representation (2 integer bits, 62 fractional bits). - /// For constants in [1, 4). - fn from_i2f62(bits: i64) -> Self; /// Returns true if negative. fn is_negative(self) -> bool; /// Returns true if positive. @@ -223,37 +220,6 @@ macro_rules! impl_cordic_generic { } } - #[inline] - // Casts are safe: frac_bits ≤ 128, shift amounts bounded by type size - #[allow( - clippy::cast_possible_wrap, - clippy::cast_lossless, - reason = "frac_bits bounded by type size" - )] - fn from_i2f62(bits: i64) -> Self { - // Convert from I2F62 representation to our type. - // I2F62 has 62 fractional bits. - let our_frac = Self::FRAC_NBITS as i32; - let shift = 62 - our_frac; - - if shift >= 0 { - // We have fewer frac bits than I2F62, shift right - #[allow( - clippy::cast_possible_truncation, - reason = "intentional truncation to target type" - )] - Self::from_bits((bits >> shift) as $bits_type) - } else { - // We have more frac bits than I2F62, shift left - #[allow( - clippy::cast_possible_truncation, - reason = "intentional truncation to target type" - )] - let wide = bits as $bits_type; - Self::from_bits(wide << (-shift)) - } - } - #[inline] fn is_negative(self) -> bool { self < Self::ZERO diff --git a/tests/unit/kernel/cordic.rs b/tests/unit/kernel/cordic.rs index d3fc03a..b471400 100644 --- a/tests/unit/kernel/cordic.rs +++ b/tests/unit/kernel/cordic.rs @@ -3,24 +3,7 @@ #[cfg(test)] mod tests { use fixed::types::I16F16; - use fixed_analytics::kernel::{ - circular_rotation, circular_vectoring, cordic_scale_factor, hyperbolic_gain, - hyperbolic_gain_inv, - }; - - #[test] - fn circular_rotation_zero_angle() { - let inv_gain = cordic_scale_factor::(); - let (x, y, z) = circular_rotation(inv_gain, I16F16::ZERO, I16F16::ZERO); - // After rotation by 0, x should be close to 1 (after gain compensation), y should be ~0 - let x_f32: f32 = x.to_num(); - let y_f32: f32 = y.to_num(); - let z_f32: f32 = z.to_num(); - // The result depends on gain compensation; x should be close to 1 - assert!((x_f32 - 1.0).abs() < 0.02, "x = {x_f32}, expected ~1.0"); - assert!(y_f32.abs() < 0.01, "y = {y_f32}, expected ~0"); - assert!(z_f32.abs() < 0.01, "z = {z_f32}, expected ~0"); - } + use fixed_analytics::kernel::circular_vectoring; #[test] fn circular_vectoring_atan_one() { @@ -30,25 +13,4 @@ mod tests { let expected = core::f32::consts::FRAC_PI_4; assert!((z_f32 - expected).abs() < 0.01); } - - #[test] - fn hyperbolic_gain_value() { - // K_h ≈ 1.2075 (inverse of 0.8282) - let gain: f32 = hyperbolic_gain::().to_num(); - // Note: HYPERBOLIC_GAIN is actually the gain factor K_h ≈ 0.8282 - assert!( - (gain - 0.8282).abs() < 0.01, - "hyperbolic_gain = {gain}, expected ~0.8282" - ); - } - - #[test] - fn hyperbolic_gain_inv_value() { - // 1/K_h ≈ 1.2075 - let gain_inv: f32 = hyperbolic_gain_inv::().to_num(); - assert!( - (gain_inv - 1.2075).abs() < 0.01, - "hyperbolic_gain_inv = {gain_inv}, expected ~1.2075" - ); - } } diff --git a/tests/unit/ops/circular.rs b/tests/unit/ops/circular.rs index 74847dd..9de8625 100644 --- a/tests/unit/ops/circular.rs +++ b/tests/unit/ops/circular.rs @@ -458,9 +458,9 @@ mod tests { #[test] fn tan_i16f16_near_positive_pole() { // tan saturates to MAX when approaching π/2 from below - // Threshold is approximately 0.00008 (8e-5) + // Saturation occurs within ~3 ULPs of π/2 (≈ 0.00005) let far_from_pole = I16F16::from_num(FRAC_PI_2 - 0.0001); - let near_pole = I16F16::from_num(FRAC_PI_2 - 0.00005); + let near_pole = I16F16::from_num(FRAC_PI_2 - 0.00003); assert!( !is_max_16(tan(far_from_pole)), @@ -468,7 +468,7 @@ mod tests { ); assert!( is_max_16(tan(near_pole)), - "tan(π/2 - 0.00005) should saturate to MAX" + "tan(π/2 - 0.00003) should saturate to MAX" ); } diff --git a/tests/unit/ops/exponential.rs b/tests/unit/ops/exponential.rs index 9632f24..3bbdfd8 100644 --- a/tests/unit/ops/exponential.rs +++ b/tests/unit/ops/exponential.rs @@ -269,9 +269,9 @@ mod tests { val.to_num::() >= I16F16::MAX.to_num::() * 0.9999 } - /// Check if I16F16 value is saturated to zero + /// Check if I16F16 value is exactly zero fn is_zero_16(val: I16F16) -> bool { - val == I16F16::ZERO || val.to_num::().abs() < 0.0001 + val == I16F16::ZERO } /// Check if I32F32 value is saturated to MAX (within 0.01%) @@ -279,26 +279,26 @@ mod tests { val.to_num::() >= I32F32::MAX.to_num::() * 0.9999 } - /// Check if I32F32 value is saturated to zero + /// Check if I32F32 value is exactly zero fn is_zero_32(val: I32F32) -> bool { - val == I32F32::ZERO || val.to_num::().abs() < 0.000_000_1 + val == I32F32::ZERO } // ===== exp saturation thresholds ===== - // I16F16: saturates to MAX at x >= 22.2, to zero at x <= -9.2 - // I32F32: saturates to MAX at x >= 44.4, to zero at x <= -16.2 + // I16F16: saturates to MAX at x >= 10.4, to zero at x <= -11.1 + // I32F32: saturates to MAX at x >= 21.49, to zero at x <= -22.2 #[test] fn exp_i16f16_upper_threshold() { // Below threshold: should NOT saturate assert!( - !is_max_16(exp(I16F16::from_num(22.1))), - "exp(22.1) should not saturate" + !is_max_16(exp(I16F16::from_num(10.39))), + "exp(10.39) should not saturate" ); // At threshold: should saturate assert!( - is_max_16(exp(I16F16::from_num(22.2))), - "exp(22.2) should saturate to MAX" + is_max_16(exp(I16F16::from_num(10.4))), + "exp(10.4) should saturate to MAX" ); } @@ -306,13 +306,13 @@ mod tests { fn exp_i16f16_lower_threshold() { // Above threshold: should NOT be zero assert!( - !is_zero_16(exp(I16F16::from_num(-9.1))), - "exp(-9.1) should not be zero" + !is_zero_16(exp(I16F16::from_num(-11.0))), + "exp(-11.0) should not be zero" ); // At threshold: should be zero assert!( - is_zero_16(exp(I16F16::from_num(-9.2))), - "exp(-9.2) should be zero" + is_zero_16(exp(I16F16::from_num(-11.1))), + "exp(-11.1) should be zero" ); } @@ -320,13 +320,13 @@ mod tests { fn exp_i32f32_upper_threshold() { // Below threshold: should NOT saturate assert!( - !is_max_32(exp(I32F32::from_num(44.3))), - "exp(44.3) should not saturate" + !is_max_32(exp(I32F32::from_num(21.48))), + "exp(21.48) should not saturate" ); // At threshold: should saturate assert!( - is_max_32(exp(I32F32::from_num(44.4))), - "exp(44.4) should saturate to MAX" + is_max_32(exp(I32F32::from_num(21.49))), + "exp(21.49) should saturate to MAX" ); } @@ -334,19 +334,19 @@ mod tests { fn exp_i32f32_lower_threshold() { // Above threshold: should NOT be zero assert!( - !is_zero_32(exp(I32F32::from_num(-16.1))), - "exp(-16.1) should not be zero" + !is_zero_32(exp(I32F32::from_num(-22.1))), + "exp(-22.1) should not be zero" ); // At threshold: should be zero assert!( - is_zero_32(exp(I32F32::from_num(-16.2))), - "exp(-16.2) should be zero" + is_zero_32(exp(I32F32::from_num(-22.2))), + "exp(-22.2) should be zero" ); } // ===== pow2 saturation thresholds ===== - // I16F16: saturates to MAX at x >= 15.0, to zero at x <= -13.2 - // I32F32: saturates to MAX at x >= 31.0, to zero at x <= -23.3 + // I16F16: saturates to MAX at x >= 15.0, to zero at x <= -16.1 + // I32F32: saturates to MAX at x >= 31.0, to zero at x <= -32.1 #[test] fn pow2_i16f16_upper_threshold() { @@ -366,13 +366,13 @@ mod tests { fn pow2_i16f16_lower_threshold() { // Above threshold: should NOT be zero assert!( - !is_zero_16(pow2(I16F16::from_num(-13.1))), - "pow2(-13.1) should not be zero" + !is_zero_16(pow2(I16F16::from_num(-16.0))), + "pow2(-16.0) should not be zero" ); // At threshold: should be zero assert!( - is_zero_16(pow2(I16F16::from_num(-13.2))), - "pow2(-13.2) should be zero" + is_zero_16(pow2(I16F16::from_num(-16.1))), + "pow2(-16.1) should be zero" ); } @@ -394,13 +394,13 @@ mod tests { fn pow2_i32f32_lower_threshold() { // Above threshold: should NOT be zero assert!( - !is_zero_32(pow2(I32F32::from_num(-23.2))), - "pow2(-23.2) should not be zero" + !is_zero_32(pow2(I32F32::from_num(-32.0))), + "pow2(-32.0) should not be zero" ); // At threshold: should be zero assert!( - is_zero_32(pow2(I32F32::from_num(-23.3))), - "pow2(-23.3) should be zero" + is_zero_32(pow2(I32F32::from_num(-32.1))), + "pow2(-32.1) should be zero" ); } } diff --git a/tests/unit/tables/chebyshev.rs b/tests/unit/tables/chebyshev.rs new file mode 100644 index 0000000..d72baab --- /dev/null +++ b/tests/unit/tables/chebyshev.rs @@ -0,0 +1,78 @@ +//! Tests for Chebyshev polynomial coefficient tables + +#[cfg(test)] +#[allow( + clippy::cast_precision_loss, + reason = "test code uses f64 casts for verification" +)] +mod tests { + use fixed_analytics::tables::chebyshev::{COS_Q_HI, COS_Q_LO, SIN_P_HI, SIN_P_LO}; + + const SCALE: f64 = (1_u64 << 63) as f64; + + fn i1f63_to_f64(bits: i64) -> f64 { + (bits as f64) / SCALE + } + + // The constant term (last element, c₀) of each polynomial should match + // the Taylor series leading coefficient to high precision. This guards + // against coefficient ordering mistakes and regeneration drift. + + #[test] + fn sin_constant_term_is_neg_one_sixth() { + // (sin(x)-x)/x³ → -1/6 at x=0 + let expected = -1.0 / 6.0; + let lo = i1f63_to_f64(*SIN_P_LO.last().unwrap_or(&0)); + let hi = i1f63_to_f64(*SIN_P_HI.last().unwrap_or(&0)); + assert!( + (lo - expected).abs() < 1e-6, + "SIN_P_LO constant = {lo}, expected {expected}" + ); + assert!( + (hi - expected).abs() < 1e-15, + "SIN_P_HI constant = {hi}, expected {expected}" + ); + } + + #[test] + fn cos_constant_term_is_neg_one_half() { + // (cos(x)-1)/x² → -1/2 at x=0 + let expected = -0.5; + let lo = i1f63_to_f64(*COS_Q_LO.last().unwrap_or(&0)); + let hi = i1f63_to_f64(*COS_Q_HI.last().unwrap_or(&0)); + assert!( + (lo - expected).abs() < 1e-6, + "COS_Q_LO constant = {lo}, expected {expected}" + ); + assert!( + (hi - expected).abs() < 1e-15, + "COS_Q_HI constant = {hi}, expected {expected}" + ); + } + + #[test] + fn all_coefficients_magnitude_below_one() { + for (name, table) in [ + ("SIN_P_LO", SIN_P_LO.as_slice()), + ("SIN_P_HI", SIN_P_HI.as_slice()), + ("COS_Q_LO", COS_Q_LO.as_slice()), + ("COS_Q_HI", COS_Q_HI.as_slice()), + ] { + for (i, &bits) in table.iter().enumerate() { + let val = i1f63_to_f64(bits).abs(); + assert!( + val < 1.0, + "{name}[{i}] = {val}, exceeds I1F63 range" + ); + } + } + } + + #[test] + fn expected_array_lengths() { + assert_eq!(SIN_P_LO.len(), 4); + assert_eq!(SIN_P_HI.len(), 7); + assert_eq!(COS_Q_LO.len(), 4); + assert_eq!(COS_Q_HI.len(), 7); + } +} diff --git a/tests/unit/tables/circular.rs b/tests/unit/tables/circular.rs index 7b9f844..a5b1aff 100644 --- a/tests/unit/tables/circular.rs +++ b/tests/unit/tables/circular.rs @@ -7,7 +7,7 @@ reason = "test code uses direct indexing and f64 casts" )] mod tests { - use fixed_analytics::tables::circular::{ATAN_TABLE, CIRCULAR_GAIN_INV}; + use fixed_analytics::tables::circular::ATAN_TABLE; #[test] fn atan_table_has_64_entries() { @@ -66,11 +66,4 @@ mod tests { ); } } - - #[test] - fn cordic_scale_factor_value() { - // 1/K ≈ 0.6073 in I1F63 format - // Expected value: 0x4DBA_76D4_21AF_2D34 - assert_eq!(CIRCULAR_GAIN_INV, 0x4DBA_76D4_21AF_2D34); - } } diff --git a/tests/unit/tables/hyperbolic.rs b/tests/unit/tables/hyperbolic.rs index 0bc3d53..6d6cf53 100644 --- a/tests/unit/tables/hyperbolic.rs +++ b/tests/unit/tables/hyperbolic.rs @@ -7,9 +7,7 @@ reason = "test code uses direct indexing and f64 casts" )] mod tests { - use fixed_analytics::tables::hyperbolic::{ - ATANH_HALF, ATANH_TABLE, HYPERBOLIC_GAIN, HYPERBOLIC_GAIN_INV, needs_repeat, - }; + use fixed_analytics::tables::hyperbolic::{ATANH_HALF, ATANH_TABLE, needs_repeat}; /// Repeat indices for hyperbolic CORDIC convergence (used only in tests). const REPEAT_INDICES: [u32; 5] = [4, 13, 40, 121, 364]; @@ -63,20 +61,6 @@ mod tests { assert!(!needs_repeat(3)); } - #[test] - fn hyperbolic_gain_value() { - // K_h ≈ 0.8282 in I1F63 format - // Expected value: 0x6A01_203D_99A6_3986 - assert_eq!(HYPERBOLIC_GAIN, 0x6A01_203D_99A6_3986); - } - - #[test] - fn hyperbolic_gain_inv_value() { - // 1/K_h ≈ 1.2075 in I2F62 format - // Expected value: 0x4D47_A1C8_03BB_08CA - assert_eq!(HYPERBOLIC_GAIN_INV, 0x4D47_A1C8_03BB_08CA); - } - #[test] fn atanh_half_matches_table() { // ATANH_HALF should equal ATANH_TABLE[0] diff --git a/tests/unit/tables/mod.rs b/tests/unit/tables/mod.rs index 4959384..a0b07c8 100644 --- a/tests/unit/tables/mod.rs +++ b/tests/unit/tables/mod.rs @@ -1,4 +1,5 @@ //! Tests for lookup tables +mod chebyshev; mod circular; mod hyperbolic; diff --git a/tests/unit/traits.rs b/tests/unit/traits.rs index 8074be2..22b96ac 100644 --- a/tests/unit/traits.rs +++ b/tests/unit/traits.rs @@ -4,7 +4,6 @@ mod tests { use fixed::types::{I4F12, I4F60, I8F8, I8F24, I16F16, I20F12, I24F8, I32F32, I48F16, I64F64}; use fixed_analytics::CordicNumber; - use fixed_analytics::kernel::hyperbolic_gain_inv; #[test] #[allow(clippy::approx_constant, reason = "testing pi approximation")] @@ -105,26 +104,4 @@ mod tests { "I64F64::frac_pi_4() = {pi_4_64}, expected ~0.7854" ); } - - #[test] - fn from_i2f62_high_precision() { - // Test from_i2f62 with high-precision types (frac_bits > 62) - // This exercises the shift-left branch in from_i2f62 - // 1/K_h ≈ 1.2075 is stored in I2F62 format - // We call hyperbolic_gain_inv which uses from_i2f62 - - // I64F64 has 64 fractional bits > 62, so it should use the shift-left branch - let gain_inv_64: f64 = hyperbolic_gain_inv::().to_num(); - assert!( - (gain_inv_64 - 1.2075).abs() < 0.01, - "hyperbolic_gain_inv::() = {gain_inv_64}, expected ~1.2075" - ); - - // I4F60 has 60 fractional bits < 62, uses shift-right branch (control test) - let gain_inv_60: f64 = hyperbolic_gain_inv::().to_num(); - assert!( - (gain_inv_60 - 1.2075).abs() < 0.01, - "hyperbolic_gain_inv::() = {gain_inv_60}, expected ~1.2075" - ); - } } diff --git a/tools/accuracy-bench/baseline.json b/tools/accuracy-bench/baseline.json index 62e77a7..922dd38 100644 --- a/tools/accuracy-bench/baseline.json +++ b/tools/accuracy-bench/baseline.json @@ -1,33 +1,33 @@ { - "timestamp": 1773659600, + "timestamp": 1773684228, "results": [ { "name": "sin", "i16f16": { "count": 59007, - "abs_max": 0.0003160037525166831, - "abs_mean": 0.00007302468809220042, - "abs_p50": 0.00006047965599365046, - "abs_p95": 0.00017966345004388762, - "abs_p99": 0.0002176151062667775, - "rel_max": 1.2279978980557484, - "rel_mean": 0.0006192941080200728, - "rel_p50": 0.00009339376163702873, - "rel_p95": 0.0012955443476845127, - "rel_p99": 0.006577913240128826 + "abs_max": 0.0002151700847870322, + "abs_mean": 0.00006812612370136238, + "abs_p50": 0.000055933053498491425, + "abs_p95": 0.00017059742871838868, + "abs_p99": 0.00019168937678995523, + "rel_max": 1.116598003152961, + "rel_mean": 0.0006057467007882896, + "rel_p50": 0.00008775899233342654, + "rel_p95": 0.0012777646003236927, + "rel_p99": 0.006502970657507115 }, "i32f32": { "count": 59007, - "abs_max": 6.170748511127866e-9, - "abs_mean": 1.4068116631235018e-9, - "abs_p50": 1.160409990319522e-9, - "abs_p95": 3.493709793689348e-9, - "abs_p99": 4.3037375885290885e-9, - "rel_max": 0.000028914948252430548, - "rel_mean": 1.2205666087308561e-8, - "rel_p50": 1.7896414658167102e-9, - "rel_p95": 2.5180727475800953e-8, - "rel_p99": 1.2476454338609432e-7 + "abs_max": 4.039033038583106e-9, + "abs_mean": 1.3029270023530278e-9, + "abs_p50": 1.0683873785666265e-9, + "abs_p95": 3.2612136324772223e-9, + "abs_p99": 3.6513399248594425e-9, + "rel_max": 0.00002211563826080534, + "rel_mean": 1.1629143820233125e-8, + "rel_p50": 1.6819495291246398e-9, + "rel_p95": 2.4347539155652117e-8, + "rel_p99": 1.240257365713088e-7 }, "samples_tested": 59007 }, @@ -35,29 +35,29 @@ "name": "cos", "i16f16": { "count": 59007, - "abs_max": 0.0003866859736972872, - "abs_mean": 0.00007963810993674399, - "abs_p50": 0.00006443047666948587, - "abs_p95": 0.00020265746088754533, - "abs_p99": 0.0002577840365698414, - "rel_max": 1.2430842225800527, - "rel_mean": 0.0006817355406554445, - "rel_p50": 0.00010175899048076198, - "rel_p95": 0.001462660467267819, - "rel_p99": 0.007494331488013513 + "abs_max": 0.00022643716377789003, + "abs_mean": 0.0000694764940922613, + "abs_p50": 0.000056728088293711565, + "abs_p95": 0.0001758732540115826, + "abs_p99": 0.00020179977026268947, + "rel_max": 1.0246983213829421, + "rel_mean": 0.0006447743404889468, + "rel_p50": 0.00009031274274397404, + "rel_p95": 0.0013835709964284293, + "rel_p99": 0.007078356961918482 }, "i32f32": { "count": 59007, - "abs_max": 7.79997452737824e-9, - "abs_mean": 1.513260353830001e-9, - "abs_p50": 1.2224048440145907e-9, - "abs_p95": 3.833358519500507e-9, - "abs_p99": 4.911403062024533e-9, - "rel_max": 0.00001967115611825825, - "rel_mean": 1.2957298375182963e-8, - "rel_p50": 1.9050275950092017e-9, - "rel_p95": 2.8285160809841404e-8, - "rel_p99": 1.4381941403907706e-7 + "abs_max": 4.1480433046747756e-9, + "abs_mean": 1.3260194254286985e-9, + "abs_p50": 1.0842688968892844e-9, + "abs_p95": 3.343171406378076e-9, + "abs_p99": 3.757965722460399e-9, + "rel_max": 0.000019187851140022507, + "rel_mean": 1.2211029787286696e-8, + "rel_p50": 1.7246555042177107e-9, + "rel_p95": 2.6350420764034186e-8, + "rel_p99": 1.336386501986586e-7 }, "samples_tested": 59007 }, @@ -65,29 +65,29 @@ "name": "tan", "i16f16": { "count": 59003, - "abs_max": 0.02984546861616799, - "abs_mean": 0.0006335346181894033, - "abs_p50": 0.00006621604629541977, - "abs_p95": 0.003373664071061988, - "abs_p99": 0.010401170982039076, - "rel_max": 0.33030677492906185, - "rel_mean": 0.0002468954226306189, - "rel_p50": 0.0000984472250758889, - "rel_p95": 0.0006702847385671937, - "rel_p99": 0.001805460249787153 + "abs_max": 0.005735721706290775, + "abs_mean": 0.00021012307627802187, + "abs_p50": 0.000021381458935015862, + "abs_p95": 0.0012400936712424482, + "abs_p99": 0.0030023153745446507, + "rel_max": 0.035724039917143464, + "rel_mean": 0.00007200288153205199, + "rel_p50": 0.00003572808971675103, + "rel_p95": 0.00022024253781357213, + "rel_p99": 0.0005224337353663624 }, "i32f32": { "count": 59003, - "abs_max": 7.328096778280724e-7, - "abs_mean": 1.1528520931361289e-8, - "abs_p50": 1.317846054504912e-9, - "abs_p95": 6.10929014044359e-8, - "abs_p99": 1.8720116123915886e-7, - "rel_max": 8.624633173421364e-6, - "rel_mean": 6.000943382545689e-9, - "rel_p50": 1.886864892601105e-9, - "rel_p95": 1.3967521794933386e-8, - "rel_p99": 4.7362590860621626e-8 + "abs_max": 6.911457361979956e-8, + "abs_mean": 2.041697369589976e-9, + "abs_p50": 2.785679464878399e-10, + "abs_p95": 1.175965635979992e-8, + "abs_p99": 3.164663908705734e-8, + "rel_max": 6.094313398644838e-7, + "rel_mean": 1.2802478988920128e-9, + "rel_p50": 3.983875797663888e-10, + "rel_p95": 3.0296768479738307e-9, + "rel_p99": 1.330103069791468e-8 }, "samples_tested": 59003 }, @@ -185,29 +185,29 @@ "name": "sinh", "i16f16": { "count": 59007, - "abs_max": 1.0772810843461684, - "abs_mean": 0.06303677330481015, - "abs_p50": 0.004098231957545551, - "abs_p95": 0.34691838864932834, - "abs_p99": 0.6188876148160034, + "abs_max": 0.5025180920220009, + "abs_mean": 0.03346327121695756, + "abs_p50": 0.0020009559816998035, + "abs_p95": 0.19600618191498143, + "abs_p99": 0.3247019419663957, "rel_max": 0.0049975358676744045, - "rel_mean": 0.00018236789394986054, - "rel_p50": 0.0001344182189823812, - "rel_p95": 0.0005027598796161228, - "rel_p99": 0.0006566622127170074 + "rel_mean": 0.00009801628888291605, + "rel_p50": 0.0000622856559642541, + "rel_p95": 0.0002791596523877885, + "rel_p99": 0.0003347132187640623 }, "i32f32": { "count": 59007, - "abs_max": 0.008872128233271326, - "abs_mean": 1.396558004830551e-6, - "abs_p50": 7.002165069991406e-8, - "abs_p95": 5.8817661283683265e-6, - "abs_p99": 0.000011134297892567702, - "rel_max": 0.00020256339056303632, - "rel_mean": 1.0480732995514983e-8, - "rel_p50": 2.3544956150429304e-9, - "rel_p95": 9.303074988477954e-9, - "rel_p99": 1.285273873766771e-8 + "abs_max": 8.038690566536388e-6, + "abs_mean": 5.156156838024176e-7, + "abs_p50": 3.104679535681498e-8, + "abs_p95": 2.9910759167250944e-6, + "abs_p99": 4.959610805599368e-6, + "rel_max": 1.8504251677883088e-7, + "rel_mean": 1.5213766444060955e-9, + "rel_p50": 9.642203459115336e-10, + "rel_p95": 4.2894528488776325e-9, + "rel_p99": 5.100874214755671e-9 }, "samples_tested": 59007 }, @@ -215,29 +215,29 @@ "name": "cosh", "i16f16": { "count": 59007, - "abs_max": 1.0772732080154128, - "abs_mean": 0.06303092881225811, - "abs_p50": 0.004094172796769158, - "abs_p95": 0.3469277889677187, - "abs_p99": 0.6188732979264842, - "rel_max": 0.0010691901922094571, - "rel_mean": 0.00017285180819279924, - "rel_p50": 0.00012268943900841178, - "rel_p95": 0.0005004633648159678, - "rel_p99": 0.0006510488630542412 + "abs_max": 0.502514950379009, + "abs_mean": 0.03347460879545913, + "abs_p50": 0.0020086479998440154, + "abs_p95": 0.1960088821260797, + "abs_p99": 0.3247042028542637, + "rel_max": 0.00042943942695364615, + "rel_mean": 0.0000939959012671695, + "rel_p50": 0.00005751275670377952, + "rel_p95": 0.0002765400244774992, + "rel_p99": 0.0003267974480575182 }, "i32f32": { "count": 59007, - "abs_max": 0.008869816353453075, - "abs_mean": 1.3964231030204069e-6, - "abs_p50": 7.00661288988158e-8, - "abs_p95": 5.882438131266099e-6, - "abs_p99": 0.000011134392480016686, - "rel_max": 0.00020245784587786796, - "rel_mean": 1.0248053709258805e-8, - "rel_p50": 2.073955819960864e-9, - "rel_p95": 9.159278823861197e-9, - "rel_p99": 1.2647926134213153e-8 + "abs_max": 8.038496162043884e-6, + "abs_mean": 5.157946995090689e-7, + "abs_p50": 3.117384039796889e-8, + "abs_p95": 2.991293740706169e-6, + "abs_p99": 4.95977587888774e-6, + "rel_max": 6.332694227282468e-9, + "rel_mean": 1.443812624739602e-9, + "rel_p50": 8.900846992373072e-10, + "rel_p95": 4.252582842376521e-9, + "rel_p99": 4.987789635509372e-9 }, "samples_tested": 59007 }, @@ -245,29 +245,29 @@ "name": "tanh", "i16f16": { "count": 59007, - "abs_max": 0.0001416839219182675, - "abs_mean": 0.000012729098112476967, - "abs_p50": 0.000013373680597172921, - "abs_p95": 0.000027615223236021613, - "abs_p99": 0.000057425093706209296, + "abs_max": 0.000048221084164246086, + "abs_mean": 0.000010746892712553541, + "abs_p50": 0.000012369070582551878, + "abs_p95": 0.000015898153241100665, + "abs_p99": 0.000024410866958035626, "rel_max": 0.004137583155192043, - "rel_mean": 0.000020784111846600086, - "rel_p50": 0.0000138370779552808, - "rel_p95": 0.000058866070650500954, - "rel_p99": 0.00018729767889252025 + "rel_mean": 0.000015991444772431047, + "rel_p50": 0.000013238354595490895, + "rel_p95": 0.000025649082912950514, + "rel_p99": 0.000102512033353458 }, "i32f32": { "count": 59007, - "abs_max": 0.000020695083288946314, - "abs_mean": 1.2050832301560682e-9, - "abs_p50": 1.2859258102793092e-10, - "abs_p95": 5.812544889849391e-10, - "abs_p99": 1.2296446916248982e-9, - "rel_max": 0.00002564595268436019, - "rel_mean": 1.6354159982679383e-9, - "rel_p50": 1.3132154604890873e-10, - "rel_p95": 1.2321919801854915e-9, - "rel_p99": 4.89240139683993e-9 + "abs_max": 6.588033452104014e-10, + "abs_mean": 1.2233799165141846e-10, + "abs_p50": 1.1572420799410565e-10, + "abs_p95": 2.4313007163101474e-10, + "abs_p99": 3.71012220945488e-10, + "rel_max": 1.8882934414975241e-7, + "rel_mean": 2.247399220880143e-10, + "rel_p50": 1.223354757805662e-10, + "rel_p95": 3.8969320859334313e-10, + "rel_p99": 1.7087108679253065e-9 }, "samples_tested": 59007 }, @@ -276,28 +276,28 @@ "i16f16": { "count": 59003, "abs_max": 0.00186560782194789, - "abs_mean": 0.000027539381408770688, - "abs_p50": 4.961126731650722e-6, - "abs_p95": 0.00013365012785548913, - "abs_p99": 0.00048479706134862965, - "rel_max": 0.0003172539281558055, - "rel_mean": 0.000011996517339386098, - "rel_p50": 4.82788872128744e-6, - "rel_p95": 0.00005573355105518167, - "rel_p99": 0.00011790663206009863 + "abs_mean": 0.00001741395288829593, + "abs_p50": 3.7804332484459024e-6, + "abs_p95": 0.000040709809506100925, + "abs_p99": 0.00038648315832379154, + "rel_max": 0.00019295906661834228, + "rel_mean": 6.6810704334344306e-6, + "rel_p50": 3.5447829405984752e-6, + "rel_p95": 0.000018011430599054972, + "rel_p99": 0.00006381944461606493 }, "i32f32": { "count": 59003, - "abs_max": 5.297342871246613e-6, - "abs_mean": 1.0266911358088638e-9, - "abs_p50": 1.4142287341201154e-10, - "abs_p95": 3.0405140627465244e-9, - "abs_p99": 2.25219967120438e-8, - "rel_max": 5.177780320012974e-6, - "rel_mean": 3.9985892305619165e-10, - "rel_p50": 1.3934409121730228e-10, - "rel_p95": 1.298366831659975e-9, - "rel_p99": 4.015827932536157e-9 + "abs_max": 3.212849719602673e-8, + "abs_mean": 3.116893673994062e-10, + "abs_p50": 1.2099476975890866e-10, + "abs_p95": 5.963514126960945e-10, + "abs_p99": 6.057081947119514e-9, + "rel_max": 3.352283828172289e-9, + "rel_mean": 1.4140129263411885e-10, + "rel_p50": 1.1561328631010925e-10, + "rel_p95": 2.7378826992810083e-10, + "rel_p99": 9.944455966243446e-10 }, "samples_tested": 59003 }, @@ -425,29 +425,29 @@ "name": "exp", "i16f16": { "count": 59007, - "abs_max": 0.46813349528929393, - "abs_mean": 0.006142015618932389, - "abs_p50": 0.000014615890257533865, - "abs_p95": 0.03260455542874752, - "abs_p99": 0.1284817312330233, - "rel_max": 0.3332355454934613, - "rel_mean": 0.011417573122417932, - "rel_p50": 0.00006673575383040605, - "rel_p95": 0.07871431908682497, - "rel_p99": 0.1835814167869461 + "abs_max": 0.05963610196704394, + "abs_mean": 0.00148284412568926, + "abs_p50": 0.000013067910371533951, + "abs_p95": 0.009531146223707765, + "abs_p99": 0.02852926914965792, + "rel_max": 0.33333805248595344, + "rel_mean": 0.011412784731031914, + "rel_p50": 0.00002318455857263073, + "rel_p95": 0.07877109889269186, + "rel_p99": 0.18364178877011175 }, "i32f32": { "count": 59007, - "abs_max": 0.000014848937098577153, - "abs_mean": 4.428319596643952e-7, - "abs_p50": 2.8667312967911585e-10, - "abs_p95": 3.271608875365928e-6, - "abs_p99": 7.336121598200407e-6, + "abs_max": 6.483619472419377e-6, + "abs_mean": 3.886059666467785e-7, + "abs_p50": 2.322268084211862e-10, + "abs_p95": 3.0078297186264535e-6, + "abs_p99": 5.427600171969971e-6, "rel_max": 5.084182472060312e-6, - "rel_mean": 1.9135427833977108e-7, - "rel_p50": 2.2358414909904965e-9, - "rel_p95": 1.2962977984451641e-6, - "rel_p99": 3.2213312688224884e-6 + "rel_mean": 1.9130050918738928e-7, + "rel_p50": 1.7319438978244154e-9, + "rel_p95": 1.2962127732347275e-6, + "rel_p99": 3.2225748664160253e-6 }, "samples_tested": 59007 }, @@ -545,28 +545,28 @@ "name": "pow2", "i16f16": { "count": 59007, - "abs_max": 0.1240198076766319, - "abs_mean": 0.003254876418514731, - "abs_p50": 0.000029241600689067226, - "abs_p95": 0.019762391636334087, - "abs_p99": 0.052000798585595476, - "rel_max": 0.015141079259996712, - "rel_mean": 0.0007282995676444927, - "rel_p50": 0.00004740369660883063, - "rel_p95": 0.004705167104145861, + "abs_max": 0.03599762646047111, + "abs_mean": 0.0016799025276145177, + "abs_p50": 0.000018813379432325306, + "abs_p95": 0.011321668885898362, + "abs_p99": 0.021123637255755057, + "rel_max": 0.01538737068576766, + "rel_mean": 0.0007205229655744091, + "rel_p50": 0.000025828291700624078, + "rel_p95": 0.004715341194879804, "rel_p99": 0.01027006938792182 }, "i32f32": { "count": 59007, - "abs_max": 2.711145384637348e-6, - "abs_mean": 6.906259627837998e-8, - "abs_p50": 6.68742838882963e-10, - "abs_p95": 4.3179491626688105e-7, - "abs_p99": 1.0690903309296118e-6, - "rel_max": 2.384185791015625e-7, - "rel_mean": 1.1514619376993808e-8, - "rel_p50": 1.0561495138250126e-9, - "rel_p95": 7.384199695898233e-8, + "abs_max": 6.981339311096235e-7, + "abs_mean": 3.477602797068013e-8, + "abs_p50": 3.6119718327398687e-10, + "abs_p95": 2.3425616291206097e-7, + "abs_p99": 4.4821740630140994e-7, + "rel_max": 2.3427334467174056e-7, + "rel_mean": 1.1309086723812138e-8, + "rel_p50": 4.933149661779767e-10, + "rel_p95": 7.428902583432353e-8, "rel_p99": 1.5696090047105027e-7 }, "samples_tested": 59007 diff --git a/tools/accuracy-bench/src/readme.rs b/tools/accuracy-bench/src/readme.rs index 6401844..b1bb18a 100644 --- a/tools/accuracy-bench/src/readme.rs +++ b/tools/accuracy-bench/src/readme.rs @@ -13,7 +13,7 @@ pub fn generate_accuracy_section(results: &[FunctionResult]) -> String { writeln!(out, "### Accuracy\n").unwrap(); writeln!( out, - "Relative error statistics measured against MPFR reference implementations.\n" + "Relative error statistics measured against MPFR reference implementations. Accuracy regressions are not permitted; every change is benchmarked against the baseline before merging. The file tools/accuracy-bench/baseline.json contains further measurements.\n" ) .unwrap(); @@ -206,7 +206,7 @@ mod tests { let section = r#" ### Accuracy -Relative error statistics measured against MPFR reference implementations. +Relative error statistics measured against MPFR reference implementations. Accuracy regressions are not permitted; every change is benchmarked against the baseline before merging. The file tools/accuracy-bench/baseline.json contains further measurements. | Function | I16F16 Mean | I16F16 Median | I16F16 P95 | I32F32 Mean | I32F32 Median | I32F32 P95 | |----------|-------------|---------------|------------|-------------|---------------|------------| From 5ff2e5374a37368d0186ca3d659a4467c4d991c1 Mon Sep 17 00:00:00 2001 From: GeEom Date: Mon, 16 Mar 2026 19:29:09 +0000 Subject: [PATCH 2/2] Linting --- src/ops/circular.rs | 10 +++------- src/tables/chebyshev.rs | 1 - tests/unit/tables/chebyshev.rs | 5 +---- tests/unit/traits.rs | 16 ++++++++++++++++ 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/ops/circular.rs b/src/ops/circular.rs index 1e99232..8e810a4 100644 --- a/src/ops/circular.rs +++ b/src/ops/circular.rs @@ -4,7 +4,7 @@ use crate::bounded::{NonNegative, UnitInterval}; use crate::error::{Error, Result}; use crate::kernel::circular_vectoring; use crate::ops::algebraic::sqrt_nonneg; -use crate::tables::chebyshev::{horner, COS_Q_HI, COS_Q_LO, SIN_P_HI, SIN_P_LO}; +use crate::tables::chebyshev::{COS_Q_HI, COS_Q_LO, SIN_P_HI, SIN_P_LO, horner}; use crate::traits::CordicNumber; /// Sine and cosine. More efficient than separate calls. Accepts any angle. @@ -68,17 +68,13 @@ pub fn sin_cos(angle: T) -> (T, T) { let (sp_val, cp_val) = if T::frac_bits() >= 24 { // High precision: degree 15 sin, degree 14 cos let sp = horner(&SIN_P_HI, u); - let sin_approx = poly_arg.saturating_add( - poly_arg.saturating_mul(u).saturating_mul(sp), - ); + let sin_approx = poly_arg.saturating_add(poly_arg.saturating_mul(u).saturating_mul(sp)); let cp = horner(&COS_Q_HI, u); (sin_approx, one.saturating_add(u.saturating_mul(cp))) } else { // Low precision: degree 9 sin, degree 8 cos let sp = horner(&SIN_P_LO, u); - let sin_approx = poly_arg.saturating_add( - poly_arg.saturating_mul(u).saturating_mul(sp), - ); + let sin_approx = poly_arg.saturating_add(poly_arg.saturating_mul(u).saturating_mul(sp)); let cp = horner(&COS_Q_LO, u); (sin_approx, one.saturating_add(u.saturating_mul(cp))) }; diff --git a/src/tables/chebyshev.rs b/src/tables/chebyshev.rs index 1a4fd12..79ca242 100644 --- a/src/tables/chebyshev.rs +++ b/src/tables/chebyshev.rs @@ -84,4 +84,3 @@ pub const COS_Q_HI: [i64; 7] = [ 0x0555_5555_5555_5435, // +4.167e-02 -0x3FFF_FFFF_FFFF_FFFE, // -5.000e-01 ]; - diff --git a/tests/unit/tables/chebyshev.rs b/tests/unit/tables/chebyshev.rs index d72baab..efe2d9a 100644 --- a/tests/unit/tables/chebyshev.rs +++ b/tests/unit/tables/chebyshev.rs @@ -60,10 +60,7 @@ mod tests { ] { for (i, &bits) in table.iter().enumerate() { let val = i1f63_to_f64(bits).abs(); - assert!( - val < 1.0, - "{name}[{i}] = {val}, exceeds I1F63 range" - ); + assert!(val < 1.0, "{name}[{i}] = {val}, exceeds I1F63 range"); } } } diff --git a/tests/unit/traits.rs b/tests/unit/traits.rs index 22b96ac..1bb95ff 100644 --- a/tests/unit/traits.rs +++ b/tests/unit/traits.rs @@ -82,6 +82,22 @@ mod tests { assert_eq!(I48F16::frac_bits(), 16); } + #[test] + fn div_overflow_saturates_to_min() { + // Dividing a negative number by zero should saturate to MIN + // (signs disagree: negative / "positive zero"). + let neg = I16F16::from_num(-1); + let zero = I16F16::ZERO; + assert_eq!(neg.div(zero), I16F16::MIN); + } + + #[test] + fn div_by_zero_positive_saturates_to_max() { + let pos = I16F16::from_num(1); + let zero = I16F16::ZERO; + assert_eq!(pos.div(zero), I16F16::MAX); + } + #[test] fn frac_pi_4_values() { // Test the frac_pi_4() default implementation