Skip to content

Commit b899ffc

Browse files
committed
c/epsilon.h: Use _attr_optimize_finite_math. Improve function specialization and vectorization.
1 parent 0232647 commit b899ffc

1 file changed

Lines changed: 93 additions & 38 deletions

File tree

c/epsilon.h

Lines changed: 93 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -14,11 +14,12 @@
1414
/*
 * all_positive: scan the size * dim doubles stored contiguously in `points`
 * and return false at the first value that is <= 0; return true when no such
 * value is found.
 *
 * This is a diff rendering: lines following a "NN-" marker are the removed
 * nested-loop form (index a * dim + d), lines following a "NN+" marker are
 * the replacement that walks the same elements with a single flat index.
 *
 * The added ASSUME lines state size > 0 and 1 <= dim <= 32.
 * NOTE(review): ASSUME is defined outside this view; presumably it is an
 * optimizer hint rather than a runtime check -- confirm before relying on it.
 */
static inline bool
1515
all_positive(const double * restrict points, size_t size, dimension_t dim)
1616
{
17-
ASSUME(dim <= 32);
18-
for (size_t a = 0; a < size; a++)
19-
for (dimension_t d = 0; d < dim; d++)
20-
if (points[a * dim + d] <= 0)
21-
return false;
17+
ASSUME(size > 0);
18+
ASSUME(1 <= dim && dim <= 32);
19+
const size_t len = size * dim;
20+
for (size_t a = 0; a < len; a++)
21+
if (points[a] <= 0)
22+
return false;
2223

2324
return true;
2425
}
@@ -43,19 +44,19 @@ all_positive(const double * restrict points, size_t size, dimension_t dim)
4344
for negative values doesn't make sense.
4445
*/
4546

46-
static inline double
47-
eps_value_(bool do_ratio, double a, double b)
48-
{
49-
return do_ratio ? a / b : a - b;
50-
}
5147

48+
_attr_optimize_finite_math
5249
static inline double
5350
epsilon_helper_(bool do_mult, const enum objs_agree_t agree,
5451
const signed char * restrict minmax, dimension_t dim,
5552
const double * restrict points_a, size_t size_a,
5653
const double * restrict points_b, size_t size_b)
5754
{
55+
// Converting this macro to an inline function hinders vectorization.
56+
#define eps_value_(X,Y) (do_mult ? ((X) / (Y)) : ((X) - (Y)))
57+
5858
ASSUME(2 <= dim && dim <= 32);
59+
ASSUME(size_a > 0 && size_b > 0);
5960
ASSUME(agree == AGREE_MINIMISE || agree == AGREE_MAXIMISE || agree == AGREE_NONE);
6061
ASSUME(minmax == NULL || agree != AGREE_NONE);
6162
double epsilon = do_mult ? 0 : -INFINITY;
@@ -65,29 +66,25 @@ epsilon_helper_(bool do_mult, const enum objs_agree_t agree,
6566
const double * restrict pb = &points_b[b * dim];
6667
for (size_t a = 0; a < size_a; a++) {
6768
const double * restrict pa = &points_a[a * dim];
68-
double epsilon_max;
69-
if (agree == AGREE_NONE) {
70-
epsilon_max = MAX(minmax[0] * eps_value_(do_mult, pb[0], pa[0]),
71-
minmax[1] * eps_value_(do_mult, pb[1], pa[1]));
72-
if (epsilon_max >= epsilon_min)
73-
continue;
74-
for (dimension_t d = 2; d < dim; d++) {
75-
double epsilon_temp = minmax[d] * eps_value_(do_mult, pb[d], pa[d]);
76-
epsilon_max = MAX(epsilon_max, epsilon_temp);
77-
}
78-
} else {
79-
epsilon_max = (agree == AGREE_MINIMISE)
80-
? MAX(eps_value_(do_mult, pa[0], pb[0]), eps_value_(do_mult, pa[1], pb[1]))
81-
: MAX(eps_value_(do_mult, pb[0], pa[0]), eps_value_(do_mult, pb[1], pa[1]));
82-
if (epsilon_max >= epsilon_min)
83-
continue;
84-
for (dimension_t d = 2; d < dim; d++) {
85-
double epsilon_temp = (agree == AGREE_MINIMISE)
86-
? eps_value_(do_mult, pa[d], pb[d])
87-
: eps_value_(do_mult, pb[d], pa[d]);
88-
epsilon_max = MAX(epsilon_max, epsilon_temp);
89-
}
69+
double epsilon_max = (agree == AGREE_NONE)
70+
? MAX(minmax[0] * eps_value_(pb[0], pa[0]),
71+
minmax[1] * eps_value_(pb[1], pa[1]))
72+
: ((agree == AGREE_MINIMISE)
73+
? MAX(eps_value_(pa[0], pb[0]), eps_value_(pa[1], pb[1]))
74+
: MAX(eps_value_(pb[0], pa[0]), eps_value_(pb[1], pa[1])));
75+
76+
if (epsilon_max >= epsilon_min)
77+
continue;
78+
79+
for (dimension_t d = 2; d < dim; d++) {
80+
double epsilon_temp = (agree == AGREE_NONE)
81+
? minmax[d] * eps_value_(pb[d], pa[d])
82+
: ((agree == AGREE_MINIMISE)
83+
? eps_value_(pa[d], pb[d])
84+
: eps_value_(pb[d], pa[d]));
85+
epsilon_max = MAX(epsilon_max, epsilon_temp);
9086
}
87+
9188
if (epsilon_max <= epsilon) {
9289
skip_max = true;
9390
break;
@@ -100,6 +97,63 @@ epsilon_helper_(bool do_mult, const enum objs_agree_t agree,
10097
return epsilon;
10198
}
10299

100+
/*
 * epsilon_mult_agree_none: thin wrapper that forwards to epsilon_helper_
 * with do_mult = true and agree = AGREE_NONE, passing `minmax` and the two
 * point sets through unchanged, and returns the helper's result.
 *
 * With do_mult = true the helper's eps_value_(X, Y) macro takes its ratio
 * branch (X / Y); with AGREE_NONE each objective d contributes
 * minmax[d] * eps_value_(pb[d], pa[d]), so `minmax` is indexed for every
 * objective.
 * NOTE(review): presumably this named wrapper exists so the compiler emits a
 * separately specialized copy of the helper -- confirm against the commit
 * intent ("Improve function specialization and vectorization").
 */
_attr_optimize_finite_math
101+
static inline double
102+
epsilon_mult_agree_none(const signed char * restrict minmax, dimension_t dim,
103+
const double * restrict points_a, size_t size_a,
104+
const double * restrict points_b, size_t size_b)
105+
{
106+
return epsilon_helper_(/* do_mult=*/true, AGREE_NONE, minmax, dim, points_a, size_a, points_b, size_b);
107+
}
108+
109+
/*
 * epsilon_mult_agree_min: thin wrapper that forwards to epsilon_helper_
 * with do_mult = true, agree = AGREE_MINIMISE and minmax = NULL, and returns
 * the helper's result.
 *
 * With do_mult = true the helper's eps_value_(X, Y) macro takes its ratio
 * branch (X / Y); with AGREE_MINIMISE each objective d contributes
 * eps_value_(pa[d], pb[d]), so no per-objective sign array is read.
 */
_attr_optimize_finite_math
110+
static inline double
111+
epsilon_mult_agree_min(dimension_t dim,
112+
const double * restrict points_a, size_t size_a,
113+
const double * restrict points_b, size_t size_b)
114+
{
115+
return epsilon_helper_(/* do_mult=*/true, AGREE_MINIMISE, /*minmax=*/NULL, dim, points_a, size_a, points_b, size_b);
116+
}
117+
118+
/*
 * epsilon_mult_agree_max: thin wrapper that forwards to epsilon_helper_
 * with do_mult = true, agree = AGREE_MAXIMISE and minmax = NULL, and returns
 * the helper's result.
 *
 * With do_mult = true the helper's eps_value_(X, Y) macro takes its ratio
 * branch (X / Y); with AGREE_MAXIMISE each objective d contributes
 * eps_value_(pb[d], pa[d]) -- the operands are swapped relative to the
 * AGREE_MINIMISE case.
 */
_attr_optimize_finite_math
119+
static inline double
120+
epsilon_mult_agree_max(dimension_t dim,
121+
const double * restrict points_a, size_t size_a,
122+
const double * restrict points_b, size_t size_b)
123+
{
124+
return epsilon_helper_(/* do_mult=*/true, AGREE_MAXIMISE, /*minmax=*/NULL, dim, points_a, size_a, points_b, size_b);
125+
}
126+
127+
128+
/*
 * epsilon_addi_agree_none: thin wrapper that forwards to epsilon_helper_
 * with do_mult = false and agree = AGREE_NONE, passing `minmax` and the two
 * point sets through unchanged, and returns the helper's result.
 *
 * With do_mult = false the helper's eps_value_(X, Y) macro takes its
 * difference branch (X - Y); with AGREE_NONE each objective d contributes
 * minmax[d] * eps_value_(pb[d], pa[d]), so `minmax` is indexed for every
 * objective.
 */
_attr_optimize_finite_math
129+
static inline double
130+
epsilon_addi_agree_none(const signed char * restrict minmax, dimension_t dim,
131+
const double * restrict points_a, size_t size_a,
132+
const double * restrict points_b, size_t size_b)
133+
{
134+
return epsilon_helper_(/* do_mult=*/false, AGREE_NONE, minmax, dim, points_a, size_a, points_b, size_b);
135+
}
136+
137+
/*
 * epsilon_addi_agree_min: thin wrapper that forwards to epsilon_helper_
 * with do_mult = false, agree = AGREE_MINIMISE and minmax = NULL, and
 * returns the helper's result.
 *
 * With do_mult = false the helper's eps_value_(X, Y) macro takes its
 * difference branch (X - Y); with AGREE_MINIMISE each objective d
 * contributes eps_value_(pa[d], pb[d]), so no per-objective sign array is
 * read.
 */
_attr_optimize_finite_math
138+
static inline double
139+
epsilon_addi_agree_min(dimension_t dim,
140+
const double * restrict points_a, size_t size_a,
141+
const double * restrict points_b, size_t size_b)
142+
{
143+
return epsilon_helper_(/* do_mult=*/false, AGREE_MINIMISE, /*minmax=*/NULL, dim, points_a, size_a, points_b, size_b);
144+
}
145+
146+
/*
 * epsilon_addi_agree_max: thin wrapper that forwards to epsilon_helper_
 * with do_mult = false, agree = AGREE_MAXIMISE and minmax = NULL, and
 * returns the helper's result.
 *
 * With do_mult = false the helper's eps_value_(X, Y) macro takes its
 * difference branch (X - Y); with AGREE_MAXIMISE each objective d
 * contributes eps_value_(pb[d], pa[d]) -- the operands are swapped relative
 * to the AGREE_MINIMISE case.
 */
_attr_optimize_finite_math
147+
static inline double
148+
epsilon_addi_agree_max(dimension_t dim,
149+
const double * restrict points_a, size_t size_a,
150+
const double * restrict points_b, size_t size_b)
151+
{
152+
return epsilon_helper_(/* do_mult=*/false, AGREE_MAXIMISE, /*minmax=*/NULL, dim, points_a, size_a, points_b, size_b);
153+
}
154+
155+
156+
_attr_optimize_finite_math
103157
static inline double
104158
epsilon_mult_minmax(const signed char * restrict minmax, dimension_t dim,
105159
const double * restrict points_a, size_t size_a,
@@ -114,14 +168,15 @@ epsilon_mult_minmax(const signed char * restrict minmax, dimension_t dim,
114168
// This forces the compiler to generate three specialized versions of the function.
115169
switch (check_all_minimize_maximize(minmax, dim)) {
116170
case AGREE_MINIMISE:
117-
return epsilon_helper_(/* do_mult=*/true, AGREE_MINIMISE, /*minmax=*/NULL, dim, points_a, size_a, points_b, size_b);
171+
return epsilon_mult_agree_min(dim, points_a, size_a, points_b, size_b);
118172
case AGREE_MAXIMISE:
119-
return epsilon_helper_(/* do_mult=*/true, AGREE_MAXIMISE, /*minmax=*/NULL, dim, points_a, size_a, points_b, size_b);
173+
return epsilon_mult_agree_max(dim, points_a, size_a, points_b, size_b);
120174
default:
121-
return epsilon_helper_(/* do_mult=*/true, AGREE_NONE, minmax, dim, points_a, size_a, points_b, size_b);
175+
return epsilon_mult_agree_none(minmax, dim, points_a, size_a, points_b, size_b);
122176
}
123177
}
124178

179+
_attr_optimize_finite_math
125180
static inline double
126181
epsilon_additive_minmax(const signed char * restrict minmax, dimension_t dim,
127182
const double * restrict points_a, size_t size_a,
@@ -130,11 +185,11 @@ epsilon_additive_minmax(const signed char * restrict minmax, dimension_t dim,
130185
// This forces the compiler to generate three specialized versions of the function.
131186
switch (check_all_minimize_maximize(minmax, dim)) {
132187
case AGREE_MINIMISE:
133-
return epsilon_helper_(/* do_mult=*/false, AGREE_MINIMISE, /*minmax=*/NULL, dim, points_a, size_a, points_b, size_b);
188+
return epsilon_addi_agree_min(dim, points_a, size_a, points_b, size_b);
134189
case AGREE_MAXIMISE:
135-
return epsilon_helper_(/* do_mult=*/false, AGREE_MAXIMISE, /*minmax=*/NULL, dim, points_a, size_a, points_b, size_b);
190+
return epsilon_addi_agree_max(dim, points_a, size_a, points_b, size_b);
136191
default:
137-
return epsilon_helper_(/* do_mult=*/false, AGREE_NONE, minmax, dim, points_a, size_a, points_b, size_b);
192+
return epsilon_addi_agree_none(minmax, dim, points_a, size_a, points_b, size_b);
138193
}
139194
}
140195

0 commit comments

Comments
 (0)