Skip to content

Commit c9d1234

Browse files
vstinner and eendebakpt authored
gh-144995: Optimize memoryview == memoryview (#144996)
Optimize memoryview comparison: a memoryview is equal to itself, there
is no need to compare values, except if it uses float format.

Benchmark comparing 1 MiB:

    from timeit import timeit

    with open("/dev/random", 'br') as fp:
        data = fp.read(2**20)
    view = memoryview(data)

    LOOPS = 1_000
    b = timeit('x == x', number=LOOPS, globals={'x': data})
    m = timeit('x == x', number=LOOPS, globals={'x': view})
    print("bytes %f seconds" % b)
    print("mview %f seconds" % m)
    print("=> %f time slower" % (m / b))

Result before the change:

    bytes 0.000026 seconds
    mview 1.445791 seconds
    => 55660.873940 time slower

Result after the change:

    bytes 0.000026 seconds
    mview 0.000028 seconds
    => 1.104382 time slower

This missed optimization was discovered by Pierre-Yves David while
working on Mercurial.

Co-authored-by: Pieter Eendebak <pieter.eendebak@gmail.com>
1 parent f1de65b commit c9d1234

File tree

3 files changed

+87
-0
lines changed

3 files changed

+87
-0
lines changed

Lib/test/test_memoryview.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,67 @@ def test_array_assign(self):
575575
m[:] = new_a
576576
self.assertEqual(a, new_a)
577577

578+
def test_compare_equal(self):
579+
# A memoryview is equal to itself: there is no need to compare
580+
# individual values. This is not true for float values since they can
581+
# be NaN, and NaN is not equal to itself.
582+
583+
def check_equal(view, is_equal):
584+
self.assertEqual(view == view, is_equal)
585+
self.assertEqual(view != view, not is_equal)
586+
587+
# Comparison with a different memoryview doesn't use
588+
# the optimization and should give the same result.
589+
view2 = memoryview(view)
590+
self.assertEqual(view2 == view, is_equal)
591+
self.assertEqual(view2 != view2, not is_equal)
592+
593+
# Test integer formats
594+
for int_format in 'bBhHiIlLqQ':
595+
with self.subTest(format=int_format):
596+
a = array.array(int_format, [1, 2, 3])
597+
m = memoryview(a)
598+
check_equal(m, True)
599+
600+
if int_format in 'bB':
601+
m2 = m.cast('@' + m.format)
602+
check_equal(m2, True)
603+
604+
# Test 'c' format
605+
a = array.array('B', [1, 2, 3])
606+
m = memoryview(a.tobytes()).cast('c')
607+
check_equal(m, True)
608+
609+
# Test 'n' and 'N' formats
610+
if struct.calcsize('L') == struct.calcsize('N'):
611+
int_format = 'L'
612+
elif struct.calcsize('Q') == struct.calcsize('N'):
613+
int_format = 'Q'
614+
else:
615+
int_format = None
616+
if int_format:
617+
a = array.array(int_format, [1, 2, 3])
618+
m = memoryview(a.tobytes()).cast('N')
619+
check_equal(m, True)
620+
m = memoryview(a.tobytes()).cast('n')
621+
check_equal(m, True)
622+
623+
# Test '?' format
624+
m = memoryview(b'\0\1\2').cast('?')
625+
check_equal(m, True)
626+
627+
# Test float formats
628+
for float_format in 'fd':
629+
with self.subTest(format=float_format):
630+
a = array.array(float_format, [1.0, 2.0, float('nan')])
631+
m = memoryview(a)
632+
# nan is not equal to nan
633+
check_equal(m, False)
634+
635+
a = array.array(float_format, [1.0, 2.0, 3.0])
636+
m = memoryview(a)
637+
check_equal(m, True)
638+
578639

579640
class BytesMemorySliceTest(unittest.TestCase,
580641
BaseMemorySliceTests, BaseBytesMemoryTests):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Optimize :class:`memoryview` comparison: a :class:`memoryview` is equal to
2+
itself, there is no need to compare values, except if it uses a float format. Patch by Victor Stinner.

Objects/memoryobject.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3122,6 +3122,30 @@ memory_richcompare(PyObject *v, PyObject *w, int op)
31223122
}
31233123
vv = VIEW_ADDR(v);
31243124

3125+
// For formats supported by the struct module a memoryview is equal to
3126+
// itself: there is no need to compare individual values.
3127+
// This is not true for float values since they can be NaN, and NaN
3128+
// is not equal to itself. So only use this optimization on formats known to
3129+
// not use floats.
3130+
if (v == w) {
3131+
const char *format = vv->format;
3132+
if (format != NULL) {
3133+
if (*format == '@') {
3134+
format++;
3135+
}
3136+
// Include only formats known by struct, exclude float formats
3137+
// "d" (double), "f" (float) and "e" (16-bit float).
3138+
// Do not optimize "P" format.
3139+
if (format[0] != 0
3140+
&& strchr("bBchHiIlLnNqQ?", format[0]) != NULL
3141+
&& format[1] == 0)
3142+
{
3143+
equal = 1;
3144+
goto result;
3145+
}
3146+
}
3147+
}
3148+
31253149
if (PyMemoryView_Check(w)) {
31263150
if (BASE_INACCESSIBLE(w)) {
31273151
equal = (v == w);

0 commit comments

Comments
 (0)