From e4d21e4b6f6e3d5265534f4a18ea79bd8a366394 Mon Sep 17 00:00:00 2001 From: Alexander Kartavenko Date: Wed, 22 Nov 2017 16:07:56 +0200 Subject: [PATCH 1/4] Closes #37. Add \iter\unique which returns an iterator with unique values. --- src/iter.php | 35 +++++++++++++++++++++++++++++++++++ test/iterTest.php | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/src/iter.php b/src/iter.php index 52e0cef..d452161 100644 --- a/src/iter.php +++ b/src/iter.php @@ -85,6 +85,41 @@ function map(callable $function, $iterable) { } } +/** + * Leaves only unique occurrences by using a provided hash function. + * + * If hash function is not provided values of the iterable will be used for comparison. Storing values instead of hashes + * can require more memory but it prevents possible false positives if there are hash collisions. + * + * @param array|Traversable $iterable Iterable to remove duplicates from + * @param callable|null $hashFunction Hash function that returns the value which will be used to determine + * uniqueness of the element + * @param bool $strict If is set to true the types of the values from hash function will also be checked + * @return \Iterator + */ +function unique($iterable, callable $hashFunction = null, $strict = false) { + _assertIterable($iterable, 'First argument'); + + $hashSet = []; + + foreach ($iterable as $key => $value) { + + if ($hashFunction === null) { + $hash = $value; + } else { + $hash = $hashFunction($value); + } + + if (\in_array($hash, $hashSet, $strict)) { + continue; + } + + $hashSet[] = $hash; + + yield $key => $value; + } +} + /** * Applies a mapping function to all keys of an iterator. 
* diff --git a/test/iterTest.php b/test/iterTest.php index bed5133..f068fe3 100644 --- a/test/iterTest.php +++ b/test/iterTest.php @@ -44,6 +44,49 @@ public function testMap() { $this->assertSame([0, 3, 6, 9, 12, 15], toArray($mapped)); } + public function testUniqueWithoutHashFunction() { + $iterable = [1, 2, '2', '2', 3, 4, 4, null, null, 5, '', '', [1], [1], [2]]; + $expected = [1, 2, '2', 3, 4, null, 5, '', [1], [2]]; + $unique = unique($iterable, null, true); + $this->assertSame($expected, toArray($unique)); + } + + public function testUniqueWithoutHashFunctionNotStrict() { + $iterable = [null, '', 0, 1, '1', 1.0, true]; + $expected = [null, 1]; + $unique = unique($iterable, null); + $this->assertSame($expected, toArray($unique)); + } + + public function testUniqueStringsWithHashFunction() { + $iterable = [ + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.', + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.', + 'Proin tincidunt mollis dui id efficitur. Vivamus vitae tortor vitae velit imperdiet finibus vel eu lacus.', + ]; + $expected = [ + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.', + 'Proin tincidunt mollis dui id efficitur. 
Vivamus vitae tortor vitae velit imperdiet finibus vel eu lacus.', + ]; + $unique = unique($iterable, function ($v) { + return crc32($v); + }, true); + $this->assertSame($expected, toArray($unique)); + } + + public function testUniqueObjectsWithHashFunction() { + $obj1 = new \stdClass(); + $obj1->a = 1; + $obj2 = new \stdClass(); + $obj2->a = 2; + $iterable = [$obj1, $obj1, $obj2]; + $expected = [$obj1, $obj2]; + $unique = unique($iterable, function ($v) { + return $v->a; + }, true); + $this->assertSame($expected, toArray($unique)); + } + public function testMapKeys() { $range = range(0, 5); $mapped = mapKeys(function($n) { return $n * 3; }, $range); From 94315091b24c4d96214f14d0d1433e89cd2962ef Mon Sep 17 00:00:00 2001 From: Alexander Kartavenko Date: Wed, 22 Nov 2017 21:06:24 +0200 Subject: [PATCH 2/4] Change lookup method to a key based as it is more efficient. --- src/iter.php | 12 ++++-------- test/iterTest.php | 7 ------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/src/iter.php b/src/iter.php index d452161..18e520c 100644 --- a/src/iter.php +++ b/src/iter.php @@ -94,27 +94,23 @@ function map(callable $function, $iterable) { * @param array|Traversable $iterable Iterable to remove duplicates from * @param callable|null $hashFunction Hash function that returns the value which will be used to determine * uniqueness of the element - * @param bool $strict If is set to true the types of the values from hash function will also be checked * @return \Iterator */ -function unique($iterable, callable $hashFunction = null, $strict = false) { +function unique($iterable, callable $hashFunction = null) { _assertIterable($iterable, 'First argument'); - $hashSet = []; - foreach ($iterable as $key => $value) { - if ($hashFunction === null) { - $hash = $value; + $hash = serialize($value); } else { $hash = $hashFunction($value); } - if (\in_array($hash, $hashSet, $strict)) { + if (isset($hashSet[$hash])) { continue; } - $hashSet[] = $hash; + $hashSet[$hash] = 
''; yield $key => $value; } } diff --git a/test/iterTest.php b/test/iterTest.php index f068fe3..617576e 100644 --- a/test/iterTest.php +++ b/test/iterTest.php @@ -51,13 +51,6 @@ public function testUniqueWithoutHashFunction() { $this->assertSame($expected, toArray($unique)); } - public function testUniqueWithoutHashFunctionNotStrict() { - $iterable = [null, '', 0, 1, '1', 1.0, true]; - $expected = [null, 1]; - $unique = unique($iterable, null); - $this->assertSame($expected, toArray($unique)); - } - public function testUniqueStringsWithHashFunction() { $iterable = [ 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.', From 2e64cde0cc723a9a5c903bb04e7640f3d06febcb Mon Sep 17 00:00:00 2001 From: Alexander Kartavenko Date: Wed, 22 Nov 2017 21:21:35 +0200 Subject: [PATCH 3/4] Change comments. --- src/iter.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/iter.php b/src/iter.php index 18e520c..c6e9e0f 100644 --- a/src/iter.php +++ b/src/iter.php @@ -88,8 +88,9 @@ function map(callable $function, $iterable) { /** * Leaves only unique occurrences by using a provided hash function. * - * If hash function is not provided values of the iterable will be used for comparison. Storing values instead of hashes - * can require more memory but it prevents possible false positives if there are hash collisions. + * If hash function is not provided values of the iterable will be serialized and used for comparison. Using serialize + * as a hash function can require more memory than other more efficient hash functions but it prevents possible + * false positives if there are hash collisions. * * @param array|Traversable $iterable Iterable to remove duplicates from * @param callable|null $hashFunction Hash function that returns the value which will be used to determine From 8f5202d0cc95ce063431d66593b77ce5c8dbb416 Mon Sep 17 00:00:00 2001 From: Alexander Kartavenko Date: Wed, 22 Nov 2017 21:31:18 +0200 Subject: [PATCH 4/4] Add more tests. 
--- test/iterTest.php | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/test/iterTest.php b/test/iterTest.php index 617576e..a8307a6 100644 --- a/test/iterTest.php +++ b/test/iterTest.php @@ -47,7 +47,7 @@ public function testMap() { public function testUniqueWithoutHashFunction() { $iterable = [1, 2, '2', '2', 3, 4, 4, null, null, 5, '', '', [1], [1], [2]]; $expected = [1, 2, '2', 3, 4, null, 5, '', [1], [2]]; - $unique = unique($iterable, null, true); + $unique = unique($iterable, null); $this->assertSame($expected, toArray($unique)); } @@ -63,7 +63,7 @@ public function testUniqueStringsWithHashFunction() { ]; $unique = unique($iterable, function ($v) { return crc32($v); - }, true); + }); $this->assertSame($expected, toArray($unique)); } @@ -76,7 +76,18 @@ public function testUniqueObjectsWithHashFunction() { $expected = [$obj1, $obj2]; $unique = unique($iterable, function ($v) { return $v->a; - }, true); + }); + $this->assertSame($expected, toArray($unique)); + } + + public function testUniqueObjectsWithoutHashFunction() { + $obj1 = new \stdClass(); + $obj1->a = 1; + $obj2 = new \stdClass(); + $obj2->a = 2; + $iterable = [$obj1, $obj1, $obj2]; + $expected = [$obj1, $obj2]; + $unique = unique($iterable); $this->assertSame($expected, toArray($unique)); }