Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions src/iter.php
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,38 @@ function map(callable $function, $iterable) {
}
}

/**
* Leaves only unique occurrences by using a provided hash function.
*
* If no hash function is provided, the values of the iterable are serialized and the serialized form is used for
* comparison. Using serialize as a hash function can require more memory than other, more efficient hash functions,
* but it prevents possible false positives caused by hash collisions.
*
* @param array|Traversable $iterable Iterable to remove duplicates from
* @param callable|null $hashFunction Hash function that returns the value which will be used to determine
* uniqueness of the element
* @return \Iterator
*/
function unique($iterable, callable $hashFunction = null) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can set the default value for $hashFunction here: callable $hashFunction = 'serialize'

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, thanks

_assertIterable($iterable, 'First argument');
$hashSet = [];
foreach ($iterable as $key => $value) {
if ($hashFunction === null) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test could be moved out of the foreach loop.

Something like this:

$hashFunction = $hashFunction ?? 'serialize';

$hash = serialize($value);
} else {
$hash = $hashFunction($value);
}

if (isset($hashSet[$hash])) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use array_key_exists() here.

continue;
}

$hashSet[$hash] = '';

yield $key => $value;
}
}

/**
* Applies a mapping function to all keys of an iterator.
*
Expand Down
47 changes: 47 additions & 0 deletions test/iterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,53 @@ public function testMap() {
$this->assertSame([0, 3, 6, 9, 12, 15], toArray($mapped));
}

/**
 * With an explicit null hash function, unique() keeps only the first occurrence
 * of each value. The fixture checks that values of different types (2 vs '2',
 * null vs '') stay distinct and that equal arrays collapse to one entry.
 */
public function testUniqueWithoutHashFunction() {
    $input = [1, 2, '2', '2', 3, 4, 4, null, null, 5, '', '', [1], [1], [2]];

    $deduplicated = toArray(unique($input, null));

    $this->assertSame(
        [1, 2, '2', 3, 4, null, 5, '', [1], [2]],
        $deduplicated
    );
}

/**
 * unique() with a crc32-based hash callback drops the repeated string and
 * keeps the first occurrence of each distinct string, in input order.
 */
public function testUniqueStringsWithHashFunction() {
    $lorem = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.';
    $proin = 'Proin tincidunt mollis dui id efficitur. Vivamus vitae tortor vitae velit imperdiet finibus vel eu lacus.';

    $crc32Hash = function ($text) {
        return crc32($text);
    };

    $deduplicated = unique([$lorem, $lorem, $proin], $crc32Hash);

    $this->assertSame([$lorem, $proin], toArray($deduplicated));
}

/**
 * unique() with a custom hash callback treats objects as duplicates whenever
 * the callback returns the same value for them.
 *
 * The fixture contains both a repeated instance and a *different* instance
 * that shares the hashed property `a` but differs in other state. The second
 * case is what proves the callback is actually used: the serialize fallback
 * would consider that object distinct and keep it.
 */
public function testUniqueObjectsWithHashFunction() {
    $obj1 = new \stdClass();
    $obj1->a = 1;
    $obj2 = new \stdClass();
    $obj2->a = 2;

    // Same hash as $obj1 (a === 1) but a different serialized form.
    $sameHashAsObj1 = new \stdClass();
    $sameHashAsObj1->a = 1;
    $sameHashAsObj1->b = 'other';

    $iterable = [$obj1, $obj1, $obj2, $sameHashAsObj1];
    $expected = [$obj1, $obj2];

    $unique = unique($iterable, function ($v) {
        return $v->a;
    });

    $this->assertSame($expected, toArray($unique));
}

/**
 * Called without a hash callback, unique() drops the repeated object instance
 * and keeps the first occurrence of each object, in input order.
 */
public function testUniqueObjectsWithoutHashFunction() {
    $first = (object) ['a' => 1];
    $second = (object) ['a' => 2];

    $deduplicated = toArray(unique([$first, $first, $second]));

    $this->assertSame([$first, $second], $deduplicated);
}

public function testMapKeys() {
$range = range(0, 5);
$mapped = mapKeys(function($n) { return $n * 3; }, $range);
Expand Down