Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
target
Cargo.lock
/src/btree_multiset.rs
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ description = "Multisets/bags"
keywords = ["multiset","bag","data-structure","collection","count"]
license = "MIT/Apache-2.0"
authors = ["Jake Mitchell <jacob.d.mitchell@gmail.com>"]

[dev-dependencies]
quickcheck = "0.9.1"
14 changes: 14 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/// Build script: derives `src/btree_multiset.rs` from `src/hash_multiset.rs`
/// by plain textual substitution of the hash-specific names and bounds.
///
/// Panics (via `expect`) if the source file cannot be read or the generated
/// file cannot be written.
fn main() {
    // Ordered `(from, to)` rewrites. They are applied one after another, so
    // the order of the entries is significant and must be preserved.
    const REWRITES: [(&str, &str); 7] = [
        ("Hash + Eq", "Ord"),
        ("Eq + Hash", "Ord"),
        ("hash_map::", "btree_map::"),
        ("HashMap", "BTreeMap"),
        ("HashMultiSet", "BTreeMultiSet"),
        ("use std::hash::Hash;\n", ""),
        ("hash-based multiset", "tree-based multiset"),
    ];

    let hash_source = std::fs::read_to_string("./src/hash_multiset.rs")
        .expect("Could not open hash_multiset source file");

    let btree_source = REWRITES
        .iter()
        .fold(hash_source, |code, &(from, to)| code.replace(from, to));

    std::fs::write("./src/btree_multiset.rs", btree_source.as_bytes())
        .expect("Could not write btree_multiset file");

    // Ask Cargo to re-run this script whenever the template file changes.
    println!("cargo:rerun-if-changed=./src/hash_multiset.rs");
}
49 changes: 6 additions & 43 deletions src/multiset.rs → src/hash_multiset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
// except according to those terms.
#![warn(missing_docs)]

use super::Iter;

use std::borrow::Borrow;
use std::collections::hash_map;
use std::collections::hash_map::{Entry, Keys};
use std::collections::hash_map::{self, Entry, Keys};
use std::collections::HashMap;
use std::fmt;
use std::hash::Hash;
Expand All @@ -26,36 +27,6 @@ where
size: usize,
}

/// Iterator over every item stored in a `HashMultiSet`, duplicates included.
///
/// This `struct` is created by the [`iter`] method on [`HashMultiSet`].
#[derive(Clone)]
pub struct Iter<'a, K>
where
    K: 'a,
{
    /// Underlying iterator over `(key, count)` entries of the map.
    iter: hash_map::Iter<'a, K, usize>,
    /// Entry whose key is currently being repeated, if any.
    duplicate: Option<(&'a K, &'a usize)>,
    /// How many times the key of `duplicate` has been yielded so far.
    duplicate_index: usize,
}

impl<'a, K> Iterator for Iter<'a, K> {
    type Item = &'a K;

    /// Yields the current key again until it has been produced `count`
    /// times, then moves on to the next map entry.
    fn next(&mut self) -> Option<&'a K> {
        let (key, count) = match self.duplicate {
            Some(pending) => pending,
            None => {
                // No entry in progress: fetch the next one, or finish.
                let fetched = self.iter.next()?;
                self.duplicate = Some(fetched);
                fetched
            }
        };

        self.duplicate_index += 1;
        if self.duplicate_index >= *count {
            // This key is exhausted; the next call starts a fresh entry.
            self.duplicate = None;
            self.duplicate_index = 0;
        }
        Some(key)
    }
}

impl<K> HashMultiSet<K>
where
K: Eq + Hash,
Expand Down Expand Up @@ -94,12 +65,8 @@ where
/// }
/// assert_eq!(3, multiset.iter().count());
/// ```
pub fn iter(&self) -> Iter<K> {
Iter {
iter: self.elem_counts.iter(),
duplicate: None,
duplicate_index: 0,
}
pub fn iter(&self) -> Iter<&K, &usize, hash_map::Iter<K, usize>> {
Iter::new(self.elem_counts.iter(), self.size)
}

/// Returns true if the multiset contains no elements.
Expand All @@ -120,10 +87,6 @@ where

/// Returns `true` if the multiset contains a value.
///
/// The value may be any borrowed form of the set's value type, but
/// [`Hash`] and [`Eq`] on the borrowed form *must* match those for
/// the value type.
///
/// # Examples
///
/// ```
Expand All @@ -136,7 +99,7 @@ where
pub fn contains<Q: ?Sized>(&self, value: &Q) -> bool
where
K: Borrow<Q>,
Q: Hash + Eq,
Q: Eq + Hash,
{
self.elem_counts.contains_key(value)
}
Expand Down
174 changes: 174 additions & 0 deletions src/iter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// Copyright 2019 multiset developers
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![warn(missing_docs)]

use std::borrow::Borrow;
use std::marker::PhantomData;

/// An iterator over the items of a multiset, yielding each key once per
/// stored occurrence.
///
/// This `struct` is created by the [`iter`](super::HashMultiSet::iter) method on
/// [`HashMultiSet`](super::HashMultiSet) or [`BTreeMultiSet`](super::BTreeMultiSet).
///
/// `InnerIter` is the wrapped iterator over `(key, count)` pairs; `K` is the
/// key type handed out (cloned on every emission) and `V` is anything that
/// borrows as the `usize` count.
#[derive(Clone)]
pub struct Iter<K: Clone, V: Borrow<usize>, InnerIter: Iterator<Item = (K, V)>> {
    /// Wrapped iterator over `(key, count)` pairs.
    pub(crate) iter: InnerIter,
    /// Pair currently being repeated from the front, if any.
    pub(crate) duplicate: Option<<InnerIter as Iterator>::Item>,
    /// Number of times the key of `duplicate` has already been yielded.
    pub(crate) duplicate_index: usize,
    /// Pair currently being repeated from the back, if any.
    pub(crate) duplicate_back: Option<<InnerIter as Iterator>::Item>,
    /// Number of times the key of `duplicate_back` has already been yielded.
    pub(crate) duplicate_index_back: usize,
    /// Number of items still to be yielded; decremented on every emission.
    pub(crate) len: usize,
    /// Marker that ties the otherwise-unused `K` and `V` parameters to the
    /// struct. A tuple marker is used rather than a raw pointer: a
    /// `*const (K, V)` marker would make the iterator `!Send` and `!Sync`
    /// no matter what `K`, `V` and `InnerIter` are.
    pub(crate) _ghost: PhantomData<(K, V)>,
}

impl<K: Clone, V: Borrow<usize>, InnerIter: Iterator<Item = (K, V)> + ExactSizeIterator>
Iter<K, V, InnerIter>
{
pub(crate) fn new(iter: InnerIter, len: usize) -> Self {
Iter {
iter,
duplicate: None,
duplicate_index: 0,
duplicate_back: None,
duplicate_index_back: 0,
len,
_ghost: PhantomData,
}
}
}

impl<K: Clone, V: Borrow<usize>, InnerIter: Iterator<Item = (K, V)>> Iterator
for Iter<K, V, InnerIter>
{
type Item = K;

/// Yields the next key from the front.
///
/// A key is repeated until it has been produced as many times as its count
/// says; every emitted item decrements `self.len`.
fn next(&mut self) -> Option<Self::Item> {
// No pair in progress at the front: pull the next `(key, count)` pair.
if self.duplicate.is_none() {
self.duplicate = self.iter.next();
}
if let Some((key, count)) = self.duplicate.as_ref() {
self.duplicate_index += 1;
let key = key.clone();
// Key has now been yielded `count` times: drop the pair and reset the
// counter so the next call starts a fresh pair.
if self.duplicate_index >= *count.borrow() {
self.duplicate = None;
self.duplicate_index = 0;
}
self.len -= 1;
Some(key)
} else {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

clippy: this else { if .. } block can be collapsed to else if.

// The inner iterator produced nothing: continue with the pair that
// `next_back` has partially consumed, if there is one.
if let Some((key, count)) = self.duplicate_back.as_ref() {
self.duplicate_index_back += 1;
let key = key.clone();
// NOTE(review): unlike in `next_back`, `duplicate_index_back` is not
// reset to 0 here when the pair is dropped.
if self.duplicate_index_back >= *count.borrow() {
self.duplicate_back = None;
}
self.len -= 1;
Some(key)
} else {
None
}
}
}

/// Returns the number of remaining items as reported by `len`, without
/// stepping through the iterator.
fn count(self) -> usize {
    ExactSizeIterator::len(&self)
}

/// Folds every remaining item, front to back, into an accumulator.
///
/// The remaining items are described as `(key, repetitions)` groups: the
/// partially consumed front pair, then the untouched inner pairs, then the
/// partially consumed back pair. `f` is called once per repetition with a
/// clone of the key.
fn fold<B, F>(self, init: B, mut f: F) -> B
where
    F: FnMut(B, Self::Item) -> B,
{
    let consumed_front = self.duplicate_index;
    let consumed_back = self.duplicate_index_back;

    // Pairs in progress only contribute the repetitions not yet yielded.
    let front = self
        .duplicate
        .map(move |(key, count)| (key, *count.borrow() - consumed_front));
    let middle = self.iter.map(|(key, count)| (key, *count.borrow()));
    let back = self
        .duplicate_back
        .map(move |(key, count)| (key, *count.borrow() - consumed_back));

    front
        .into_iter()
        .chain(middle)
        .chain(back)
        .fold(init, move |mut acc, (key, remaining)| {
            for _ in 0..remaining {
                acc = f(acc, key.clone());
            }
            acc
        })
}

/// Reports the value of `len` as both the lower and the upper bound.
fn size_hint(&self) -> (usize, Option<usize>) {
    let remaining = self.len();
    (remaining, Some(remaining))
}
}

impl<K, V, InnerIter> ExactSizeIterator for Iter<K, V, InnerIter>
where
    K: Clone,
    V: Borrow<usize>,
    InnerIter: Iterator<Item = (K, V)>,
{
    /// Returns the tracked number of items still to be yielded.
    fn len(&self) -> usize {
        self.len
    }
}

impl<K: Clone, V: Borrow<usize>, InnerIter: Iterator<Item = (K, V)> + DoubleEndedIterator>
DoubleEndedIterator for Iter<K, V, InnerIter>
{
/// Yields the next key from the back.
///
/// Mirrors `next`: a key is repeated until it has been produced as many
/// times as its count says; every emitted item decrements `self.len`.
fn next_back(&mut self) -> Option<Self::Item> {
// No pair in progress at the back: pull the last `(key, count)` pair.
if self.duplicate_back.is_none() {
self.duplicate_back = self.iter.next_back();
}
if let Some((key, count)) = self.duplicate_back.as_ref() {
self.duplicate_index_back += 1;
let key = key.clone();
// Key has now been yielded `count` times: drop the pair and reset the
// counter so the next call starts a fresh pair.
if self.duplicate_index_back >= *count.borrow() {
self.duplicate_back = None;
self.duplicate_index_back = 0;
}
self.len -= 1;
Some(key)
} else {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

clippy: this else { if .. } block can be collapsed to else if.

// The inner iterator produced nothing: continue with the pair that
// `next` has partially consumed, if there is one.
if let Some((key, count)) = self.duplicate.as_ref() {
self.duplicate_index += 1;
let key = key.clone();
// NOTE(review): unlike in `next`, `duplicate_index` is not reset to 0
// here when the pair is dropped.
if self.duplicate_index >= *count.borrow() {
self.duplicate = None;
}
self.len -= 1;
Some(key)
} else {
None
}
}
}

/// Folds every remaining item, back to front, into an accumulator.
///
/// The remaining items are described as `(key, repetitions)` groups: the
/// partially consumed back pair, then the untouched inner pairs in reverse,
/// then the partially consumed front pair. `f` is called once per
/// repetition with a clone of the key.
fn rfold<B, F>(self, init: B, mut f: F) -> B
where
    F: FnMut(B, Self::Item) -> B,
{
    let consumed_front = self.duplicate_index;
    let consumed_back = self.duplicate_index_back;

    // Pairs in progress only contribute the repetitions not yet yielded.
    let back = self
        .duplicate_back
        .map(move |(key, count)| (key, *count.borrow() - consumed_back));
    let middle = self.iter.rev().map(|(key, count)| (key, *count.borrow()));
    let front = self
        .duplicate
        .map(move |(key, count)| (key, *count.borrow() - consumed_front));

    back.into_iter()
        .chain(middle)
        .chain(front)
        .fold(init, move |mut acc, (key, remaining)| {
            for _ in 0..remaining {
                acc = f(acc, key.clone());
            }
            acc
        })
}
}

// `Iter` is declared fused only when the wrapped iterator is itself fused.
impl<K, V, InnerIter> std::iter::FusedIterator for Iter<K, V, InnerIter>
where
    K: Clone,
    V: Borrow<usize>,
    InnerIter: Iterator<Item = (K, V)> + std::iter::FusedIterator,
{
}
8 changes: 6 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
//! permit duplicates. Consequently, they're useful for maintaining a
//! count of distinct values.

mod multiset;
mod btree_multiset;
mod hash_multiset;
mod iter;

pub use multiset::{HashMultiSet, Iter};
pub use btree_multiset::BTreeMultiSet;
pub use hash_multiset::HashMultiSet;
pub use iter::Iter;
96 changes: 96 additions & 0 deletions tests/specializations.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#[macro_use]
extern crate quickcheck;

use std::fmt::Debug;
use std::ops::BitXor;

/// Wrapper that forwards only `next` and `size_hint` to the inner iterator,
/// so every other `Iterator` method falls back to its default implementation.
struct Unspecialized<I>(I);

impl<I: Iterator> Iterator for Unspecialized<I> {
    type Item = <I as Iterator>::Item;

    /// Delegates straight to the wrapped iterator.
    #[inline(always)]
    fn next(&mut self) -> Option<Self::Item> {
        let Unspecialized(inner) = self;
        inner.next()
    }

    /// Delegates straight to the wrapped iterator.
    #[inline(always)]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let Unspecialized(inner) = self;
        inner.size_hint()
    }
}

/// Asserts that `mapper` produces the same value for a clone of `iterator`
/// wrapped in `Unspecialized` as for a plain clone of `iterator`.
///
/// Panics (through `assert_eq!`) when the two results differ.
fn check_specialized<'a, V, IterItem, Iter, F>(iterator: &Iter, mapper: F)
where
    V: Eq + Debug,
    IterItem: 'a,
    Iter: Iterator<Item = IterItem> + Clone + 'a,
    F: Fn(Box<dyn Iterator<Item = IterItem> + 'a>) -> V,
{
    // The wrapped clone is evaluated first, then the direct clone.
    let via_wrapper = mapper(Box::new(Unspecialized(iterator.clone())));
    let via_direct = mapper(Box::new(iterator.clone()));
    assert_eq!(via_wrapper, via_direct);
}

/// Runs `check_specialized` for `count`, `last` and `nth`, then checks that
/// `size_hint` stays consistent with the real remaining length while the
/// iterator is stepped with `next`.
///
/// When `known_expected_size` is `Some`, the total item count must equal it.
fn check_specialized_count_last_nth_sizeh<'a, IterItem, Iter>(
    it: &Iter,
    known_expected_size: Option<usize>,
) where
    IterItem: 'a + Eq + Debug,
    Iter: Iterator<Item = IterItem> + Clone + 'a,
{
    let total = it.clone().count();
    if let Some(expected) = known_expected_size {
        assert_eq!(total, expected);
    }

    check_specialized(it, |boxed| boxed.count());
    check_specialized(it, |boxed| boxed.last());
    // Go two positions past the end so out-of-range `nth` calls are covered.
    (0..total + 2).for_each(|skip| check_specialized(it, |mut boxed| boxed.nth(skip)));

    let mut cursor = it.clone();
    for consumed in 0..total + 2 {
        let remaining = cursor.clone().count();
        let (lower, upper) = cursor.size_hint();
        // After `consumed` steps, `total - consumed` items remain (never below 0).
        assert_eq!(total.saturating_sub(consumed), remaining);
        assert!(lower <= remaining);
        if let Some(upper) = upper {
            assert!(remaining <= upper);
        }
        cursor.next();
    }
}

/// Runs `check_specialized` with a mapper that XOR-folds the whole iterator.
///
/// The mapper takes the first item, turns it into the `BitXor::Output` type
/// using only XOR operations, and then folds the remaining items into that
/// seed with `v ^ acc`. An empty iterator gives `None`.
fn check_specialized_fold_xor<'a, IterItem, Iter>(it: &Iter)
where
IterItem: 'a
+ BitXor
+ Eq
+ Debug
+ BitXor<<IterItem as BitXor>::Output, Output = <IterItem as BitXor>::Output>
+ Clone,
<IterItem as BitXor>::Output:
BitXor<Output = <IterItem as BitXor>::Output> + Eq + Debug + Clone,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems unnecessary to abstract this much and create so many single use functions. I wrote-up an example of a more specific implementation. (I took the freedom to use a macro for check_specialized.)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comes from an implementation I had originally written for itertools, where I had several different iterators to test, so the functions were not single use. I just reused the same. ^^
Tbh, I hate writing tests, but I'm absolutely open to use whichever test implementation you prefer. :)

Iter: Iterator<Item = IterItem> + Clone + 'a,
{
check_specialized(it, |mut i| {
// Seed: `f ^ (f ^ f)` of the first item, which has the `Output` type.
let first = i.next().map(|f| f.clone() ^ (f.clone() ^ f));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why since x ^ x ^ x == x? How about Default instead? Or even better yet 0i32?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That minimizes the amount of requirements on the generic function. Only output is constrained, so I need to hit a few XORs here to satisfy the type system.
Now again, if you'd rather use another implementation that doesn't try to minimize the constraints when writing the final test (and I agree this particular constraint probably wouldn't change anything), I'm absolutely open to it :)

// XOR each remaining item into the seed; stays `None` when there was no first item.
i.fold(first, |acc, v: IterItem| acc.map(move |a| v ^ a))
});
}

/// Collects `test_vec` into a `HashMultiSet` and runs the specialization
/// checks against its iterator.
///
/// NOTE(review): a provided `known_expected_size` is incremented by one
/// before being compared with the item count; confirm that this is intended.
fn hms_test(test_vec: Vec<i32>, known_expected_size: Option<usize>) {
    let hms = test_vec
        .into_iter()
        .collect::<multiset::HashMultiSet<_>>();
    let iter = hms.iter();
    let expected_size = known_expected_size.map(|size| size + 1);
    check_specialized_count_last_nth_sizeh(&iter, expected_size);
    check_specialized_fold_xor(&iter);
}

// Property test: runs `hms_test` on each `Vec<i32>` supplied by quickcheck,
// without a known expected size.
quickcheck! {
fn hms_test_qc(test_vec: Vec<i32>) -> () {
Copy link
Collaborator

@mashedcode mashedcode Feb 23, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does "hms" stand for? hash multi set test? Or does it just test some iterator functionality?

Copy link
Author

@Ten0 Ten0 Feb 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, hms stands for "hash multi set". qc is for quickcheck, a crate that allows to test on randomly generated inputs.

hms_test(test_vec, None)
}
}