From 1fd4a15e938a743c9afe59631535c7292a135e1b Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Fri, 15 Apr 2016 12:38:07 -0400 Subject: [PATCH 1/5] `FusedIterator` marker trait and `iter::Fuse` specialization This RFC adds a `FusedIterator` marker trait and specializes `iter::Fuse` to do nothing when the underlying iterator already provides the `Fuse` guarantee. --- text/0000-fused-iterator.md | 247 ++++++++++++++++++++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 text/0000-fused-iterator.md diff --git a/text/0000-fused-iterator.md b/text/0000-fused-iterator.md new file mode 100644 index 00000000000..4a57bde77c1 --- /dev/null +++ b/text/0000-fused-iterator.md @@ -0,0 +1,247 @@ +- Feature Name: fused +- Start Date: 2016-04-15 +- RFC PR: (leave this empty) +- Rust Issue: (leave this empty) + +# Summary +[summary]: #summary + +Add a marker trait `FusedIterator` to `std::iter` and implement it on `Fuse` and +applicable iterators and adapters. By implementing `FusedIterator`, an iterator +promises to behave as if `Iterator::fuse()` had been called on it (i.e. return +`None` forever after returning `None` once). Then, specialize `Fuse` to be a +no-op iff `I` implements `FusedIterator`. + +# Motivation +[motivation]: #motivation + +Iterators are allowed to return whatever they want after returning `None` once. +However, assuming that an iterator continues to return `None` can make +implementing some algorithms/adapters easier. Therefore, `Fused` and +`Iterator::fuse` exist. Unfortunately, the `Fused` iterator adapter introduces a +noticeable overhead. Furthermore, many iterators (most if not all iterators in +std) already act as if they were fused (this is considered to be the "polite" +behavior). Therefore, it would be nice to be able to pay the `Fused` overhead +iff necessary. + +Microbenchmarks: + +```text +test fuse ... bench: 200 ns/iter (+/- 13) +test fuse_fuse ... bench: 250 ns/iter (+/- 10) +test myfuse ... bench: 48 ns/iter (+/- 4) +test myfuse_myfuse ... bench: 48 ns/iter (+/- 3) +test range ... bench: 48 ns/iter (+/- 2) +``` + +```rust +#![feature(test, specialization)] +extern crate test; + +use std::ops::Range; + +#[derive(Clone, Debug)] +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +pub struct MyFuse { + iter: I, + done: bool +} + +pub trait Fused: Iterator {} + +trait IterExt: Iterator + Sized { + fn myfuse(self) -> MyFuse { + MyFuse { + iter: self, + done: false, + } + } +} + +impl Fused for MyFuse where MyFuse: Iterator {} +impl Fused for Range where Range: Iterator {} + +impl IterExt for T {} + +impl Iterator for MyFuse where I: Iterator { + type Item = ::Item; + + #[inline] + default fn next(&mut self) -> Option<::Item> { + if self.done { + None + } else { + let next = self.iter.next(); + self.done = next.is_none(); + next + } + } +} + +impl Iterator for MyFuse where I: Iterator + Fused { + #[inline] + fn next(&mut self) -> Option<::Item> { + self.iter.next() + } +} + +impl ExactSizeIterator for MyFuse where I: ExactSizeIterator {} + +#[bench] +fn myfuse(b: &mut test::Bencher) { + b.iter(|| { + for i in (0..100).myfuse() { + test::black_box(i); + } + }) +} + +#[bench] +fn myfuse_myfuse(b: &mut test::Bencher) { + b.iter(|| { + for i in (0..100).myfuse().myfuse() { + test::black_box(i); + } + }); +} + + +#[bench] +fn fuse(b: &mut test::Bencher) { + b.iter(|| { + for i in (0..100).fuse() { + test::black_box(i); + } + }) +} + +#[bench] +fn fuse_fuse(b: &mut test::Bencher) { + b.iter(|| { + for i in (0..100).fuse().fuse() { + test::black_box(i); + } + }); +} + +#[bench] +fn range(b: &mut test::Bencher) { + b.iter(|| { + for i in (0..100) { + test::black_box(i); + } + }) +} +``` + +# Detailed Design +[design]: #detailed-design + +``` +trait FusedIterator: Iterator {} + +impl FusedIterator for Fuse {} + +impl FusedIterator for Range {} +// ...and for most std/core iterators... + + +// Existing implementation of Fuse repeated for convenience +pub struct Fuse { + iterator: I, + done: bool, +} + +impl Iterator for Fuse where I: Iterator { + type Item = I::Item; + + #[inline] + fn next(&mut self) -> Self::Item { + if self.done { + None + } else { + let next = self.iterator.next(); + self.done = next.is_none(); + next + } + } +} + +// Then, specialize Fuse... +impl Iterator for Fuse where I: FusedIterator { + type Item = I::Item; + + #[inline] + fn next(&mut self) -> Self::Item { + // Ignore the done flag and pass through. + // Note: this means that the done flag should *never* be exposed to the + // user. + self.iterator.next() + } +} + +``` + +# Drawbacks +[drawbacks]: #drawbacks + +1. Yet another special iterator trait. +2. There is a useless done flag on no-op `Fuse` adapters. +3. Fuse isn't used very often anyways. However, I would argue that it should be + used more often and people are just playing fast and loose. I'm hoping that + making `Fuse` free when unneeded will encourage people to use it when they should. + +# Alternatives + +## Do Nothing + +Just pay the overhead on the rare occasions when fused is actually used. + +## Associated Type + +Use an associated type (and set it to `Self` for iterators that already provide +the fused guarantee) and an `IntoFused` trait: + +```rust +#![feature(specialization)] +use std::iter::Fuse; + +trait FusedIterator: Iterator {} + +trait IntoFused: Iterator + Sized { + type Fused: Iterator; + fn into_fused(self) -> Self::Fused; +} + +impl IntoFused for T where T: Iterator { + default type Fused = Fuse; + default fn into_fused(self) -> Self::Fused { + // Currently complains about a mismatched type but I think that's a + // specialization bug. + self.fuse() + } +} + +impl IntoFused for T where T: FusedIterator { + type Fused = Self; + + fn into_fused(self) -> Self::Fused { + self + } +} +``` + +For now, this doesn't actually compile because rust believes that the associated +type `Fused` could be specialized independent of the `into_fuse` function. + +While this method gets rid of memory overhead of a no-op `Fuse` wrapper, it adds +complexity, needs to be implemented as a separate trait (because adding +associated types is a breaking change), and can't be used to optimize the +iterators returned from `Iterator::fuse` (users would *have* to call +`IntoFused::into_fused`). + +# Unresolved questions +[unresolved]: #unresolved-questions + +Should this trait be unsafe? I can't think of any way generic unsafe code could +end up relying on the guarantees of `Fused`. From 4b24c5d541b843110e339223c582b0cb41653012 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Fri, 15 Apr 2016 15:09:56 -0400 Subject: [PATCH 2/5] note associated type alternative --- text/0000-fused-iterator.md | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/text/0000-fused-iterator.md b/text/0000-fused-iterator.md index 4a57bde77c1..c7a8570c3a8 100644 --- a/text/0000-fused-iterator.md +++ b/text/0000-fused-iterator.md @@ -197,7 +197,7 @@ impl Iterator for Fuse where I: FusedIterator { Just pay the overhead on the rare occasions when fused is actually used. -## Associated Type +## IntoFused Use an associated type (and set it to `Self` for iterators that already provide the fused guarantee) and an `IntoFused` trait: @@ -240,6 +240,30 @@ associated types is a breaking change), and can't be used to optimize the iterators returned from `Iterator::fuse` (users would *have* to call `IntoFused::into_fused`). +## Associated Type + +If we add the ability to condition associated types on `Self: Sized`, I believe +we can add them without it being a breaking change (associated types only need +to be fully specified on DSTs). If so (after fixing the bug in specialization +noted above), we could do the following: + +```rust +trait Iterator { + type Item; + type Fuse: Iterator where Self: Sized = Fuse; + fn fuse(self) -> Self::Fuse where Self: Sized { + Fuse { + done: false, + iter: self, + } + } + // ... +} +``` + +However, changing an iterator to take advantage of this would be a breaking +change. + # Unresolved questions [unresolved]: #unresolved-questions From 2f7a03010fd31d5c18bbd3835cb79cd9a8b72819 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Fri, 15 Apr 2016 15:20:56 -0400 Subject: [PATCH 3/5] remove potentially confusing iffs --- text/0000-fused-iterator.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/text/0000-fused-iterator.md b/text/0000-fused-iterator.md index c7a8570c3a8..88093c97f3c 100644 --- a/text/0000-fused-iterator.md +++ b/text/0000-fused-iterator.md @@ -10,7 +10,7 @@ Add a marker trait `FusedIterator` to `std::iter` and implement it on `Fuse` applicable iterators and adapters. By implementing `FusedIterator`, an iterator promises to behave as if `Iterator::fuse()` had been called on it (i.e. return `None` forever after returning `None` once). Then, specialize `Fuse` to be a -no-op iff `I` implements `FusedIterator`. +no-op if `I` implements `FusedIterator`. # Motivation [motivation]: #motivation @@ -22,7 +22,7 @@ implementing some algorithms/adapters easier. Therefore, `Fused` and noticeable overhead. Furthermore, many iterators (most if not all iterators in std) already act as if they were fused (this is considered to be the "polite" behavior). Therefore, it would be nice to be able to pay the `Fused` overhead -iff necessary. +only when necessary. Microbenchmarks: From 055c5005ddfad70c51ed5ca52ed4c4533c00c252 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Wed, 27 Jul 2016 10:38:38 -0400 Subject: [PATCH 4/5] Fix type names in motivation/microbenchmark. --- text/0000-fused-iterator.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/text/0000-fused-iterator.md b/text/0000-fused-iterator.md index 88093c97f3c..a6a5d3553a9 100644 --- a/text/0000-fused-iterator.md +++ b/text/0000-fused-iterator.md @@ -17,11 +17,11 @@ no-op if `I` implements `FusedIterator`. Iterators are allowed to return whatever they want after returning `None` once. However, assuming that an iterator continues to return `None` can make -implementing some algorithms/adapters easier. Therefore, `Fused` and -`Iterator::fuse` exist. Unfortunately, the `Fused` iterator adapter introduces a +implementing some algorithms/adapters easier. Therefore, `Fuse` and +`Iterator::fuse` exist. Unfortunately, the `Fuse` iterator adapter introduces a noticeable overhead. Furthermore, many iterators (most if not all iterators in std) already act as if they were fused (this is considered to be the "polite" -behavior). Therefore, it would be nice to be able to pay the `Fused` overhead +behavior). Therefore, it would be nice to be able to pay the `Fuse` overhead only when necessary. Microbenchmarks: @@ -42,28 +42,28 @@ use std::ops::Range; #[derive(Clone, Debug)] #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] -pub struct MyFuse { +pub struct Fuse { iter: I, done: bool } -pub trait Fused: Iterator {} +pub trait FusedIterator: Iterator {} trait IterExt: Iterator + Sized { - fn myfuse(self) -> MyFuse { - MyFuse { + fn myfuse(self) -> Fuse { + Fuse { iter: self, done: false, } } } -impl Fused for MyFuse where MyFuse: Iterator {} -impl Fused for Range where Range: Iterator {} +impl FusedIterator for Fuse where Fuse: Iterator {} +impl FusedIterator for Range where Range: Iterator {} impl IterExt for T {} -impl Iterator for MyFuse where I: Iterator { +impl Iterator for Fuse where I: Iterator { type Item = ::Item; #[inline] @@ -78,14 +78,14 @@ impl Iterator for MyFuse where I: Iterator { } } -impl Iterator for MyFuse where I: Iterator + Fused { +impl Iterator for Fuse where I: FusedIterator { #[inline] fn next(&mut self) -> Option<::Item> { self.iter.next() } } -impl ExactSizeIterator for MyFuse where I: ExactSizeIterator {} +impl ExactSizeIterator for Fuse where I: ExactSizeIterator {} #[bench] fn myfuse(b: &mut test::Bencher) { From c697b6a11392acd8a4ad01d00447867d8968c535 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Wed, 27 Jul 2016 10:42:19 -0400 Subject: [PATCH 5/5] Add unresolved question about removal of the Fuse::done unnecessary field. --- text/0000-fused-iterator.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/text/0000-fused-iterator.md b/text/0000-fused-iterator.md index a6a5d3553a9..f1d6093245a 100644 --- a/text/0000-fused-iterator.md +++ b/text/0000-fused-iterator.md @@ -269,3 +269,8 @@ change. Should this trait be unsafe? I can't think of any way generic unsafe code could end up relying on the guarantees of `Fused`. + +Also, it's possible to implement the specialized `Fuse` struct without a useless +`don` bool. Unfortunately, it's *very* messy. IMO, this is not worth it for now +and can always be fixed in the future as it doesn't change the `FusedIterator` +trait.