10

The following code does not compile:

use std::str::Chars;

struct Chunks {
    remaining: Chars, // E0106 is reported here: no lifetime is given for `Chars`
}

// Constructor that takes the `String` by value and tries to keep an iterator
// over its characters in the returned struct.
impl Chunks {
    fn new(s: String) -> Self {
        Chunks {
            // `s.chars()` borrows from `s`, but `s` itself is not stored in the
            // struct, so the iterator has nothing to borrow from once `new` returns.
            remaining: s.chars(),
        }
    }
}

The error is:

error[E0106]: missing lifetime specifier
 --> src/main.rs:4:16
  |
4 |     remaining: Chars,
  |                ^^^^^ expected lifetime parameter

Chars doesn't own the characters it iterates over and it can't outlive the &str or String it was created from.

Is there an owned version of Chars that does not need a lifetime parameter or do I have to keep a Vec<char> and an index myself?

1
  • As an aside: it's likely good to make Chunks generic over the iterator type: struct Chunks<Iter> {...}, impl<Iter: Iterator<Item = char>> Chunks<Iter> {..}, fn new<IntoIter: IntoIterator<Item = char>>(iter: IntoIter) ->Self {...}. Then it doesn't care at all about where the chars come from; it could be owned, borrowed, whatever.
    – chbaker0
    Commented Mar 14, 2023 at 21:28

6 Answers 6

7

There's also the owned-chars crate, which

provides an extension trait for String with two methods, into_chars and into_char_indices. These methods parallel String::chars and String::char_indices, but the iterators they create consume the String instead of borrowing it.

6

std::vec::IntoIter is an owned version of every iterator, in a sense.

use std::vec::IntoIter;

/// Holds the characters of a string in an owned buffer so that no lifetime
/// parameter is needed on the struct.
struct Chunks {
    /// Characters that have not been consumed yet.
    remaining: IntoIter<char>,
}

impl Chunks {
    /// Decodes `s` into a `Vec<char>` and keeps the vector's owning iterator.
    ///
    /// This costs one extra allocation for the intermediate vector.
    fn new(s: String) -> Self {
        let buffered: Vec<char> = s.chars().collect();
        Self {
            remaining: buffered.into_iter(),
        }
    }
}

Playground link

The downside is an additional allocation and some space overhead, but I am not aware of a ready-made owning iterator for your specific case.

5

Ouroboros

You can use the ouroboros crate to create a self-referential struct containing the String and a Chars iterator:

use ouroboros::self_referencing; // 0.4.1
use std::str::Chars;

// Self-referential pair: the struct owns `string` and also stores a `Chars`
// iterator that borrows from that same field. The `#[self_referencing]`
// attribute comes from the `ouroboros` crate imported above.
#[self_referencing]
pub struct IntoChars {
    // The owned text being iterated.
    string: String,
    // Iterator borrowing from `string`; `'this` is the lifetime name used for
    // borrows of sibling fields under `#[borrows(..)]`.
    #[borrows(string)]
    chars: Chars<'this>,
}

// All these implementations are based on what `Chars` implements itself

// Forwards the iterator methods to the inner `chars` field.
// NOTE(review): `with` / `with_mut` are not defined in this snippet; presumably
// they are accessors generated by `#[self_referencing]` — confirm against the
// ouroboros version in use.
impl Iterator for IntoChars {
    type Item = char;

    // Yields the next character by advancing the inner iterator.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.with_mut(|me| me.chars.next())
    }

    // Consumes the iterator and returns whatever the inner iterator's `count` reports.
    #[inline]
    fn count(mut self) -> usize {
        self.with_mut(|me| me.chars.count())
    }

    // Read-only access is enough here, hence `with` instead of `with_mut`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.with(|me| me.chars.size_hint())
    }

    // Consumes the iterator and returns the inner iterator's `last` result.
    #[inline]
    fn last(mut self) -> Option<Self::Item> {
        self.with_mut(|me| me.chars.last())
    }
}

// Allows iterating from the back by forwarding to the inner `chars` field.
impl DoubleEndedIterator for IntoChars {
    // Yields the next character from the end of the inner iterator.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        self.with_mut(|me| me.chars.next_back())
    }
}

// Marker impl with no methods: `next` only forwards to `Chars`, which is itself
// a `FusedIterator` in the standard library.
impl std::iter::FusedIterator for IntoChars {}

// And an extension trait for convenience

/// Extension trait adding a consuming `into_chars` method.
trait IntoCharsExt {
    /// Consumes `self` and returns an [`IntoChars`] iterator.
    fn into_chars(self) -> IntoChars;
}

// Implements the extension trait for `String`.
// NOTE(review): `IntoCharsBuilder` is not defined in this snippet; presumably
// it is generated by `#[self_referencing]` on `IntoChars` — confirm.
impl IntoCharsExt for String {
    fn into_chars(self) -> IntoChars {
        IntoCharsBuilder {
            // The string is moved into the struct...
            string: self,
            // ...and this closure creates the `Chars` iterator from a reference to it.
            chars_builder: |s| s.chars(),
        }
        .build()
    }
}

See also:

Rental

You can use the rental crate to create a self-referential struct containing the String and a Chars iterator:

#[macro_use]
extern crate rental;

// Declares, via the `rental!` macro, a module containing a struct that owns a
// `String` together with a `Chars` iterator whose lifetime is named after the
// `string` field (`'string`).
// NOTE(review): the accessors and constructor used further down (`rent`,
// `rent_mut`, `IntoChars::new`) are not defined in this snippet; presumably
// they are generated by `#[rental]` — confirm against the rental docs.
rental! {
    mod into_chars {
        pub use std::str::Chars;

        #[rental]
        pub struct IntoChars {
            string: String,
            chars: Chars<'string>,
        }
    }
}

use into_chars::IntoChars;

// All these implementations are based on what `Chars` implements itself

// Forwards the iterator methods to the `Chars` value handed to the
// `rent` / `rent_mut` closures.
impl Iterator for IntoChars {
    type Item = char;

    // Yields the next character by advancing the inner iterator.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.rent_mut(|chars| chars.next())
    }

    // Consumes the iterator and returns whatever the inner iterator's `count` reports.
    #[inline]
    fn count(mut self) -> usize {
        self.rent_mut(|chars| chars.count())
    }

    // Read-only access is enough here, hence `rent` instead of `rent_mut`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.rent(|chars| chars.size_hint())
    }

    // Consumes the iterator and returns the inner iterator's `last` result.
    #[inline]
    fn last(mut self) -> Option<Self::Item> {
        self.rent_mut(|chars| chars.last())
    }
}

// Allows iterating from the back by forwarding to the inner `Chars`.
impl DoubleEndedIterator for IntoChars {
    // Yields the next character from the end of the inner iterator.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        self.rent_mut(|chars| chars.next_back())
    }
}

// Marker impl with no methods: `next` only forwards to `Chars`, which is itself
// a `FusedIterator` in the standard library.
impl std::iter::FusedIterator for IntoChars {}

// And an extension trait for convenience 

/// Extension trait adding a consuming `into_chars` method.
trait IntoCharsExt {
    /// Consumes `self` and returns an [`IntoChars`] iterator.
    fn into_chars(self) -> IntoChars;
}

// Implements the extension trait for `String`.
impl IntoCharsExt for String {
    fn into_chars(self) -> IntoChars {
        // The string is moved in; the closure creates the `Chars` iterator
        // from a reference to it.
        IntoChars::new(self, |s| s.chars())
    }
}

See also:

3

Here is a solution without unsafe.

It provides the same effect as s.chars().collect::<Vec<_>>().into_iter(), but without the allocation overhead.

Further, it is probably as fast as it can possibly get. It doesn't reallocate and doesn't iterate repeatedly; it simply steps from character to character in O(1) per step, giving a total iteration cost of O(n). This is also the lower bound for iterating over anything.

On top of that, it isn't self-referential. So this approach is probably what you want: it combines all the advantages of the other answers and doesn't have any drawbacks.

/// An owning iterator over the characters of a `String`.
///
/// Instead of storing a borrowing `Chars` next to the string (which would be
/// self-referential), it stores the string plus a byte offset and re-slices
/// the string on every step. No `unsafe` and no extra allocation are needed.
struct OwnedChars {
    /// The text being iterated; never modified after construction.
    s: String,
    /// Byte offset into `s` of the next character to yield. It only ever
    /// advances by whole characters, so it always sits on a char boundary.
    index: usize,
}

impl OwnedChars {
    /// Creates an iterator positioned at the first character of `s`.
    pub fn new(s: String) -> Self {
        Self { s, index: 0 }
    }
}

impl Iterator for OwnedChars {
    type Item = char;

    /// Returns the next character, or `None` once the string is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        // Decode exactly one character from the unread tail of the string.
        let next_char = self.s[self.index..].chars().next()?;

        // Skip the bytes that character occupies in the UTF-8 encoding.
        self.index += next_char.len_utf8();

        Some(next_char)
    }

    /// Reports the same bounds `Chars` would report for the unread tail.
    ///
    /// Without this override the default `(0, None)` is used, which prevents
    /// consumers such as `collect` from reserving capacity up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.s[self.index..].chars().size_hint()
    }
}

// Once `index` reaches the end of the string, the tail slice is empty and
// `next` keeps returning `None`.
impl std::iter::FusedIterator for OwnedChars {}

Together with a little bit of trait magic:

/// Extension trait that adds a consuming `into_chars` method.
trait StringExt {
    /// Consumes `self` and returns an [`OwnedChars`] iterator.
    fn into_chars(self) -> OwnedChars;
}
// Implemented for `String` only; simply forwards to `OwnedChars::new`.
impl StringExt for String {
    fn into_chars(self) -> OwnedChars {
        OwnedChars::new(self)
    }
}

You can do:

/// Example consumer: stores an owning character iterator, so the struct
/// needs no lifetime parameter.
struct Chunks {
    /// Characters that have not been consumed yet.
    remaining: OwnedChars,
}

impl Chunks {
    /// Takes ownership of `s` and turns it into the stored iterator.
    fn new(s: String) -> Self {
        let remaining = s.into_chars();
        Self { remaining }
    }
}
2
  • I suppose s.chars().collect::<Vec<_>>().into_iter() does not allocate since it is a special case that is handled by the standard library? Commented Sep 19, 2023 at 6:54
  • @StephenChung I'm not sure. I had the impression that it does allocate. What's the reason you believe it doesn't?
    – Finomnis
    Commented Sep 21, 2023 at 15:42
1

As copied from How can I store a Chars iterator in the same struct as the String it is iterating on?:

use std::mem;
use std::str::Chars;

/// An iterator over the characters of a `String` that it owns itself.
///
/// `chars` really borrows from the heap buffer of `_s`; its lifetime is
/// erased to `'static` so that both can live in the same struct. This is
/// believed to be sound because:
///
/// * the bytes of a `String` live on the heap, so moving the struct does not
///   move them (stable address);
/// * `_s` is never modified or handed out, so the buffer is never
///   reallocated or freed while `chars` exists (stable address);
/// * `chars` is private and never returned, so the fake `'static` lifetime
///   cannot escape the struct.
///
/// Fields are dropped in declaration order, so `chars` is declared first:
/// the borrower is gone before the buffer it points into is freed. (`Chars`
/// has no destructor, so this is for good measure rather than necessity.)
///
/// Do not derive `Clone` for this type: a cloned `chars` would still point
/// into the original string's buffer.
struct OwningChars {
    chars: Chars<'static>,
    _s: String,
}

impl OwningChars {
    /// Takes ownership of `s` and returns an iterator over its characters.
    fn new(s: String) -> Self {
        // SAFETY: only the lifetime parameter changes, so source and target
        // have identical layout. The iterator points into the heap buffer of
        // `s`, which is stored alongside it below, is never mutated, and is
        // dropped after `chars` (see the struct docs).
        let chars = unsafe { mem::transmute::<Chars<'_>, Chars<'static>>(s.chars()) };
        OwningChars { chars, _s: s }
    }
}

impl Iterator for OwningChars {
    type Item = char;

    /// Yields the next character of the owned string.
    fn next(&mut self) -> Option<Self::Item> {
        self.chars.next()
    }

    /// Forwards the inner iterator's bounds so consumers can pre-allocate.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.chars.size_hint()
    }
}
0

You could implement your own iterator, or wrap Chars like this (with just one small unsafe block):

/// An owning iterator over the characters of any owned, string-like value.
///
/// Deriving `Clone` would be buggy: the cloned `inner` would keep pointing
/// into the original `storage`. (With `Rc<>`/`Arc<>` instead of `Box<>` it
/// would work.) A manual `Clone` impl is provided below instead.
struct OwnedChars {
    // Struct fields are dropped in the order they are declared,
    // see https://stackoverflow.com/a/41056727/1478356
    // With `Chars` it probably doesn't matter, but for good style `inner`
    // should be dropped before `storage`.

    // The `'static` lifetime must not "escape" the lifetime of the struct.
    inner: ::std::str::Chars<'static>,
    // We need to box anyway to be sure the inner reference doesn't move when
    // the storage is moved, so we can erase the type as well.
    // `struct OwnedChars<S: AsRef<str>> { ..., storage: Box<S> }` should work too.
    storage: Box<dyn AsRef<str>>,
}

impl OwnedChars {
    /// Boxes `s` and creates an iterator over the string it exposes through
    /// `AsRef<str>`.
    pub fn new<S: AsRef<str>+'static>(s: S) -> Self {
        let storage = Box::new(s) as Box<dyn AsRef<str>>;
        let raw_ptr: *const str = storage.as_ref().as_ref();
        // SAFETY: `raw_ptr` was just derived from a live reference, so it is
        // non-null and points at valid UTF-8. The box it was obtained from is
        // stored in the returned struct and declared after `inner`, so the
        // pointee is not freed before the iterator is dropped.
        // NOTE(review): this assumes the `str` returned by `S::as_ref` stays
        // valid when the `Box` itself is moved and for as long as `S` lives —
        // true for `String`/`Box<str>`; confirm for other `S`.
        let ptr: &'static str = unsafe { &*raw_ptr };
        OwnedChars {
            storage,
            inner: ptr.chars(),
        }
    }

    /// Returns the part of the string that has not been iterated yet.
    ///
    /// The signature ties the result to `&self`, so the internal `'static`
    /// lifetime is not leaked to the caller.
    pub fn as_str(&self) -> &str {
        self.inner.as_str()
    }
}

impl Iterator for OwnedChars {
    // just `char` of course
    type Item = <::std::str::Chars<'static> as Iterator>::Item;

    /// Yields the next character from the front.
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }
}

impl DoubleEndedIterator for OwnedChars {
    /// Yields the next character from the back.
    fn next_back(&mut self) -> Option<Self::Item> {
        self.inner.next_back()
    }
}

impl Clone for OwnedChars {
    /// Creates an independent iterator over the *remaining* characters only.
    fn clone(&self) -> Self {
        // A new allocation is needed anyway, so simply go for `String` and
        // copy just the part that has not been consumed yet.
        OwnedChars::new(String::from(self.inner.as_str()))
    }
}

impl ::std::fmt::Debug for OwnedChars {
    /// Shows the full backing string (`storage`) and the iterator state (`inner`).
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        let storage: &str = self.storage.as_ref().as_ref();
        f.debug_struct("OwnedChars")
            .field("storage", &storage)
            .field("inner", &self.inner)
            .finish()
    }
}

// Convenience: method-call syntax for building an `OwnedChars`.
trait StringExt {
    /// Consumes `self` and returns an iterator over its characters.
    fn owned_chars(self) -> OwnedChars;
}

// Blanket impl for every owned (`'static`) type that can be viewed as a `str`.
impl<S> StringExt for S
where
    S: AsRef<str> + 'static,
{
    fn owned_chars(self) -> OwnedChars {
        OwnedChars::new(self)
    }
}

See playground

5
  • 2
    The same thing, but using rental crate. Unfortunately, it doesn't work in playground.
    – red75prime
    Commented Nov 9, 2017 at 10:15
  • Why is the extra box necessary? S can only be String, Box<str> or some other kind of owning str reference, right? So the storage must be heap allocated (if it's not 'static) and therefore won't move until the S is dropped. (As long as OwnedChars doesn't push things on or otherwise trigger a move.)
    – trent
    Commented Nov 9, 2017 at 14:11
  • I could create a string storage type with small-string optimization (see the smallvec crate).
    – Stefan
    Commented Nov 9, 2017 at 14:22
  • @Stefan Ah, true. But it seems like the normal use for this struct is when you have a String in hand and in that case it's double boxed. Do you think it would be safe to store a Box<str> instead and have new<S: Into<Box<str>>>? That would work for any reference as well as owned Strings, only copies the contents when necessary, and doesn't double-box.
    – trent
    Commented Nov 9, 2017 at 14:51
  • I'm not sure about the allocation overhead of converting String to Box<str> - if it reuses the Vec memory this should be faster, yes. If you know you only want to do this for Strings you can just use that (unboxed) instead too of course - afaict String guarantees heap allocation.
    – Stefan
    Commented Nov 9, 2017 at 15:01

Not the answer you're looking for? Browse other questions tagged or ask your own question.