Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions Sources/NIOFS/BufferedReader+Split.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the SwiftNIO open source project
//
// Copyright (c) 2023 Apple Inc. and the SwiftNIO project authors
// Licensed under Apache License v2.0
//
// See LICENSE.txt for license information
// See CONTRIBUTORS.txt for the list of SwiftNIO project authors
//
// SPDX-License-Identifier: Apache-2.0
//
//===----------------------------------------------------------------------===//

import NIOCore

@available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
extension BufferedReader {
    /// Splits the bytes of the reader's file at every byte matching a predicate.
    ///
    /// Each yielded buffer is a maximal run of bytes for which `isSeparator`
    /// returns `false`. The separator bytes themselves are never part of a
    /// yielded buffer.
    ///
    /// Usage example:
    /// ```swift
    /// let myBufferedReader: BufferedReader = ...
    /// let whitespace = UInt8(ascii: " ")
    /// for try await buffer in myBufferedReader.split(whereSeparator: { $0 == whitespace }) {
    ///     print("Split by whitespaces!\n", buffer.hexDump(format: .detailed))
    /// }
    /// ```
    ///
    /// - Parameters:
    ///   - omittingEmptySubsequences: When `true` (the default) empty buffers are
    ///     skipped. When `false`, an empty buffer is produced between every pair
    ///     of adjacent separator bytes, and for a separator byte at the very
    ///     start or very end of the file.
    ///   - isSeparator: Returns `true` for bytes the file should be split on and
    ///     `false` for every other byte.
    /// - Returns: An `AsyncSequence` of `ByteBuffer`s read from the reader's file.
    ///
    /// - Complexity: O(*n*), where *n* is the length of the file.
    @inlinable
    public consuming func split(
        omittingEmptySubsequences: Bool = true,
        whereSeparator isSeparator: @Sendable @escaping (UInt8) -> Bool
    ) -> SplitSequence {
        return .init(
            reader: self,
            omittingEmptySubsequences: omittingEmptySubsequences,
            isSeparator: isSeparator
        )
    }

    /// Splits the bytes of the reader's file around every occurrence of a single byte.
    ///
    /// This is a convenience over
    /// ``BufferedReader/split(omittingEmptySubsequences:whereSeparator:)`` whose
    /// predicate matches exactly `separator`.
    ///
    /// Usage example:
    /// ```swift
    /// let myBufferedReader: BufferedReader = ...
    /// let whitespace = UInt8(ascii: " ")
    /// for try await buffer in myBufferedReader.split(separator: whitespace) {
    ///     print("Split by whitespaces!\n", buffer.hexDump(format: .detailed))
    /// }
    /// ```
    ///
    /// - Parameters:
    ///   - separator: The byte to split on.
    ///   - omittingEmptySubsequences: When `true` (the default) empty buffers are
    ///     skipped. When `false`, an empty buffer is produced between every pair
    ///     of adjacent `separator` bytes, and for a `separator` at the very start
    ///     or very end of the file.
    /// - Returns: An `AsyncSequence` of `ByteBuffer`s read from the reader's file.
    ///
    /// - Complexity: O(*n*), where *n* is the length of the file.
    @inlinable
    public consuming func split(
        separator: UInt8,
        omittingEmptySubsequences: Bool = true
    ) -> SplitSequence {
        // Express "equal to `separator`" as a predicate and reuse the general overload.
        let matchesSeparator: @Sendable (UInt8) -> Bool = { byte in
            byte == separator
        }
        return self.split(
            omittingEmptySubsequences: omittingEmptySubsequences,
            whereSeparator: matchesSeparator
        )
    }

    /// The asynchronous sequence of ``ByteBuffer``s produced by splitting a
    /// ``BufferedReader``'s file.
    ///
    /// Instances are created by
    /// ``BufferedReader/split(omittingEmptySubsequences:whereSeparator:)`` and
    /// ``BufferedReader/split(separator:omittingEmptySubsequences:)``.
    public struct SplitSequence {
        // The reader the bytes are pulled from.
        var reader: BufferedReader<Handle>
        // Whether zero-length pieces are skipped instead of being yielded.
        var omittingEmptySubsequences: Bool
        // Decides which bytes terminate a piece.
        var isSeparator: @Sendable (UInt8) -> Bool

        @usableFromInline
        init(
            reader: BufferedReader<Handle>,
            omittingEmptySubsequences: Bool,
            isSeparator: @Sendable @escaping (UInt8) -> Bool
        ) {
            self.reader = reader
            self.omittingEmptySubsequences = omittingEmptySubsequences
            self.isSeparator = isSeparator
        }
    }
}

@available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
extension BufferedReader.SplitSequence: AsyncSequence {
    /// Returns an iterator over the elements of this sequence.
    public func makeAsyncIterator() -> AsyncIterator {
        AsyncIterator(base: self)
    }

    /// An iterator over the elements of this sequence.
    public struct AsyncIterator: AsyncIteratorProtocol {
        // The sequence being iterated; its reader is advanced on every call to `next()`.
        var base: BufferedReader<Handle>.SplitSequence
        // Set once a read reports end-of-file; afterwards `next()` only returns `nil`.
        var ended = false

        /// Returns the next element in the sequence, or `nil` if the sequence has ended.
        ///
        /// Each call reads bytes up to the next separator byte (or the end of the
        /// file) and then drops that separator byte. Empty pieces are skipped when
        /// the sequence was created with `omittingEmptySubsequences` set to `true`.
        ///
        /// - Returns: The next piece of the file, or `nil` once the end of the file
        ///   has been reached and every piece has been returned.
        /// - Throws: Any error thrown by the underlying reader while reading or dropping bytes.
        public mutating func next() async throws -> ByteBuffer? {
            // Copy the predicate out so the closure below does not need to read
            // `self` while `self.base.reader` is being mutated.
            let isSeparator = self.base.isSeparator

            // Loop rather than recurse: a long run of consecutive separators would
            // otherwise nest one suspended call per skipped empty piece.
            while !self.ended {
                let (buffer, eof) = try await self.base.reader.read(while: { byte in
                    !isSeparator(byte)
                })

                if eof {
                    self.ended = true
                } else {
                    // The read stopped on a separator byte: discard it.
                    try await self.base.reader.drop(1)
                }

                if self.base.omittingEmptySubsequences, buffer.readableBytes == 0 {
                    // Skip the empty piece. If this was also the end of the file,
                    // the loop condition fails and `nil` is returned below.
                    continue
                }

                return buffer
            }

            return nil
        }
    }
}

// `SplitSequence` stores a `BufferedReader`, a `Bool` and a `@Sendable` closure, so its
// `Sendable` conformance is conditional on the stored `BufferedReader` being `Sendable`.
@available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
extension BufferedReader.SplitSequence: Sendable where BufferedReader: Sendable {}

// The iterator stores the `SplitSequence` it iterates plus a `Bool` flag, so it uses the
// same condition: it is `Sendable` when the underlying `BufferedReader` is `Sendable`.
@available(macOS 10.15, iOS 13.0, watchOS 6.0, tvOS 13.0, *)
extension BufferedReader.SplitSequence.AsyncIterator: Sendable where BufferedReader: Sendable {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These look wrong: I think the conformances need to be conditional on `Handle: Sendable` instead.

Copy link
Contributor Author

@MahdiBM MahdiBM Oct 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Emmm both are fine. BufferedReader being Sendable implies Handle being Sendable.
This type stores a BufferedReader, so my preference was to not concern it with Handle.
I can use Handle if you prefer that.

85 changes: 68 additions & 17 deletions Tests/NIOFSIntegrationTests/BufferedReaderTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ final class BufferedReaderTests: XCTestCase {
}
}

func testBufferedReaderReadingText() async throws {
func testBufferedReaderSplitOmittingEmptySubsequences() async throws {
let fs = FileSystem.shared
let path = try await fs.temporaryFilePath()

Expand All @@ -221,32 +221,27 @@ final class BufferedReaderTests: XCTestCase {
}

try await fs.withFileHandle(forReadingAt: path) { file in
var reader = file.bufferedReader()
let reader = file.bufferedReader()
var words = [String]()

func isWordIsh(_ byte: UInt8) -> Bool {
@Sendable func isNotWordIsh(_ byte: UInt8) -> Bool {
switch byte {
case UInt8(ascii: "a")...UInt8(ascii: "z"),
UInt8(ascii: "A")...UInt8(ascii: "Z"),
UInt8(ascii: "'"):
return true
default:
return false
default:
return true
}
}

repeat {
// Gobble up whitespace etc..
try await reader.drop(while: { !isWordIsh($0) })
// Read the next word.
var (characters, _) = try await reader.read(while: isWordIsh(_:))

if characters.readableBytes == 0 {
break // Done.
} else {
words.append(characters.readString(length: characters.readableBytes)!)
}
} while true
// Gobble up whitespace etc..
for try await var characters in reader.split(
omittingEmptySubsequences: true,
whereSeparator: isNotWordIsh(_:)
) {
words.append(characters.readString(length: characters.readableBytes)!)
}

let expected: [String] = [
"Here's",
Expand Down Expand Up @@ -278,4 +273,60 @@ final class BufferedReaderTests: XCTestCase {
XCTAssertEqual(words, expected)
}
}

/// Verifies that splitting on a single byte with `omittingEmptySubsequences: false`
/// yields an empty piece for every leading, trailing and repeated separator.
func testBufferedReaderSplitNotOmittingEmptySubsequences() async throws {
    let fs = FileSystem.shared
    let path = try await fs.temporaryFilePath()

    // The pieces the split must produce. Each empty string stands for a separator
    // that is directly preceded by another separator or by the start of the file,
    // or for the end of the file directly following a separator.
    let expected: [String] = [
        "",
        "",
        "Here's",
        "",
        "to",
        "the",
        "crazy",
        "ones,",
        "",
        "",
        "the",
        "misfits,",
        "the",
        "rebels,",
        "",
        "the",
        "ones",
        "who",
        "see",
        "things",
        "differently.",
        "",
        "",
    ]

    // Derive the file contents from the expectation so the runs of consecutive
    // spaces always match the empty pieces listed above.
    let text = expected.joined(separator: " ")

    try await fs.withFileHandle(
        forWritingAt: path,
        options: .newFile(replaceExisting: false)
    ) { handle in
        var writer = handle.bufferedWriter()
        try await writer.write(contentsOf: text.utf8)
        try await writer.flush()
    }

    try await fs.withFileHandle(forReadingAt: path) { file in
        let reader = file.bufferedReader()
        var words = [String]()

        for try await var characters in reader.split(
            separator: UInt8(ascii: " "),
            omittingEmptySubsequences: false
        ) {
            // Reading exactly `readableBytes` bytes is always in bounds.
            words.append(characters.readString(length: characters.readableBytes)!)
        }

        XCTAssertEqual(words, expected)
    }
}
}