apple / swift-algorithms

Commonly used sequence and collection algorithms for Swift
Apache License 2.0
5.93k stars 440 forks source link

Merging Two Sorted Sequences, Attempt 3 #236

Open CTMacUser opened 1 month ago

CTMacUser commented 1 month ago

Description

This is a successor to #184, which succeeded #43. Should fix #192.

This library adapts the C++ functions merge and inplace_merge.

This library also adapts the C++ functions set_union, set_intersection, set_difference, and set_symmetric_difference. (They share the same internal implementation as merge.)

The main changes are:

Detailed Design

The library composes of:

extension MutableCollection {
    /// Given a partition point,
    /// where each side is sorted according to the given predicate,
    /// rearrange the elements until a single sorted run is formed.
    public mutating func mergeSortedPartitions(across pivot: Index, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows
}

extension MutableCollection where Self.Element : Comparable {
    /// Given a partition point, where each side is sorted,
    /// rearrange the elements until a single sorted run is formed.
    @inlinable public mutating func mergeSortedPartitions(across pivot: Index)
}

extension MutableCollection where Self : BidirectionalCollection {
    /// Given a partition point,
    /// where each side is sorted according to the given predicate,
    /// rearrange the elements until a single sorted run is formed,
    /// using minimal scratch memory.
    public mutating func mergeSortedPartitionsInPlace(across pivot: Index, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows
}

extension MutableCollection where Self : BidirectionalCollection, Self.Element : Comparable {
    /// Given a partition point, where each side is sorted,
    /// rearrange the elements until a single sorted run is formed,
    /// using minimal scratch memory.
    @inlinable public mutating func mergeSortedPartitionsInPlace(across pivot: Index)
}

/// Description of which elements of a merger will be retained.
public enum MergerSubset : UInt, CaseIterable {
    case none, firstWithoutSecond, secondWithoutFirst, symmetricDifference,
         intersection, first, second, union,
         sum
}

extension MergerSubset {
    /// Whether the elements exclusive to the first source are emitted.
    @inlinable public var emitsExclusivesToFirst: Bool { get }
    /// Whether the elements exclusive to the second source are emitted.
    @inlinable public var emitsExclusivesToSecond: Bool { get }
    /// Whether the elements shared by both sources are emitted.
    @inlinable public var emitsSharedElements: Bool { get }

    /// Create a filter specifying a full merge (duplicating the shared elements).
    @inlinable public init()
    /// Create a filter specifying which categories of elements are included in
    /// the merger, with shared elements consolidated.
    public init(keepExclusivesToFirst: Bool, keepExclusivesToSecond: Bool, keepSharedElements: Bool)
}

extension RangeReplaceableCollection {
    /// Given two sequences that are both sorted according to the given predicate,
    /// treat them as sets, and create the sorted result of the given set
    /// operation.
    public init<T, U>(mergeSorted first: T, and second: U, retaining filter: MergerSubset = .sum, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element
}

extension RangeReplaceableCollection where Self.Element : Comparable {
    /// Given two sorted sequences, treat them as sets, and create the sorted
    /// result of the given set operation.
    @inlinable public init<T, U>(mergeSorted first: T, and second: U, retaining filter: MergerSubset = .sum) where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element
}

/// Given two sequences that are both sorted according to the given predicate
/// and treated as sets, apply the given set operation, returning the result as
/// a lazy sequence also sorted by the same predicate.
public func mergeSorted<T, U>(_ first: T, _ second: U, retaining filter: MergerSubset = .sum, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool) -> MergeSortedSequence<LazySequence<T>, LazySequence<U>> where T : Sequence, U : Sequence, T.Element == U.Element

/// Given two sorted sequences treated as sets, apply the given set operation,
/// returning the result as a sorted lazy sequence.
@inlinable public func mergeSorted<T, U>(_ first: T, _ second: U, retaining filter: MergerSubset = .sum) -> MergeSortedSequence<LazySequence<T>, LazySequence<U>> where T : Sequence, U : Sequence, T.Element : Comparable, T.Element == U.Element

/// A sequence that lazily vends the sorted result of a set operation upon
/// two sorted sequences treated as sets spliced together, using a predicate as
/// the sorting criteria for all three sequences involved.
public struct MergeSortedSequence<First, Second>
where First : Sequence, Second : Sequence, First.Element == Second.Element
{ /*...*/ }

extension MergeSortedSequence : Sequence { /*...*/ }

extension MergeSortedSequence : LazySequenceProtocol {}

/// An iterator that applies a set operation on two virtual sequences,
/// both treated as sets sorted according a predicate, spliced together to
/// vend a virtual sequence that is also sorted.
public struct MergeSortedIterator<First, Second>
where First : IteratorProtocol, Second : IteratorProtocol, First.Element == Second.Element
{ /*...*/ }

extension MergeSortedIterator : IteratorProtocol { /*...*/ }

Documentation Plan

A simple breakdown list and a guide have been provided.

Test Plan

A test file has been provided.

Source Impact

The changes should be additive.

Checklist