swiftlang / swift

The Swift Programming Language
https://swift.org
Apache License 2.0
67.3k stars 10.34k forks source link

[SR-7637] Using ContiguousArray is sometimes slower than using Array #50178

Open jepers opened 6 years ago

jepers commented 6 years ago
Previous ID SR-7637
Radar None
Original Reporter @jepers
Type Bug
Environment Xcode 9.3, default toolchain and recent dev snapshot
Additional Detail from JIRA | | | |------------------|-----------------| |Votes | 0 | |Component/s | Compiler, Standard Library | |Labels | Bug, Performance | |Assignee | None | |Priority | Medium | md5: af2fb06a06f3943fac4c6c833fdc3ab8

Issue Description:

Related discussion: https://forums.swift.org/t/execution-time-contiguousarray-vs-array/12467

Note how alternativeInnerLoop true/false makes a big difference for the execution time when A = ContiguousArray, but not when A = Array.

// Note how alternativeInnerLoop true/false makes a big difference for the
// execution time when A = ContiguousArray, but not when A = Array.

import Foundation

//typealias A = Array<Bool>
typealias A = ContiguousArray<Bool>

func forSieveOfEratosthenes(limit: Int,
                            alternativeInnerLoop: Bool) -> [Int] {
    var possibles = A(repeating: true, count: limit + 1)
    var result = [Int]()
    for i in 2 ... limit {
        guard possibles[i] else { continue }
        if alternativeInnerLoop {
            var j = i * i
            while j <= limit {
                possibles[j] = false
                j += i
            }
        } else {
            for j in stride(from: i * i, through: limit, by: i) {
                possibles[j] = false
            }
        }
        result.append(i)
    }
    return result
}

func test(alternativeInnerLoop ail: Bool) {
    print("  alternativeInnerLoop = \(ail)")
    for _ in 0 ..< 5 {
        let start = Date()
        let result = forSieveOfEratosthenes(limit: 1_000_000,
                                            alternativeInnerLoop: ail)
        let end = Date()
        print("    biggest:", result.last ?? 0,
              "time: ", end.timeIntervalSince(start))
    }
}

print("Using", String(describing: A.self))
test(alternativeInnerLoop: false)
test(alternativeInnerLoop: true)

Here's the output on my MBP, Xcode 9.3, optimized build, can reproduce the issue using both default toolchain and recent dev snapshot:

Using Array<Bool>
  alternativeInnerLoop = false
    biggest: 999983 time:  0.00781095027923584
    biggest: 999983 time:  0.00577998161315918
    biggest: 999983 time:  0.005715012550354
    biggest: 999983 time:  0.00546598434448242
    biggest: 999983 time:  0.00544905662536621
  alternativeInnerLoop = true
    biggest: 999983 time:  0.00532197952270508
    biggest: 999983 time:  0.00527501106262207
    biggest: 999983 time:  0.00532591342926025
    biggest: 999983 time:  0.00568902492523193
    biggest: 999983 time:  0.00555896759033203

Using ContiguousArray<Bool>
  alternativeInnerLoop = false
    biggest: 999983 time:  0.0161910057067871 <----
    biggest: 999983 time:  0.0147560834884644 <----
    biggest: 999983 time:  0.0145959854125977 <----
    biggest: 999983 time:  0.0141019821166992 <----
    biggest: 999983 time:  0.0143810510635376 <----
  alternativeInnerLoop = true
    biggest: 999983 time:  0.00555908679962158
    biggest: 999983 time:  0.00570595264434814
    biggest: 999983 time:  0.00537300109863281
    biggest: 999983 time:  0.00555789470672607
    biggest: 999983 time:  0.00564098358154297
belkadan commented 6 years ago

cc @eeckstein

swift-ci commented 6 years ago

Comment by Howard Lovatt (JIRA)

In 4.2 the original problem goes away but it now doesn't work for lazy. EG:

import Foundation

protocol Possibles {
    init(repeating: Bool, count: Int)
    subscript(index: Int) -> Bool { get set }
    var count: Int { get }
}
extension Array: Possibles where Element == Bool {}
extension ContiguousArray: Possibles where Element == Bool {}

func lazySieveOfEratosthenes<Storage: Possibles>(makeStorage: () -> Storage) -> [Int] {
    var possibles = makeStorage()
    let limit = possibles.count - 1
    return (2 ... limit).lazy.filter { i in // Lazy, so that `possibles` is updated before it is used.
        possibles[i]
    }.map { i in
        stride(from: i * i, through: limit, by: i).lazy.forEach { j in
            possibles[j] = false
        }
        return i
    }.reduce(into: [Int]()) { (result, next) in
        result.append(next)
    }
}
func forSieveOfEratosthenes<Storage: Possibles>(makeStorage: () -> Storage) -> [Int] {
    var possibles = makeStorage()
    let limit = possibles.count - 1
    var result = [Int]()
    for i in 2 ... limit where possibles[i] { // Lazy, so that `possibles` is updated before it is used.
        for j in stride(from: i * i, through: limit, by: i) {
            possibles[j] = false
        }
        result.append(i)
    }
    return result
}
func test(_ name: String, _ storageType: String, _ function: (Int) -> [Int]) {
    let start = Date()
    let result = function(1_000_000)
    let end = Date()
    print("\(name) \(storageType) biggest: \(result.last ?? 0) time: \(end.timeIntervalSince(start))")
}
test("for   ", "Array          ") { limit in
    forSieveOfEratosthenes {
        Array(repeating: true, count: limit + 1)
    }
}
test("for   ", "ContiguousArray") { limit in
    forSieveOfEratosthenes {
        ContiguousArray(repeating: true, count: limit + 1)
    }
}
test("lazy  ", "Array          ") { limit in
    lazySieveOfEratosthenes {
        Array(repeating: true, count: limit + 1)
    }
}
test("lazy  ", "ContiguousArray") { limit in
    lazySieveOfEratosthenes {
        ContiguousArray(repeating: true, count: limit + 1)
    }
}

Prints:

for    Array           biggest: 999983 time: 0.009091973304748535
for    ContiguousArray biggest: 999983 time: 0.007838010787963867
lazy   Array           biggest: 999983 time: 0.006950974464416504
lazy   ContiguousArray biggest: 999983 time: 0.007106900215148926

Note that 2nd line is now quicker than 1st but unfortunately 3rd line is quicker than 4th.

belkadan commented 6 years ago

I'm pretty sure 6.9ms and 7.1ms are within normal variation between runs. If I run your code multiple times locally I see the last two switching places quite a bit, and even the first two switching every now and then.

swift-ci commented 6 years ago

Comment by Howard Lovatt (JIRA)

It is consistently slower on my machine, but I do accept that the difference is small. I just wondered if it was indicative of an optimization that had somehow gone missing. Thanks for taking a look.

jepers commented 6 years ago

Just to be clear, the following is not true for this issue (SR-7637): "In 4.2 the original problem goes away".

Also, this issue is not about lazy, only about ContiguousArray being (significantly slower) than Array, in a specific situation, and the result of the demonstration program in the description of this issue has not changed in any dramatic way as of def toolchain of Xcode 10 beta 1, and most recent snapshot 2018-06-06.

Please see my post regarding the separate question about lazy in this forum post.

swift-ci commented 6 years ago

Comment by Howard Lovatt (JIRA)

A number of suggestions were made by Jens in the forum he referenced above.

Taking on board these suggestions an improved benchmark is:

import Foundation

func forArraySieveOfEratosthenes(_ limit: Int) -> [Int] {
    var possibles = Array(repeating: true, count: limit + 1)
    var result = [Int]()
    for i in 2 ... limit where possibles[i] { // Lazy, so that `possibles` is updated before it is used.
        for j in stride(from: i * i, through: limit, by: i) {
            possibles[j] = false
        }
        result.append(i)
    }
    return result
}

func forContiguousArraySieveOfEratosthenes(_ limit: Int) -> [Int] {
    var possibles = ContiguousArray(repeating: true, count: limit + 1)
    var result = [Int]()
    for i in 2 ... limit where possibles[i] { // Lazy, so that `possibles` is updated before it is used.
        for j in stride(from: i * i, through: limit, by: i) {
            possibles[j] = false
        }
        result.append(i)
    }
    return result
}

func whileArraySieveOfEratosthenes(_ limit: Int) -> [Int] {
    var possibles = Array(repeating: true, count: limit + 1)
    var result = [Int]()
    for i in 2 ... limit where possibles[i] { // Lazy, so that `possibles` is updated before it is used.
        var j = i * i
        while j <= limit {  // <-- While loop instead of stride.
            possibles[j] = false
            j += i
        }
        result.append(i)
    }
    return result
}

func whileContiguousArraySieveOfEratosthenes(_ limit: Int) -> [Int] {
    var possibles = ContiguousArray(repeating: true, count: limit + 1)
    var result = [Int]()
    for i in 2 ... limit where possibles[i] { // Lazy, so that `possibles` is updated before it is used.
        var j = i * i
        while j <= limit {  // <-- While loop instead of stride.
            possibles[j] = false
            j += i
        }
        result.append(i)
    }
    return result
}

func timeOneTest(_ name: String, _ storageType: String, _ function: (Int) -> [Int]) -> (Double, Int)  {
    let start = Date()
    let last = function(4_000_037).last ?? 0
    let end = Date()
    let time = end.timeIntervalSince(start)
    //print("\(name) \(storageType) biggest: \(last) time: \(time)")
    return (time, last)
}
func test(_ name: String, _ storageType: String, _ function: @escaping (Int) -> [Int]) {
    let n = 47
    let minimum = (0 ..< n).lazy.map { _ in
        timeOneTest(name, storageType, function)
    }.reduce(Double.greatestFiniteMagnitude) { (r, t) in
        r > t.0 ? t.0 : r
    }
    print("\(name) \(storageType) min time: \(minimum * 1000.0) ms")
}
test("for   ", "Array          ", forArraySieveOfEratosthenes(_:))
test("for   ", "ContiguousArray", forContiguousArraySieveOfEratosthenes(_:))
test("while ", "Array          ", whileArraySieveOfEratosthenes(_:))
test("while ", "ContiguousArray", whileContiguousArraySieveOfEratosthenes(_:))

The results obtained on my MacBook Pro, using Xcode 10 beta 1, Swift 4.2, Swift optimization -O, LLVM optimization -Ofast are:

for    Array           min time: 16.0750150680542 ms
for    ContiguousArray min time: 31.419038772583008 ms
while  Array           min time: 15.406012535095215 ms
while  ContiguousArray min time: 15.559077262878418 ms

It seems likely therefore that the problem lies with the interaction of `ContiguousArray` and `stride`.

jepers commented 6 years ago

Sure. This is exactly what my original demonstration program (in the description of the report) is meant to demonstrate.

That's why I made it so you can set alternativeInnerLoop to true/false in order to compare the effects of using stride or an alternative-to-stride (eg using while).

        ...
        if alternativeInnerLoop {
            var j = i * i
            while j <= limit {
                possibles[j] = false
                j += i
            }
        } else {
            for j in stride(from: i * i, through: limit, by: i) {
                possibles[j] = false
            }
        }
        ...

It might be a bounds check that isn't optimized away