swiftlang / swift

The Swift Programming Language
https://swift.org
Apache License 2.0
67.34k stars 10.34k forks source link

Instability in the optimization results : native Windows toolchain vs Mac / Linux / Win (cross-compile) #62085

Closed asl closed 1 year ago

asl commented 1 year ago

Result of swiftc -O test\AutoDiff\e2e_optimizations.swift, cross compilation on Mac:

sil hidden @test_gradient_float : $@convention(thin) () -> () {
[global: ]
bb0:
  %0 = float_literal $Builtin.FPIEEE32, 0x41200000 // 10 // users: %1, %21
  debug_value %0 : $Builtin.FPIEEE32, let, name "x0", argno 1, type $Float, expr op_fragment:#Float._value // id: %1
  %2 = float_literal $Builtin.FPIEEE32, 0x43480000 // 200 // users: %3, %9
  debug_value %2 : $Builtin.FPIEEE32, let, name "x2", type $Float, expr op_fragment:#Float._value // id: %3
  %4 = float_literal $Builtin.FPIEEE32, 0x42C80000 // 100 // users: %5, %10
  debug_value %4 : $Builtin.FPIEEE32, let, name "x3", type $Float, expr op_fragment:#Float._value // id: %5
  %6 = integer_literal $Builtin.Int64, 1          // user: %7
  %7 = builtin "sitofp_Int64_FPIEEE32"(%6 : $Builtin.Int64) : $Builtin.FPIEEE32 // users: %8, %9, %13
  debug_value %7 : $Builtin.FPIEEE32, let, name "x4", type $Float, expr op_fragment:#Float._value // id: %8
  %9 = builtin "fdiv_FPIEEE32"(%7 : $Builtin.FPIEEE32, %2 : $Builtin.FPIEEE32) : $Builtin.FPIEEE32 // users: %14, %16, %15
  %10 = builtin "fneg_FPIEEE32"(%4 : $Builtin.FPIEEE32) : $Builtin.FPIEEE32 // user: %12
  %11 = float_literal $Builtin.FPIEEE32, 0x471C4000 // 4.0E+4 // user: %12
  %12 = builtin "fdiv_FPIEEE32"(%10 : $Builtin.FPIEEE32, %11 : $Builtin.FPIEEE32) : $Builtin.FPIEEE32 // user: %13
  %13 = builtin "fmul_FPIEEE32"(%12 : $Builtin.FPIEEE32, %7 : $Builtin.FPIEEE32) : $Builtin.FPIEEE32 // user: %16
  debug_value %9 : $Builtin.FPIEEE32, let, name "x3", type $Float, expr op_fragment:#Float._value // id: %14
  %15 = builtin "fneg_FPIEEE32"(%9 : $Builtin.FPIEEE32) : $Builtin.FPIEEE32 // user: %18
  %16 = builtin "fadd_FPIEEE32"(%9 : $Builtin.FPIEEE32, %13 : $Builtin.FPIEEE32) : $Builtin.FPIEEE32 // users: %17, %18, %19
  debug_value %16 : $Builtin.FPIEEE32, let, name "x2", type $Float, expr op_fragment:#Float._value // id: %17
  %18 = builtin "fadd_FPIEEE32"(%16 : $Builtin.FPIEEE32, %15 : $Builtin.FPIEEE32) : $Builtin.FPIEEE32 // user: %19
  %19 = builtin "fadd_FPIEEE32"(%16 : $Builtin.FPIEEE32, %18 : $Builtin.FPIEEE32) : $Builtin.FPIEEE32 // users: %20, %21
  debug_value %19 : $Builtin.FPIEEE32, let, name "x1", type $Float, expr op_fragment:#Float._value // id: %20
  %21 = builtin "fmul_FPIEEE32"(%0 : $Builtin.FPIEEE32, %19 : $Builtin.FPIEEE32) : $Builtin.FPIEEE32 // users: %22, %22
  %22 = builtin "fadd_FPIEEE32"(%21 : $Builtin.FPIEEE32, %21 : $Builtin.FPIEEE32) : $Builtin.FPIEEE32 // user: %23
  %23 = struct $Float (%22 : $Builtin.FPIEEE32)   // users: %26, %24
  debug_value %23 : $Float, let, name "x0", argno 1 // id: %24
  // function_ref specialized blackHole
  %25 = function_ref @$s9blackHoleSf_Tg5 : $@convention(thin) (Float) -> Float // user: %26
  %26 = apply %25(%23) : $@convention(thin) (Float) -> Float
  %27 = tuple ()                                  // user: %28
  return %27 : $()                                // id: %28
} // end sil function 'test_gradient_float'

Result of swiftc -O test\AutoDiff\e2e_optimizations.swift natively on Windows:

sil hidden @test_gradient_float : $@convention(thin) () -> () {
bb0:
  %0 = float_literal $Builtin.FPIEEE32, 0x41200000 // 10 // user: %11
  // function_ref float(_:)
  %1 = function_ref @$s4fooO5floatyS2fF : $@convention(thin) (Float) -> Float // user: %2
  %2 = thin_to_thick_function %1 : $@convention(thin) (Float) -> Float to $@callee_guaranteed (Float) -> Float // users: %34, %24
  // function_ref forward-mode derivative of float(_:)
  %3 = function_ref @$s4fooO5floatyS2fFTJfSpSr : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // user: %4
  %4 = thin_to_thick_function %3 : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) to $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // users: %35, %25
  // function_ref reverse-mode derivative of float(_:)
  %5 = function_ref @$s4fooO5floatyS2fFTJrSpSr : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // user: %6
  %6 = thin_to_thick_function %5 : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) to $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // users: %36, %26
  // function_ref specialized thunk for @callee_guaranteed (@unowned Float) -> (@unowned Float, @owned @escaping @callee_guaranteed (@unowned Float) -> (@unowned Float))
  %7 = function_ref @$sS4fIegyd_Igydo_S2fxq_r0_lyS2fIsegnr_Iegnro_TR25$s4fooO5floatyS2fFTJrSpSrTf3nnpf_n : $@convention(thin) (@in_guaranteed Float) -> (@out Float, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float>) // user: %8
  %8 = thin_to_thick_function %7 : $@convention(thin) (@in_guaranteed Float) -> (@out Float, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float>) to $@callee_guaranteed (@in_guaranteed Float) -> (@out Float, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float>) // users: %37, %9
  %9 = convert_function %8 : $@callee_guaranteed (@in_guaranteed Float) -> (@out Float, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float>) to $@callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float> // user: %10
  %10 = convert_escape_to_noescape %9 : $@callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float> to $@noescape @callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float> // user: %16
  %11 = struct $Float (%0 : $Builtin.FPIEEE32)    // user: %14
  %12 = alloc_stack $Float                        // users: %31, %30, %27
  %13 = alloc_stack $Float                        // users: %18, %16, %14
  store %11 to %13 : $*Float                      // id: %14
  %15 = alloc_stack $Float                        // users: %17, %16
  %16 = apply %10(%15, %13) : $@noescape @callee_guaranteed @substituted <τ_0_0, τ_0_1, τ_0_2, τ_0_3> (@in_guaranteed τ_0_0) -> (@out τ_0_1, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <τ_0_2, τ_0_3>) for <Float, Float, Float, Float> // users: %28, %27
  dealloc_stack %15 : $*Float                     // id: %17
  dealloc_stack %13 : $*Float                     // id: %18
  %19 = alloc_stack $Float                        // users: %29, %27, %23
  %20 = integer_literal $Builtin.Int64, 1         // user: %21
  %21 = builtin "sitofp_Int64_FPIEEE32"(%20 : $Builtin.Int64) : $Builtin.FPIEEE32 // user: %22
  %22 = struct $Float (%21 : $Builtin.FPIEEE32)   // user: %23
  store %22 to %19 : $*Float                      // id: %23
  strong_retain %2 : $@callee_guaranteed (Float) -> Float // id: %24
  strong_retain %4 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // id: %25
  strong_retain %6 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // id: %26
  %27 = apply %16(%12, %19) : $@callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float>
  strong_release %16 : $@callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float> // id: %28
  dealloc_stack %19 : $*Float                     // id: %29
  %30 = load %12 : $*Float                        // user: %33
  dealloc_stack %12 : $*Float                     // id: %31
  // function_ref specialized blackHole
  %32 = function_ref @$s9blackHoleSf_Tg5 : $@convention(thin) (Float) -> Float // user: %33
  %33 = apply %32(%30) : $@convention(thin) (Float) -> Float
  strong_release %2 : $@callee_guaranteed (Float) -> Float // id: %34
  strong_release %4 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // id: %35
  strong_release %6 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // id: %36
  strong_release %8 : $@callee_guaranteed (@in_guaranteed Float) -> (@out Float, @owned @callee_guaranteed @substituted <τ_0_0, τ_0_1> (@in_guaranteed τ_0_0) -> @out τ_0_1 for <Float, Float>) // id: %37
  %38 = tuple ()                                  // user: %39
  return %38 : $()                                // id: %39
} // end sil function 'test_gradient_float'

Originally posted by @asl in https://github.com/apple/swift/issues/62012#issuecomment-1312744279

asl commented 1 year ago

Tagging @compnerd @BradLarson @rxwei @dan-zheng

asl commented 1 year ago

The difference can be observed inside SILCombiner. At one of its iterations, the input SIL is:

  %0 = float_literal $Builtin.FPIEEE32, 0x41200000 // 10 // user: %29
  // function_ref float(_:)
  %1 = function_ref @$s17e2e_optimizations5floatyS2fF : $@convention(thin) (Float) -> Float // users: %13, %2
  %2 = thin_to_thick_function %1 : $@convention(thin) (Float) -> Float to $@callee_guaranteed (Float) -> Float // users: %8, %7
  // function_ref forward-mode derivative of float(_:)
  %3 = function_ref @$s17e2e_optimizations5floatyS2fFTJfSpSr : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // users: %14, %4
  %4 = thin_to_thick_function %3 : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) to $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // user: %7
  // function_ref reverse-mode derivative of float(_:)
  %5 = function_ref @$s17e2e_optimizations5floatyS2fFTJrSpSr : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // users: %15, %6
  %6 = thin_to_thick_function %5 : $@convention(thin) (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) to $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // user: %7
  %7 = differentiable_function [parameters 0] [results 0] %2 : $@callee_guaranteed (Float) -> Float with_derivative {%4 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float), %6 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float)} // users: %56, %54, %52, %11, %9
  strong_retain %2 : $@callee_guaranteed (Float) -> Float // id: %8
  %9 = differentiable_function_extract [jvp] %7 : $@differentiable(reverse) @callee_guaranteed (Float) -> Float // user: %10
  strong_retain %9 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // id: %10
  %11 = differentiable_function_extract [vjp] %7 : $@differentiable(reverse) @callee_guaranteed (Float) -> Float // user: %12
  strong_retain %11 : $@callee_guaranteed (Float) -> (Float, @owned @callee_guaranteed (Float) -> Float) // id: %12

Here strong_retain %2 : $@callee_guaranteed (Float) -> Float // id: %8 is removed on Mac (cross-compiling to Windows) and retained on native Windows.

asl commented 1 year ago

The difference is caused by Swift optimization passes. In particular, strong_retain is deleted by simplifyStrongRetainPass from SwiftCompilerSources/Sources/Optimizer/InstructionPasses/SimplifyStrongRetainRelease.swift

Optimization passes written in Swift are not run by default in the native Windows build. The behaviour can be reproduced on Mac / Linux by setting BOOTSTRAPPING_MODE to OFF.

@compnerd What is the proper solution on Windows then? Should the test in #62012 be marked as XFAIL on Windows? For now I'm marking it as // REQUIRES: swift_in_compiler

compnerd commented 1 year ago

I think that the REQUIRES is the correct approach.

tbkka commented 1 year ago

> Here strong_retain %2 : $@callee_guaranteed (Float) -> Float // id: %8 is removed on Mac (cross-compiling to Windows) and retained on native Windows.

Does this mean we're building incorrect code on windows? Or is this only a performance difference? (The former would be much more serious.)

tbkka commented 1 year ago

CC: @eeckstein

asl commented 1 year ago

@tbkka It's a performance difference. Basically some optimizations are not run on Windows.

asl commented 1 year ago

https://github.com/apple/swift/pull/62012 has landed. The failing test is effectively disabled on the Windows native build via // REQUIRES: swift_in_compiler; the differences are caused by some optimizations that are not available in the Windows native build.