rust-lang / rust

Empowering everyone to build reliable and efficient software.
https://www.rust-lang.org
Other
97.98k stars 12.68k forks source link

Option::map_or generates a bunch of garbage IR #46758

Closed arielb1 closed 9 months ago

arielb1 commented 6 years ago

See e.g.

fn main() {
    let x = Some("hello".to_owned());
    x.map_or(0, |_| 1);
}

Which generates this initial IR:

; <core::option::Option<T>>::map_or
; Function Attrs: inlinehint uwtable
define internal i32 @"_ZN38_$LT$core..option..Option$LT$T$GT$$GT$6map_or17hf55f2f906b05703eE"(%"core::option::Option<alloc::string::String>"* noalias nocapture dereferenceable(24) %self, i32 %default) unnamed_addr #0 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality {
start:
  %personalityslot = alloca { i8*, i32 }, align 8
  %_12 = alloca i8, align 1
  %_11 = alloca i8, align 1
  %_10 = alloca i8, align 1
  %_8 = alloca %"alloc::string::String", align 8
  %_7 = alloca { [0 x i8], %"alloc::string::String", [0 x i8] }, align 8
  %t = alloca %"alloc::string::String", align 8
  %_0 = alloca i32, align 4
  store i8 0, i8* %_12
  store i8 0, i8* %_10
  store i8 0, i8* %_11
  store i8 1, i8* %_10
  store i8 1, i8* %_11
  store i8 1, i8* %_12
  %0 = bitcast %"core::option::Option<alloc::string::String>"* %self to {}**
  %1 = load {}*, {}** %0
  %2 = icmp eq {}* %1, null
  %3 = select i1 %2, i64 0, i64 1
  switch i64 %3, label %bb3 [
    i64 0, label %bb2
    i64 1, label %bb4
  ]

bb1:                                              ; preds = %bb11, %bb10, %bb12
  %4 = bitcast { i8*, i32 }* %personalityslot to i8**
  %5 = load i8*, i8** %4
  %6 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %personalityslot, i32 0, i32 1
  %7 = load i32, i32* %6
  %8 = insertvalue { i8*, i32 } undef, i8* %5, 0
  %9 = insertvalue { i8*, i32 } %8, i32 %7, 1
  resume { i8*, i32 } %9

bb2:                                              ; preds = %start
  store i8 0, i8* %_11
  store i32 %default, i32* %_0
  br label %bb5

bb3:                                              ; preds = %start
  unreachable

bb4:                                              ; preds = %start
  store i8 0, i8* %_10
  %10 = bitcast %"core::option::Option<alloc::string::String>"* %self to %"core::option::Option<alloc::string::String>::Some"*
  %11 = bitcast %"core::option::Option<alloc::string::String>::Some"* %10 to %"alloc::string::String"*
  %12 = bitcast %"alloc::string::String"* %11 to i8*
  %13 = bitcast %"alloc::string::String"* %t to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %13, i8* %12, i64 24, i32 8, i1 false)
  store i8 0, i8* %_12
  %14 = bitcast %"alloc::string::String"* %t to i8*
  %15 = bitcast %"alloc::string::String"* %_8 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %15, i8* %14, i64 24, i32 8, i1 false)
  %16 = bitcast { [0 x i8], %"alloc::string::String", [0 x i8] }* %_7 to %"alloc::string::String"*
  %17 = bitcast %"alloc::string::String"* %_8 to i8*
  %18 = bitcast %"alloc::string::String"* %16 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %18, i8* %17, i64 24, i32 8, i1 false)
  %19 = bitcast { [0 x i8], %"alloc::string::String", [0 x i8] }* %_7 to %"alloc::string::String"*
; invoke map_example::main::{{closure}}
  %20 = invoke i32 @"_ZN11map_example4main28_$u7b$$u7b$closure$u7d$$u7d$17h0799836f5ea6b921E"(%"alloc::string::String"* noalias nocapture dereferenceable(24) %19)
          to label %bb7 unwind label %cleanup

bb5:                                              ; preds = %bb2, %bb7
  %21 = load i8, i8* %_12, !range !1
  %22 = trunc i8 %21 to i1
  br i1 %22, label %bb15, label %bb8

bb6:                                              ; preds = %bb13, %bb14
  %23 = bitcast %"core::option::Option<alloc::string::String>"* %self to {}**
  %24 = load {}*, {}** %23
  %25 = icmp eq {}* %24, null
  %26 = select i1 %25, i64 0, i64 1
  switch i64 %26, label %bb12 [
    i64 1, label %bb10
  ]

bb7:                                              ; preds = %bb4
  store i32 %20, i32* %_0
  br label %bb5

bb8:                                              ; preds = %bb15, %bb5
  %27 = load i8, i8* %_11, !range !1
  %28 = trunc i8 %27 to i1
  br i1 %28, label %bb16, label %bb9

bb9:                                              ; preds = %bb16, %bb8
  %29 = bitcast %"core::option::Option<alloc::string::String>"* %self to {}**
  %30 = load {}*, {}** %29
  %31 = icmp eq {}* %30, null
  %32 = select i1 %31, i64 0, i64 1
  switch i64 %32, label %bb20 [
    i64 1, label %bb18
  ]

bb10:                                             ; preds = %bb6
  %33 = load i8, i8* %_10, !range !1
  %34 = trunc i8 %33 to i1
  br i1 %34, label %bb11, label %bb1

bb11:                                             ; preds = %bb10
  store i8 0, i8* %_10
  %35 = bitcast %"core::option::Option<alloc::string::String>"* %self to %"core::option::Option<alloc::string::String>::Some"*
  %36 = bitcast %"core::option::Option<alloc::string::String>::Some"* %35 to %"alloc::string::String"*
; call core::ptr::drop_in_place
  call void @_ZN4core3ptr13drop_in_place17h5aaf0cb4e5af22beE(%"alloc::string::String"* %36) #8
  br label %bb1

bb12:                                             ; preds = %bb6
; call core::ptr::drop_in_place
  call void @_ZN4core3ptr13drop_in_place17h85c4ff3726432e4eE(%"core::option::Option<alloc::string::String>"* %self) #8
  br label %bb1

bb13:                                             ; preds = %bb14
  store i8 0, i8* %_11
  br label %bb6

bb14:                                             ; preds = %cleanup
  %37 = load i8, i8* %_11, !range !1
  %38 = trunc i8 %37 to i1
  br i1 %38, label %bb13, label %bb6

bb15:                                             ; preds = %bb5
  store i8 0, i8* %_12
  br label %bb8

bb16:                                             ; preds = %bb8
  store i8 0, i8* %_11
  br label %bb9

bb17:                                             ; preds = %bb19, %bb18, %bb20
  %39 = load i32, i32* %_0
  ret i32 %39

bb18:                                             ; preds = %bb9
  %40 = load i8, i8* %_10, !range !1
  %41 = trunc i8 %40 to i1
  br i1 %41, label %bb19, label %bb17

bb19:                                             ; preds = %bb18
  store i8 0, i8* %_10
  %42 = bitcast %"core::option::Option<alloc::string::String>"* %self to %"core::option::Option<alloc::string::String>::Some"*
  %43 = bitcast %"core::option::Option<alloc::string::String>::Some"* %42 to %"alloc::string::String"*
; call core::ptr::drop_in_place
  call void @_ZN4core3ptr13drop_in_place17h5aaf0cb4e5af22beE(%"alloc::string::String"* %43)
  br label %bb17

bb20:                                             ; preds = %bb9
; call core::ptr::drop_in_place
  call void @_ZN4core3ptr13drop_in_place17h85c4ff3726432e4eE(%"core::option::Option<alloc::string::String>"* %self)
  br label %bb17

cleanup:                                          ; preds = %bb4
  %44 = landingpad { i8*, i32 }
          cleanup
  %45 = extractvalue { i8*, i32 } %44, 0
  %46 = extractvalue { i8*, i32 } %44, 1
  %47 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %personalityslot, i32 0, i32 0
  store i8* %45, i8** %47
  %48 = getelementptr inbounds { i8*, i32 }, { i8*, i32 }* %personalityslot, i32 0, i32 1
  store i32 %46, i32* %48
  br label %bb14

Which is quite a bit of IR for such a little function:

    pub fn map_or<U, F: FnOnce(T) -> U>(self, default: U, f: F) -> U {
        match self {
            Some(t) => f(t),
            None => default,
        }
    }

I suspect #46525 might be involved in causing drop elaboration to go crazy

arielb1 commented 6 years ago

So the main problem is that this pollution goes further than map_or alone - every match probably has this sort of IR growth.

mrhota commented 6 years ago

@arielb1 I'd like to help with this. Would you consider this a good issue for mentorship? The linked issue suggests this might be hard to tackle...?

dtolnay commented 6 years ago

Here is that IR transliterated into Rust syntax to be more readable at a glance.

impl Option<String> {
    fn map_or(self: *Option<String>, default: i32) {
        let personalityslot: struct(*i8, i32);
        let t: String;
        let ret: i32;

        let _12 = true;
        let _11 = true;
        let _10 = true;

        let %3 = if self.ptr == null { 0i64 } else { 1i64 }
        match %3 {
            0 => goto 'bb2,
            1 => goto 'bb4,
            _ => goto 'bb3,
        }

    'bb1:
        panic!(personalityslot);

    'bb2:
        _11 = false;
        ret = default;
        goto 'bb5;

    'bb3:
        unreachable!();

    'bb4:
        _10 = false;
        t = *(self as *Some<String> as *String);
        _12 = false;

        try {
            let %20 = (main::{{closure}})(&t);
            goto 'bb7;
        } catch_unwind {
            goto 'cleanup;
        }

    'bb5:
        if _12 {
            goto 'bb15;
        } else {
            goto 'bb8;
        }

    'bb6:
        let %26 = if self.ptr == null { 0i64 } else { 1i64 };
        match %26 {
            1 => goto 'bb10,
            _ => goto 'bb12,
        }

    'bb7:
        ret = %20;
        goto 'bb5;

    'bb8:
        if _11 {
            goto 'bb16;
        } else {
            goto 'bb9;
        }

    'bb9:
        let %32 = if self.ptr == null { 0i64 } else { 1i64 };
        match %32 {
            1 => goto 'bb18,
            _ => goto 'bb20,
        }

    'bb10:
        if _10 {
            goto 'bb11;
        } else {
            goto 'bb1;
        }

    'bb11:
        _10 = false;
        ptr::drop_in_place::<String>(self as *Some<String> as *String);
        goto 'bb1;

    'bb12:
        ptr::drop_in_place::<Option<String>>(self);
        goto 'bb1;

    'bb13:
        _11 = false;
        goto 'bb6;

    'bb14:
        if _11 {
            goto 'bb13;
        } else {
            goto 'bb6;
        }

    'bb15:
        _12 = false;
        goto 'bb8;

    'bb16:
        _11 = false;
        goto 'bb9;

    'bb17:
        return ret;

    'bb18:
        if _10 {
            goto 'bb19;
        } else {
            goto 'bb17;
        }

    'bb19:
        _10 = false;
        ptr::drop_in_place::<String>(self as *Some<String> as *String)
        goto 'bb17;

    'bb20:
        ptr::drop_in_place::<Option<String>>(self);
        goto 'bb17;

    'cleanup:
        personalityslot = landingpad();
        goto 'bb14;
    }
}
dtolnay commented 6 years ago

selection_094

dtolnay commented 6 years ago

The first thing that stands out is that some of these drop flags (?) are useless. In particular _12 and _11 are both initially set to true, then we check them and branch, set them to false but do nothing else, and keep going.

let mut _12 = true;
let mut _11 = true;
if _12 {
    _12 = false;
}
if _11 {
    _11 = false;
}
// never used again

@arielb1 are these somehow tracking whether a non-Drop type has been dropped, which is unnecessary? Or is it more complicated? Eliminating these unused flags would cut the number of basic blocks by about a third.

dtolnay commented 6 years ago

Mentioning @nagisa and @eddyb who were involved in relevant-sounding PR #34307. Is this the control flow you would expect to see for Option::map_or and do you think it would be worthwhile to invest in doing better?

nagisa commented 6 years ago

These drop flags should trivially disappear in near future as a consequence of us starting to do constant propagation with MIRI.

eddyb commented 6 years ago

Note that initially we'll only run the const-eval to produce warnings, without actually modifying the MIR , to avoid misoptimizations. But we do want to do const-prop soon after.

dtolnay commented 6 years ago

Thanks @nagisa @eddyb. MIR constant propagation is nice but in this case some of the drop flags are not controlling any behavior. Check out the drop flags _12 and _11 in the flowchart. In your experience would this be because there used to be something else in bb13 and bb15 and bb16 that got optimized away before translation to LLVM IR, or can we have avoided generating _12 and _11 in the first place? Avoiding unnecessary drop flags to begin with seems like a cheaper and safer optimization than full-blown MIR constant propagation.

dtolnay commented 6 years ago

Response in #rustc:

\ in general, "avoiding something to begin with" tends to be as expensive if not more. of course, this is not always the case, but at least in the case of constant propagation we more or less know how to make it sound \ since we already have an SSA-like rule for a few other things \ (e.g. rustc_trans avoids putting variables into memory if it can)

I will investigate with -Z dump-mir and maybe after drop elaboration it will be clearer what the IR is doing.

Just from the LLVM IR it looks like _12 could be the drop flag for the closure f which is the argument to map_or, and _11 could be the drop flag for the default value. In the monomorphization shown here, it happens that neither of those types has a Drop impl to call.

@arielb1 do you have a sense of where this IR is "garbage"? Which of the basic blocks would you expect us not to be emitting or expect to be simpler?

arielb1 commented 6 years ago

IIRC, these drop flags are used if the destructor of one of the types (i.e. T or the closure) panics, and therefore need to be present in the constructed MIR and will not be removed by constant propagation on the generic MIR.

dtolnay commented 6 years ago

Would you be able to briefly suggest a path forward for @mrhota or someone else to start working on?

arielb1 commented 6 years ago

The MIR cfg is the following - see https://play.rust-lang.org/?gist=a642496d2949e24738b27b030644f833&version=stable (not sure how to upload graphviz graphs):

// WARNING: This output format is intended for human consumers only
// and is subject to change without notice. Knock yourself out.
fn main() -> () {
    let mut _0: ();                      // return pointer

    bb0: {                              
        _0 = ();                         // scope 0 at src/main.rs:7:11: 8:2
        return;                          // scope 0 at src/main.rs:8:2: 8:2
    }
}

fn map_or(_1: std::option::Option<T>, _2: U, _3: F) -> U {
    let mut _0: U;                       // return pointer
    scope 1 {
        let _4: T;                       // "t" in scope 1 at src/main.rs:3:18: 3:19
    }
    let mut _5: isize;
    let mut _6: F;
    let mut _7: (T,);
    let mut _8: T;
    let mut _9: U;
    let mut _10: bool;
    let mut _11: bool;
    let mut _12: bool;
    let mut _13: isize;
    let mut _14: isize;

    bb0: {                              
        _12 = const false;               // scope 1 at src/main.rs:3:13: 3:20
        _10 = const false;               // scope 1 at src/main.rs:3:13: 3:20
        _11 = const false;               // scope 1 at src/main.rs:3:13: 3:20
        _10 = const true;                // scope 1 at src/main.rs:3:13: 3:20
        _11 = const true;                // scope 1 at src/main.rs:3:13: 3:20
        _12 = const true;                // scope 1 at src/main.rs:3:13: 3:20
        _5 = discriminant(_1);           // scope 1 at src/main.rs:3:13: 3:20
        switchInt(_5) -> [0isize: bb1, 1isize: bb3, otherwise: bb2]; // scope 1 at src/main.rs:3:13: 3:20
    }

    bb1: {                              
        StorageLive(_9);                 // scope 1 at src/main.rs:4:21: 4:28
        _11 = const false;               // scope 1 at src/main.rs:4:21: 4:28
        _9 = _2;                         // scope 1 at src/main.rs:4:21: 4:28
        _0 = _9;                         // scope 1 at src/main.rs:4:21: 4:28
        StorageDead(_9);                 // scope 1 at src/main.rs:4:28: 4:28
        goto -> bb4;                     // scope 1 at src/main.rs:2:9: 5:10
    }

    bb2: {                              
        unreachable;                     // scope 0 at src/main.rs:6:6: 6:6
    }

    bb3: {                              
        StorageLive(_4);                 // scope 1 at src/main.rs:3:18: 3:19
        _10 = const false;               // scope 1 at src/main.rs:3:18: 3:19
        _4 = ((_1 as Some).0: T);        // scope 1 at src/main.rs:3:18: 3:19
        StorageLive(_6);                 // scope 1 at src/main.rs:3:24: 3:25
        _12 = const false;               // scope 1 at src/main.rs:3:24: 3:25
        _6 = _3;                         // scope 1 at src/main.rs:3:24: 3:25
        StorageLive(_7);                 // scope 1 at src/main.rs:3:24: 3:28
        StorageLive(_8);                 // scope 1 at src/main.rs:3:26: 3:27
        _8 = _4;                         // scope 1 at src/main.rs:3:26: 3:27
        _7 = (_8,);                      // scope 1 at src/main.rs:3:24: 3:28
        _0 = const std::ops::FnOnce::call_once(_6, _7) -> [return: bb7, unwind: bb14]; // scope 1 at src/main.rs:3:24: 3:28
    }

    bb4: {                              
        StorageDead(_4);                 // scope 0 at src/main.rs:5:10: 5:10
        switchInt(_12) -> [0u8: bb8, otherwise: bb15]; // scope 0 at src/main.rs:6:6: 6:6
    }

    bb5: {                               // cleanup
        resume;                          // scope 0 at src/main.rs:1:5: 6:6
    }

    bb6: {                               // cleanup
        _13 = discriminant(_1);          // scope 0 at src/main.rs:6:6: 6:6
        switchInt(_13) -> [1isize: bb10, otherwise: bb12]; // scope 0 at src/main.rs:6:6: 6:6
    }

    bb7: {                              
        StorageDead(_7);                 // scope 1 at src/main.rs:3:28: 3:28
        StorageDead(_8);                 // scope 1 at src/main.rs:3:28: 3:28
        StorageDead(_6);                 // scope 1 at src/main.rs:3:28: 3:28
        goto -> bb4;                     // scope 1 at src/main.rs:2:9: 5:10
    }

    bb8: {                              
        switchInt(_11) -> [0u8: bb9, otherwise: bb16]; // scope 0 at src/main.rs:6:6: 6:6
    }

    bb9: {                              
        _14 = discriminant(_1);          // scope 0 at src/main.rs:6:6: 6:6
        switchInt(_14) -> [1isize: bb18, otherwise: bb20]; // scope 0 at src/main.rs:6:6: 6:6
    }

    bb10: {                              // cleanup
        switchInt(_10) -> [0u8: bb5, otherwise: bb11]; // scope 0 at src/main.rs:6:6: 6:6
    }

    bb11: {                              // cleanup
        _10 = const false;               // scope 0 at src/main.rs:6:6: 6:6
        drop(((_1 as Some).0: T)) -> bb5; // scope 0 at src/main.rs:6:6: 6:6
    }

    bb12: {                              // cleanup
        drop(_1) -> bb5;                 // scope 0 at src/main.rs:6:6: 6:6
    }

    bb13: {                              // cleanup
        _11 = const false;               // scope 0 at src/main.rs:6:6: 6:6
        drop(_2) -> bb6;                 // scope 0 at src/main.rs:6:6: 6:6
    }

    bb14: {                              // cleanup
        switchInt(_11) -> [0u8: bb6, otherwise: bb13]; // scope 0 at src/main.rs:6:6: 6:6
    }

    bb15: {                             
        _12 = const false;               // scope 0 at src/main.rs:6:6: 6:6
        drop(_3) -> [return: bb8, unwind: bb14]; // scope 0 at src/main.rs:6:6: 6:6
    }

    bb16: {                             
        _11 = const false;               // scope 0 at src/main.rs:6:6: 6:6
        drop(_2) -> [return: bb9, unwind: bb6]; // scope 0 at src/main.rs:6:6: 6:6
    }

    bb17: {                             
        return;                          // scope 0 at src/main.rs:6:6: 6:6
    }

    bb18: {                             
        switchInt(_10) -> [0u8: bb17, otherwise: bb19]; // scope 0 at src/main.rs:6:6: 6:6
    }

    bb19: {                             
        _10 = const false;               // scope 0 at src/main.rs:6:6: 6:6
        drop(((_1 as Some).0: T)) -> bb17; // scope 0 at src/main.rs:6:6: 6:6
    }

    bb20: {                             
        drop(_1) -> bb17;                // scope 0 at src/main.rs:6:6: 6:6
    }
}

I'm actually quite unsure of the best way to do it, but it should be possible to turn this into a better generic MIR. This requires some work to look at it and find ways to optimize.

dtolnay commented 6 years ago

Might it help to represent conditional_drop(flag, (...: T)) as such in MIR? That could translate to nothing (or flag = const false?) in the case that T has no drop impl to call, and the required pattern of basic blocks if there is a drop impl.

Are there MIR passes that need to see the control flow in a conditional drop as control flow?

eddyb commented 6 years ago

Adding more complexity to the MIR shouldn't be needed here IMO.

Mark-Simulacrum commented 9 months ago

Current optimized, generic MIR for Option::map_or contains only two drop flags:

current optimized mir

This is pretty good already, but not perfect. With I think jump threading, maybe some other opts on (-Zmir-opt-level=4), we get this:

opt-4

which eliminates the closure's drop flag entirely but not the alternative value's (_2) -- maybe too high cost in the pass, not entirely clear.

The LLVM IR for the particular case at the top isn't ideal -- we still generate some no-op goto basic blocks -- but I don't think we can do much at the generic MIR level there, it's mostly due to drop(usize) being a no-op.

Mark-Simulacrum commented 9 months ago

I don't think there are any clear further opportunities left that need to be tracked here, vs. just enabling e.g. jump threading by default (as is planned: https://github.com/rust-lang/rust/pull/117206).