Closed guilhermeleobas closed 2 years ago
Failure is not related to this PR. Ibis updated earlier this week and broke ibis_omniscidb package.
Failure is not related to this PR. Ibis updated earlier this week is broke ibis_omniscidb package.
Until ibis-heavyai is fixed for ibis 3 (see https://github.com/heavyai/ibis-heavyai/issues/67), we'll need to use ibis 2. That is, change the line
mamba install -c conda-forge omniscidb nbval ibis-omniscidb matplotlib pandas ibis-framework
in .github/workflows/rbc_test.yml to
mamba install -c conda-forge omniscidb nbval ibis-heavyai matplotlib pandas ibis-framework=2
(This could deserve a separate PR).
Fix issue #487.
In the code below, the if branch returns the correct value and the else branch returns null. Disabling optimizations seems to have some effect as the function starts to return the correct value.
I can't explain why this happens, but it seems to be related to changes done by Scalar Replacement of Aggregates (SROA) pass. After it executes, the pass mutates the IR to a state where the else branch doesn't assign anything.
First execution of SROA
```llvm *** IR Dump After Promote 'by reference' arguments to scalars ***define void @classify_slope__cpu_0({ i8*, i64, i8 }* nocapture %.1, double %.2) local_unnamed_addr { entry: %.9.i = alloca { i8*, i64, i8 }, align 8 %.30.i = alloca { i8*, i64, i8 }, align 8 %0 = bitcast { i8*, i64, i8 }* %.9.i to i8* call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %0) %1 = bitcast { i8*, i64, i8 }* %.30.i to i8* call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %1) %.fca.0.gep4.i = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.9.i, i64 0, i32 0 store i8* null, i8** %.fca.0.gep4.i, align 8, !noalias !2 %.fca.1.gep5.i = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.9.i, i64 0, i32 1 store i64 0, i64* %.fca.1.gep5.i, align 8, !noalias !2 %.fca.2.gep6.i = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.9.i, i64 0, i32 2 store i8 0, i8* %.fca.2.gep6.i, align 8, !noalias !2 %.fca.0.gep.i = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.30.i, i64 0, i32 0 store i8* null, i8** %.fca.0.gep.i, align 8, !noalias !2 %.fca.1.gep.i = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.30.i, i64 0, i32 1 store i64 0, i64* %.fca.1.gep.i, align 8, !noalias !2 %.fca.2.gep.i = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.30.i, i64 0, i32 2 store i8 0, i8* %.fca.2.gep.i, align 8, !noalias !2 %.6.i = fcmp ugt double %.2, 5.000000e+00 %.8.i = call i8* @allocate_varlen_buffer(i64 3, i64 1), !noalias !2 br i1 %.6.i, label %B22.i, label %B10.i B10.i: ; preds = %entry store i8* null, i8** %.fca.0.gep4.i, align 8, !noalias !2 store i64 0, i64* %.fca.1.gep5.i, align 8, !noalias !2 store i8 0, i8* %.fca.2.gep6.i, align 8, !noalias !2 store i8* %.8.i, i8** %.fca.0.gep4.i, align 8, !noalias !2 store i64 3, i64* %.fca.1.gep5.i, align 8, !noalias !2 store i8 0, i8* %.fca.2.gep6.i, align 8, !noalias !2 call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(3) %.8.i, i8* nonnull align 1 dereferenceable(3) getelementptr inbounds ([3 x i8], [3 x i8]* @"Text(low)", i64 0, i64 0), i64 3, i1 false), !noalias !2 %2 = bitcast { i8*, i64, i8 }* %.9.i to i8* call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %2) %3 = bitcast { i8*, i64, i8 }* %.30.i to i8* call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %3) br label %entry.endif B22.i: ; preds = %entry store i8* null, i8** %.fca.0.gep.i, align 8, !noalias !2 store i64 0, i64* %.fca.1.gep.i, align 8, !noalias !2 store i8 0, i8* %.fca.2.gep.i, align 8, !noalias !2 store i8* %.8.i, i8** %.fca.0.gep.i, align 8, !noalias !2 store i64 3, i64* %.fca.1.gep.i, align 8, !noalias !2 store i8 0, i8* %.fca.2.gep.i, align 8, !noalias !2 call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(3) %.8.i, i8* nonnull align 1 dereferenceable(3) getelementptr inbounds ([3 x i8], [3 x i8]* @"Text(low).1", i64 0, i64 0), i64 3, i1 false), !noalias !2 %4 = bitcast { i8*, i64, i8 }* %.9.i to i8* call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %4) %5 = bitcast { i8*, i64, i8 }* %.30.i to i8* call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %5) br label %entry.endif entry.endif: ; preds = %B10.i, %B22.i %.4.0 = phi { i8*, i64, i8 }* [ %.9.i, %B10.i ], [ %.30.i, %B22.i ] %.21 = load { i8*, i64, i8 }, { i8*, i64, i8 }* %.4.0, align 8 store { i8*, i64, i8 } %.21, { i8*, i64, i8 }* %.1, align 8 ret void } *** IR Dump After SROA *** define void @classify_slope__cpu_0({ i8*, i64, i8 }* nocapture %.1, double %.2) local_unnamed_addr { entry: %.30.i = alloca { i8*, i64, i8 }, align 8 %.30.i.sroa.gep7 = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.30.i, i32 0, i32 2 %.30.i.sroa.gep4 = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.30.i, i32 0, i32 1 %.30.i.sroa.gep = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.30.i, i32 0, i32 0 %0 = bitcast { i8*, i64, i8 }* %.30.i to i8* call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %0) %.fca.0.gep.i = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.30.i, i64 0, i32 0 store i8* null, i8** %.fca.0.gep.i, align 8, !noalias !2 %.fca.1.gep.i = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.30.i, i64 0, i32 1 store i64 0, i64* %.fca.1.gep.i, align 8, !noalias !2 %.fca.2.gep.i = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.30.i, i64 0, i32 2 store i8 0, i8* %.fca.2.gep.i, align 8, !noalias !2 %.6.i = fcmp ugt double %.2, 5.000000e+00 %.8.i = call i8* @allocate_varlen_buffer(i64 3, i64 1), !noalias !2 br i1 %.6.i, label %B22.i, label %B10.i B10.i: ; preds = %entry call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(3) %.8.i, i8* nonnull align 1 dereferenceable(3) getelementptr inbounds ([3 x i8], [3 x i8]* @"Text(low)", i64 0, i64 0), i64 3, i1 false), !noalias !2 %1 = bitcast { i8*, i64, i8 }* %.30.i to i8* call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %1) br label %entry.endif B22.i: ; preds = %entry store i8* null, i8** %.fca.0.gep.i, align 8, !noalias !2 store i64 0, i64* %.fca.1.gep.i, align 8, !noalias !2 store i8 0, i8* %.fca.2.gep.i, align 8, !noalias !2 store i8* %.8.i, i8** %.fca.0.gep.i, align 8, !noalias !2 store i64 3, i64* %.fca.1.gep.i, align 8, !noalias !2 store i8 0, i8* %.fca.2.gep.i, align 8, !noalias !2 call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(3) %.8.i, i8* nonnull align 1 dereferenceable(3) getelementptr inbounds ([3 x i8], [3 x i8]* @"Text(low).1", i64 0, i64 0), i64 3, i1 false), !noalias !2 %2 = bitcast { i8*, i64, i8 }* %.30.i to i8* call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %2) %.4.0.sroa.phi5.sroa.speculate.load.B22.i = load i8, i8* %.30.i.sroa.gep7, align 8 %.4.0.sroa.phi2.sroa.speculate.load.B22.i = load i64, i64* %.30.i.sroa.gep4, align 8 %.4.0.sroa.phi.sroa.speculate.load.B22.i = load i8*, i8** %.30.i.sroa.gep, align 8 br label %entry.endif entry.endif: ; preds = %B10.i, %B22.i %.4.0.sroa.phi.sroa.speculated = phi i8* [ %.8.i, %B10.i ], [ %.4.0.sroa.phi.sroa.speculate.load.B22.i, %B22.i ] %.4.0.sroa.phi2.sroa.speculated = phi i64 [ 3, %B10.i ], [ %.4.0.sroa.phi2.sroa.speculate.load.B22.i, %B22.i ] %.4.0.sroa.phi5.sroa.speculated = phi i8 [ 0, %B10.i ], [ %.4.0.sroa.phi5.sroa.speculate.load.B22.i, %B22.i ] %.21.fca.0.insert = insertvalue { i8*, i64, i8 } undef, i8* %.4.0.sroa.phi.sroa.speculated, 0 %.21.fca.1.insert = insertvalue { i8*, i64, i8 } %.21.fca.0.insert, i64 %.4.0.sroa.phi2.sroa.speculated, 1 %.21.fca.2.insert = insertvalue { i8*, i64, i8 } %.21.fca.1.insert, i8 %.4.0.sroa.phi5.sroa.speculated, 2 store { i8*, i64, i8 } %.21.fca.2.insert, { i8*, i64, i8 }* %.1, align 8 ret void } ```After a few more optimization iterations
```llvm *** IR Dump After Promote 'by reference' arguments to scalars ***define void @classify_slope__cpu_0({ i8*, i64, i8 }* nocapture %.1, double %.2) local_unnamed_addr { entry: %.30.i = alloca { i8*, i64, i8 }, align 8 %0 = bitcast { i8*, i64, i8 }* %.30.i to i8* call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %0) %.6.i = fcmp ugt double %.2, 5.000000e+00 %1 = bitcast { i8*, i64, i8 }* %.30.i to i8* call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(17) %1, i8 0, i64 17, i1 false) %.8.i = tail call i8* @allocate_varlen_buffer(i64 3, i64 1), !noalias !2 br i1 %.6.i, label %B22.i, label %B10.i B10.i: ; preds = %entry tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(3) %.8.i, i8* nonnull align 1 dereferenceable(3) getelementptr inbounds ([3 x i8], [3 x i8]* @"Text(low).1", i64 0, i64 0), i64 3, i1 false), !noalias !2 call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %0) br label %entry.endif B22.i: ; preds = %entry %.fca.2.gep.i = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.30.i, i64 0, i32 2 %.fca.1.gep.i = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.30.i, i64 0, i32 1 %.fca.0.gep.i = getelementptr inbounds { i8*, i64, i8 }, { i8*, i64, i8 }* %.30.i, i64 0, i32 0 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(3) %.8.i, i8* nonnull align 1 dereferenceable(3) getelementptr inbounds ([3 x i8], [3 x i8]* @"Text(low).1", i64 0, i64 0), i64 3, i1 false), !noalias !2 call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %0) %.4.0.sroa.phi5.sroa.speculate.load.B22.i = load i8, i8* %.fca.2.gep.i, align 8 %.4.0.sroa.phi2.sroa.speculate.load.B22.i = load i64, i64* %.fca.1.gep.i, align 8 %.4.0.sroa.phi.sroa.speculate.load.B22.i = load i8*, i8** %.fca.0.gep.i, align 8 br label %entry.endif entry.endif: ; preds = %B10.i, %B22.i %.4.0.sroa.phi.sroa.speculated = phi i8* [ %.8.i, %B10.i ], [ %.4.0.sroa.phi.sroa.speculate.load.B22.i, %B22.i ] %.4.0.sroa.phi2.sroa.speculated = phi i64 [ 3, %B10.i ], [ %.4.0.sroa.phi2.sroa.speculate.load.B22.i, %B22.i ] %.4.0.sroa.phi5.sroa.speculated = phi i8 [ 0, %B10.i ], [ %.4.0.sroa.phi5.sroa.speculate.load.B22.i, %B22.i ] %.21.fca.0.insert = insertvalue { i8*, i64, i8 } undef, i8* %.4.0.sroa.phi.sroa.speculated, 0 %.21.fca.1.insert = insertvalue { i8*, i64, i8 } %.21.fca.0.insert, i64 %.4.0.sroa.phi2.sroa.speculated, 1 %.21.fca.2.insert = insertvalue { i8*, i64, i8 } %.21.fca.1.insert, i8 %.4.0.sroa.phi5.sroa.speculated, 2 store { i8*, i64, i8 } %.21.fca.2.insert, { i8*, i64, i8 }* %.1, align 8 ret void } *** IR Dump After SROA *** define void @classify_slope__cpu_0({ i8*, i64, i8 }* nocapture %.1, double %.2) local_unnamed_addr { entry: %.6.i = fcmp ugt double %.2, 5.000000e+00 %0 = ptrtoint i8* null to i64 %.8.i = tail call i8* @allocate_varlen_buffer(i64 3, i64 1), !noalias !2 br i1 %.6.i, label %B22.i, label %B10.i B10.i: ; preds = %entry tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(3) %.8.i, i8* nonnull align 1 dereferenceable(3) getelementptr inbounds ([3 x i8], [3 x i8]* @"Text(low).1", i64 0, i64 0), i64 3, i1 false), !noalias !2 br label %entry.endif B22.i: ; preds = %entry tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(3) %.8.i, i8* nonnull align 1 dereferenceable(3) getelementptr inbounds ([3 x i8], [3 x i8]* @"Text(low).1", i64 0, i64 0), i64 3, i1 false), !noalias !2 br label %entry.endif entry.endif: ; preds = %B10.i, %B22.i %.4.0.sroa.phi.sroa.speculated = phi i8* [ %.8.i, %B10.i ], [ null, %B22.i ] %.4.0.sroa.phi2.sroa.speculated = phi i64 [ 3, %B10.i ], [ 0, %B22.i ] %.4.0.sroa.phi5.sroa.speculated = phi i8 [ 0, %B10.i ], [ 0, %B22.i ] %.21.fca.0.insert = insertvalue { i8*, i64, i8 } undef, i8* %.4.0.sroa.phi.sroa.speculated, 0 %.21.fca.1.insert = insertvalue { i8*, i64, i8 } %.21.fca.0.insert, i64 %.4.0.sroa.phi2.sroa.speculated, 1 %.21.fca.2.insert = insertvalue { i8*, i64, i8 } %.21.fca.1.insert, i8 %.4.0.sroa.phi5.sroa.speculated, 2 store { i8*, i64, i8 } %.21.fca.2.insert, { i8*, i64, i8 }* %.1, align 8 ret void } ```SROA changes the PHI node in the following order:
%.4.0 = phi { i8*, i64, i8 }* [ %.9.i, %B10.i ], [ %.30.i, %B22.i ]
%.4.0.sroa.phi.sroa.speculated = phi i8* [ %.8.i, %B10.i ], [ %.4.0.sroa.phi.sroa.speculate.load.B22.i, %B22.i ]
%.4.0.sroa.phi.sroa.speculated = phi i8* [ %.8.i, %B10.i ], [ null, %B22.i ]
My "fix" creates a function to execute the assignment with optimizations disabled. This means that LLVM will not attempt to run SROA in this part of the code.