EnzymeAD / Enzyme

High-performance automatic differentiation of LLVM and MLIR.
https://enzyme.mit.edu
Other
1.28k stars 108 forks source link

Poison Propagation [LLVM upstream bug] #592

Open wsmoses opened 2 years ago

wsmoses commented 2 years ago
$LLVM_DIR/bin/clang++ test.cpp -S -emit-llvm -o input.ll -g -O2 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops
$LLVM_DIR/bin/opt input.ll -load=$ENZYME_DIR/lib/LLVMEnzyme-14.so -enable-new-pm=0 -enzyme -o output.ll -S
$LLVM_DIR/bin/clang output.ll -O1 -g -o a1.exe
$LLVM_DIR/bin/clang output.ll -O2 -g -o a2.exe
wmoses@beast:~/git/Enzyme/enzyme/buildomp (context) $ valgrind ./a2.exe
==109987== Memcheck, a memory error detector
==109987== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==109987== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==109987== Command: ./a2.exe
==109987==
--109987-- WARNING: Serious error when reading debug info
--109987-- When reading debug info from /mnt/pci4/wmdata/Enzyme/enzyme/buildomp/a2.exe:
--109987-- Ignoring non-Dwarf2/3/4 block in .debug_info
--109987-- WARNING: Serious error when reading debug info
--109987-- When reading debug info from /mnt/pci4/wmdata/Enzyme/enzyme/buildomp/a2.exe:
--109987-- parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4
==109987==
==109987== HEAP SUMMARY:
==109987==     in use at exit: 0 bytes in 0 blocks
==109987==   total heap usage: 0 allocs, 0 frees, 0 bytes allocated
==109987==
==109987== All heap blocks were freed -- no leaks are possible
==109987==
==109987== For counts of detected and suppressed errors, rerun with: -v
==109987== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
// test.cpp
#include <stdio.h>

// Activity markers placed in the __enzyme_autodiff argument list ahead of the
// argument(s) they describe. Presumably Enzyme recognizes them by name and
// their values are irrelevant -- confirm against the Enzyme documentation.
int enzyme_dup;
int enzyme_out;   // declared, but not referenced by any function in this test
int enzyme_const;

// Declaration only; no definition is provided in this file. Takes the function
// to differentiate as a void* followed by a variadic pack of arguments.
template<typename... Args>
void __enzyme_autodiff(void*, Args...);

// Element-wise square: stores x[k] * x[k] into y[k] for every k in [0, size).
// Writes nothing when size is zero or negative.
void square(double const* x, double* y, int size) {
  int k = 0;
  while (k < size) {
    double const v = x[k];
    y[k] = v * v;
    ++k;
  }
}

// Requests the derivative of square() by calling __enzyme_autodiff with the
// pairs (x, x_b) and (y, y_b), each tagged enzyme_dup, and size tagged
// enzyme_const. Nothing is returned (the function is void); presumably the
// gradient 2 * x[i] * y_b[i] is accumulated into x_b[i] -- confirm against
// Enzyme's calling convention.
// NOTE(review): y_b is declared `double const*` but is passed in the shadow
// position for the output y; if Enzyme writes to output shadows the const is
// misleading -- confirm.
void dsquare(double const* x, double* x_b, double* y, double const* y_b, int size) {
    // `return` applied to a void expression: this simply forwards to the call.
    return __enzyme_autodiff((void*) square,
        enzyme_dup, x, x_b,
        enzyme_dup, y, y_b,
        enzyme_const, size);
}
// Driver: differentiates square() at x = 1..n with an all-ones seed in y_b,
// then prints each input, its square, and the gradient found in x_b.
int main() {
    int const n = 5;
    double x[n];
    double x_b[n];
    double y[n];
    double y_b[n];

    for(int i=0; i<n; i++) {
      x[i] = i + 1;
      // Enzyme's reverse mode accumulates (+=) into the input shadow, so it
      // must start at zero. Leaving x_b uninitialized makes the later read in
      // printf an indeterminate-value read (undefined behavior).
      x_b[i] = 0.0;
      y_b[i] = 1.0;  // seed for the output shadow
    }

    dsquare(x, x_b, y, y_b, n);

    // Bounded by n (was a hard-coded 5) so the loops track the array size.
    for(int i=0; i<n; i++) {
      printf("t2 square(%f)=%f, dsquare(%f)=%f\n", x[i], y[i], x[i], x_b[i]);
    }
}
; input.ll
; ModuleID = 'test.cpp'
source_filename = "test.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@enzyme_dup = dso_local local_unnamed_addr global i32 0, align 4, !dbg !0
@enzyme_out = dso_local local_unnamed_addr global i32 0, align 4, !dbg !7
@enzyme_const = dso_local local_unnamed_addr global i32 0, align 4, !dbg !10
@.str = private unnamed_addr constant [34 x i8] c"t2 square(%f)=%f, dsquare(%f)=%f\0A\00", align 1

; Function Attrs: mustprogress nofree norecurse nosync nounwind uwtable
; square(x, y, size): for each i in [0, size) loads x[i], multiplies it by
; itself and stores the product to y[i]. %x is only read and %y only written
; (readonly / writeonly parameter attributes).
define dso_local void @_Z6squarePKdPdi(double* nocapture noundef readonly %x, double* nocapture noundef writeonly %y, i32 noundef %size) #0 !dbg !17 {
entry:
  call void @llvm.dbg.value(metadata double* %x, metadata !25, metadata !DIExpression()), !dbg !30
  call void @llvm.dbg.value(metadata double* %y, metadata !26, metadata !DIExpression()), !dbg !30
  call void @llvm.dbg.value(metadata i32 %size, metadata !27, metadata !DIExpression()), !dbg !30
  call void @llvm.dbg.value(metadata i32 0, metadata !28, metadata !DIExpression()), !dbg !31
  ; Loop guard: the body is skipped entirely when size <= 0.
  %cmp11 = icmp sgt i32 %size, 0, !dbg !32
  br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup, !dbg !34

for.body.preheader:                               ; preds = %entry
  ; size is positive on this path; it is zero-extended to a 64-bit trip count.
  %wide.trip.count = zext i32 %size to i64, !dbg !32
  br label %for.body, !dbg !34

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void, !dbg !35

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; %indvars.iv is the 64-bit induction variable, counting up from 0.
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !28, metadata !DIExpression()), !dbg !31
  %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv, !dbg !36
  %0 = load double, double* %arrayidx, align 8, !dbg !36, !tbaa !38
  ; x[i] is loaded once and used for both operands of the multiply.
  %mul = fmul double %0, %0, !dbg !42
  %arrayidx4 = getelementptr inbounds double, double* %y, i64 %indvars.iv, !dbg !43
  store double %mul, double* %arrayidx4, align 8, !dbg !44, !tbaa !38
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !45
  call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !28, metadata !DIExpression()), !dbg !31
  ; Exit once the incremented index equals the trip count.
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !32
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !34, !llvm.loop !46
}

; Function Attrs: mustprogress nocallback nofree nosync nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2

; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2

; Function Attrs: mustprogress uwtable
; dsquare(x, x_b, y, y_b, size): loads the enzyme_dup and enzyme_const marker
; globals and tail-calls the __enzyme_autodiff instantiation with @square
; (bitcast to i8*) as the function to differentiate.
define dso_local void @_Z7dsquarePKdPdS1_S0_i(double* noundef %x, double* noundef %x_b, double* noundef %y, double* noundef %y_b, i32 noundef %size) local_unnamed_addr #3 !dbg !50 {
entry:
  call void @llvm.dbg.value(metadata double* %x, metadata !54, metadata !DIExpression()), !dbg !59
  call void @llvm.dbg.value(metadata double* %x_b, metadata !55, metadata !DIExpression()), !dbg !59
  call void @llvm.dbg.value(metadata double* %y, metadata !56, metadata !DIExpression()), !dbg !59
  call void @llvm.dbg.value(metadata double* %y_b, metadata !57, metadata !DIExpression()), !dbg !59
  call void @llvm.dbg.value(metadata i32 %size, metadata !58, metadata !DIExpression()), !dbg !59
  ; enzyme_dup is loaded once; %0 is passed before both pointer pairs below.
  %0 = load i32, i32* @enzyme_dup, align 4, !dbg !60, !tbaa !61
  %1 = load i32, i32* @enzyme_const, align 4, !dbg !63, !tbaa !61
  ; Argument order: fn, %0, x, x_b, %0, y, y_b, %1, size.
  tail call void @_Z17__enzyme_autodiffIJiPKdPdiS2_S1_iiEEvPvDpT_(i8* noundef bitcast (void (double*, double*, i32)* @_Z6squarePKdPdi to i8*), i32 noundef %0, double* noundef %x, double* noundef %x_b, i32 noundef %0, double* noundef %y, double* noundef %y_b, i32 noundef %1, i32 noundef %size), !dbg !64
  ret void, !dbg !65
}

declare dso_local void @_Z17__enzyme_autodiffIJiPKdPdiS2_S1_iiEEvPvDpT_(i8* noundef, i32 noundef, double* noundef, double* noundef, i32 noundef, double* noundef, double* noundef, i32 noundef, i32 noundef) local_unnamed_addr #4

; Function Attrs: mustprogress norecurse uwtable
; main(): allocates four [5 x double] stack arrays (%x, %x_b, %y, %y_b),
; fills %x with 1..5 and %y_b with 1.0, calls __enzyme_autodiff on @square,
; then prints x[i], y[i], x[i], x_b[i] for i = 0..4 and returns 0.
; NOTE(review): no instruction in this function stores to %x_b or %y before
; the autodiff call, yet both are loaded for the printf afterwards and passed
; as `double noundef`.
define dso_local noundef i32 @main() local_unnamed_addr #5 !dbg !66 {
entry:
  %x = alloca [5 x double], align 16
  %x_b = alloca [5 x double], align 16
  %y = alloca [5 x double], align 16
  %y_b = alloca [5 x double], align 16
  call void @llvm.dbg.value(metadata i32 5, metadata !70, metadata !DIExpression()), !dbg !83
  %0 = bitcast [5 x double]* %x to i8*, !dbg !84
  call void @llvm.lifetime.start.p0i8(i64 40, i8* nonnull %0) #8, !dbg !84
  call void @llvm.dbg.declare(metadata [5 x double]* %x, metadata !72, metadata !DIExpression()), !dbg !85
  %1 = bitcast [5 x double]* %x_b to i8*, !dbg !86
  call void @llvm.lifetime.start.p0i8(i64 40, i8* nonnull %1) #8, !dbg !86
  call void @llvm.dbg.declare(metadata [5 x double]* %x_b, metadata !76, metadata !DIExpression()), !dbg !87
  %2 = bitcast [5 x double]* %y to i8*, !dbg !88
  call void @llvm.lifetime.start.p0i8(i64 40, i8* nonnull %2) #8, !dbg !88
  call void @llvm.dbg.declare(metadata [5 x double]* %y, metadata !77, metadata !DIExpression()), !dbg !89
  %3 = bitcast [5 x double]* %y_b to i8*, !dbg !90
  call void @llvm.lifetime.start.p0i8(i64 40, i8* nonnull %3) #8, !dbg !90
  call void @llvm.dbg.declare(metadata [5 x double]* %y_b, metadata !78, metadata !DIExpression()), !dbg !91
  call void @llvm.dbg.value(metadata i32 0, metadata !79, metadata !DIExpression()), !dbg !92
  br label %for.body, !dbg !93

for.cond.cleanup:                                 ; preds = %for.body
  ; dsquare has been inlined here (the debug locations carry inlinedAt), so
  ; main calls __enzyme_autodiff directly with the constant size 5.
  %arraydecay = getelementptr inbounds [5 x double], [5 x double]* %x, i64 0, i64 0, !dbg !94
  %arraydecay3 = getelementptr inbounds [5 x double], [5 x double]* %x_b, i64 0, i64 0, !dbg !95
  %arraydecay4 = getelementptr inbounds [5 x double], [5 x double]* %y, i64 0, i64 0, !dbg !96
  %arraydecay5 = getelementptr inbounds [5 x double], [5 x double]* %y_b, i64 0, i64 0, !dbg !97
  call void @llvm.dbg.value(metadata double* %arraydecay, metadata !54, metadata !DIExpression()), !dbg !98
  call void @llvm.dbg.value(metadata double* %arraydecay3, metadata !55, metadata !DIExpression()), !dbg !98
  call void @llvm.dbg.value(metadata double* %arraydecay4, metadata !56, metadata !DIExpression()), !dbg !98
  call void @llvm.dbg.value(metadata double* %arraydecay5, metadata !57, metadata !DIExpression()), !dbg !98
  call void @llvm.dbg.value(metadata i32 5, metadata !58, metadata !DIExpression()), !dbg !98
  %4 = load i32, i32* @enzyme_dup, align 4, !dbg !100, !tbaa !61
  %5 = load i32, i32* @enzyme_const, align 4, !dbg !101, !tbaa !61
  call void @_Z17__enzyme_autodiffIJiPKdPdiS2_S1_iiEEvPvDpT_(i8* noundef bitcast (void (double*, double*, i32)* @_Z6squarePKdPdi to i8*), i32 noundef %4, double* noundef nonnull %arraydecay, double* noundef nonnull %arraydecay3, i32 noundef %4, double* noundef nonnull %arraydecay4, double* noundef nonnull %arraydecay5, i32 noundef %5, i32 noundef 5), !dbg !102
  call void @llvm.dbg.value(metadata i32 0, metadata !81, metadata !DIExpression()), !dbg !103
  br label %for.body10, !dbg !104

for.body:                                         ; preds = %entry, %for.body
  ; Initialization loop: x[i] = (double)(i + 1) and y_b[i] = 1.0 for i = 0..4.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !79, metadata !DIExpression()), !dbg !92
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !105
  ; The incremented index doubles as the value i + 1 stored into x[i].
  %6 = trunc i64 %indvars.iv.next to i32, !dbg !108
  %conv = sitofp i32 %6 to double, !dbg !108
  %arrayidx = getelementptr inbounds [5 x double], [5 x double]* %x, i64 0, i64 %indvars.iv, !dbg !109
  store double %conv, double* %arrayidx, align 8, !dbg !110, !tbaa !38
  %arrayidx2 = getelementptr inbounds [5 x double], [5 x double]* %y_b, i64 0, i64 %indvars.iv, !dbg !111
  store double 1.000000e+00, double* %arrayidx2, align 8, !dbg !112, !tbaa !38
  call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !79, metadata !DIExpression()), !dbg !92
  %exitcond.not = icmp eq i64 %indvars.iv.next, 5, !dbg !113
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !93, !llvm.loop !114

for.cond.cleanup9:                                ; preds = %for.body10
  ; Lifetimes are ended in reverse order of their start, then main returns 0.
  call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %3) #8, !dbg !116
  call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %2) #8, !dbg !116
  call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %1) #8, !dbg !116
  call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %0) #8, !dbg !116
  ret i32 0, !dbg !116

for.body10:                                       ; preds = %for.cond.cleanup, %for.body10
  ; Print loop: loads x[i], y[i] and x_b[i] and passes them to printf.
  %indvars.iv37 = phi i64 [ 0, %for.cond.cleanup ], [ %indvars.iv.next38, %for.body10 ]
  call void @llvm.dbg.value(metadata i64 %indvars.iv37, metadata !81, metadata !DIExpression()), !dbg !103
  %arrayidx12 = getelementptr inbounds [5 x double], [5 x double]* %x, i64 0, i64 %indvars.iv37, !dbg !117
  %7 = load double, double* %arrayidx12, align 8, !dbg !117, !tbaa !38
  %arrayidx14 = getelementptr inbounds [5 x double], [5 x double]* %y, i64 0, i64 %indvars.iv37, !dbg !120
  %8 = load double, double* %arrayidx14, align 8, !dbg !120, !tbaa !38
  %arrayidx18 = getelementptr inbounds [5 x double], [5 x double]* %x_b, i64 0, i64 %indvars.iv37, !dbg !121
  %9 = load double, double* %arrayidx18, align 8, !dbg !121, !tbaa !38
  %call = call i32 (i8*, ...) @printf(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([34 x i8], [34 x i8]* @.str, i64 0, i64 0), double noundef %7, double noundef %8, double noundef %7, double noundef %9), !dbg !122
  %indvars.iv.next38 = add nuw nsw i64 %indvars.iv37, 1, !dbg !123
  call void @llvm.dbg.value(metadata i64 %indvars.iv.next38, metadata !81, metadata !DIExpression()), !dbg !103
  %exitcond40.not = icmp eq i64 %indvars.iv.next38, 5, !dbg !124
  br i1 %exitcond40.not, label %for.cond.cleanup9, label %for.body10, !dbg !104, !llvm.loop !125
}

; Function Attrs: nofree nounwind
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr #6

; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare void @llvm.dbg.value(metadata, metadata, metadata) #7

attributes #0 = { mustprogress nofree norecurse nosync nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind readnone speculatable willreturn }
attributes #2 = { argmemonly mustprogress nocallback nofree nosync nounwind willreturn }
attributes #3 = { mustprogress uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #4 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #5 = { mustprogress norecurse uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #6 = { nofree nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #7 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
attributes #8 = { nounwind }

!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!12, !13, !14, !15}
!llvm.ident = !{!16}

!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "enzyme_dup", scope: !2, file: !3, line: 4, type: !9, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 15.0.0 (git@github.com:jdoerfert/llvm-project b2d4937701a3ddc99fae0c988cd69e6d0744112c)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !4, globals: !6, splitDebugInlining: false, nameTableKind: None)
!3 = !DIFile(filename: "test.cpp", directory: "/home/wmoses/git/Enzyme/enzyme/buildomp", checksumkind: CSK_MD5, checksum: "3cac590f176080abbb52b794dd22086d")
!4 = !{!5}
!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
!6 = !{!0, !7, !10}
!7 = !DIGlobalVariableExpression(var: !8, expr: !DIExpression())
!8 = distinct !DIGlobalVariable(name: "enzyme_out", scope: !2, file: !3, line: 5, type: !9, isLocal: false, isDefinition: true)
!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression())
!11 = distinct !DIGlobalVariable(name: "enzyme_const", scope: !2, file: !3, line: 6, type: !9, isLocal: false, isDefinition: true)
!12 = !{i32 7, !"Dwarf Version", i32 5}
!13 = !{i32 2, !"Debug Info Version", i32 3}
!14 = !{i32 1, !"wchar_size", i32 4}
!15 = !{i32 7, !"uwtable", i32 2}
!16 = !{!"clang version 15.0.0 (git@github.com:jdoerfert/llvm-project b2d4937701a3ddc99fae0c988cd69e6d0744112c)"}
!17 = distinct !DISubprogram(name: "square", linkageName: "_Z6squarePKdPdi", scope: !3, file: !3, line: 11, type: !18, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !24)
!18 = !DISubroutineType(types: !19)
!19 = !{null, !20, !23, !9}
!20 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !21, size: 64)
!21 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !22)
!22 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float)
!23 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64)
!24 = !{!25, !26, !27, !28}
!25 = !DILocalVariable(name: "x", arg: 1, scope: !17, file: !3, line: 11, type: !20)
!26 = !DILocalVariable(name: "y", arg: 2, scope: !17, file: !3, line: 11, type: !23)
!27 = !DILocalVariable(name: "size", arg: 3, scope: !17, file: !3, line: 11, type: !9)
!28 = !DILocalVariable(name: "i", scope: !29, file: !3, line: 12, type: !9)
!29 = distinct !DILexicalBlock(scope: !17, file: !3, line: 12, column: 3)
!30 = !DILocation(line: 0, scope: !17)
!31 = !DILocation(line: 0, scope: !29)
!32 = !DILocation(line: 12, column: 20, scope: !33)
!33 = distinct !DILexicalBlock(scope: !29, file: !3, line: 12, column: 3)
!34 = !DILocation(line: 12, column: 3, scope: !29)
!35 = !DILocation(line: 15, column: 1, scope: !17)
!36 = !DILocation(line: 13, column: 12, scope: !37)
!37 = distinct !DILexicalBlock(scope: !33, file: !3, line: 12, column: 36)
!38 = !{!39, !39, i64 0}
!39 = !{!"double", !40, i64 0}
!40 = !{!"omnipotent char", !41, i64 0}
!41 = !{!"Simple C++ TBAA"}
!42 = !DILocation(line: 13, column: 17, scope: !37)
!43 = !DILocation(line: 13, column: 5, scope: !37)
!44 = !DILocation(line: 13, column: 10, scope: !37)
!45 = !DILocation(line: 12, column: 30, scope: !33)
!46 = distinct !{!46, !34, !47, !48, !49}
!47 = !DILocation(line: 14, column: 3, scope: !29)
!48 = !{!"llvm.loop.mustprogress"}
!49 = !{!"llvm.loop.unroll.disable"}
!50 = distinct !DISubprogram(name: "dsquare", linkageName: "_Z7dsquarePKdPdS1_S0_i", scope: !3, file: !3, line: 17, type: !51, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !53)
!51 = !DISubroutineType(types: !52)
!52 = !{null, !20, !23, !23, !20, !9}
!53 = !{!54, !55, !56, !57, !58}
!54 = !DILocalVariable(name: "x", arg: 1, scope: !50, file: !3, line: 17, type: !20)
!55 = !DILocalVariable(name: "x_b", arg: 2, scope: !50, file: !3, line: 17, type: !23)
!56 = !DILocalVariable(name: "y", arg: 3, scope: !50, file: !3, line: 17, type: !23)
!57 = !DILocalVariable(name: "y_b", arg: 4, scope: !50, file: !3, line: 17, type: !20)
!58 = !DILocalVariable(name: "size", arg: 5, scope: !50, file: !3, line: 17, type: !9)
!59 = !DILocation(line: 0, scope: !50)
!60 = !DILocation(line: 20, column: 9, scope: !50)
!61 = !{!62, !62, i64 0}
!62 = !{!"int", !40, i64 0}
!63 = !DILocation(line: 22, column: 9, scope: !50)
!64 = !DILocation(line: 19, column: 12, scope: !50)
!65 = !DILocation(line: 19, column: 5, scope: !50)
!66 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 24, type: !67, scopeLine: 24, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !69)
!67 = !DISubroutineType(types: !68)
!68 = !{!9}
!69 = !{!70, !72, !76, !77, !78, !79, !81}
!70 = !DILocalVariable(name: "n", scope: !66, file: !3, line: 25, type: !71)
!71 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9)
!72 = !DILocalVariable(name: "x", scope: !66, file: !3, line: 26, type: !73)
!73 = !DICompositeType(tag: DW_TAG_array_type, baseType: !22, size: 320, elements: !74)
!74 = !{!75}
!75 = !DISubrange(count: 5)
!76 = !DILocalVariable(name: "x_b", scope: !66, file: !3, line: 27, type: !73)
!77 = !DILocalVariable(name: "y", scope: !66, file: !3, line: 28, type: !73)
!78 = !DILocalVariable(name: "y_b", scope: !66, file: !3, line: 29, type: !73)
!79 = !DILocalVariable(name: "i", scope: !80, file: !3, line: 31, type: !9)
!80 = distinct !DILexicalBlock(scope: !66, file: !3, line: 31, column: 5)
!81 = !DILocalVariable(name: "i", scope: !82, file: !3, line: 38, type: !9)
!82 = distinct !DILexicalBlock(scope: !66, file: !3, line: 38, column: 5)
!83 = !DILocation(line: 0, scope: !66)
!84 = !DILocation(line: 26, column: 5, scope: !66)
!85 = !DILocation(line: 26, column: 12, scope: !66)
!86 = !DILocation(line: 27, column: 5, scope: !66)
!87 = !DILocation(line: 27, column: 12, scope: !66)
!88 = !DILocation(line: 28, column: 5, scope: !66)
!89 = !DILocation(line: 28, column: 12, scope: !66)
!90 = !DILocation(line: 29, column: 5, scope: !66)
!91 = !DILocation(line: 29, column: 12, scope: !66)
!92 = !DILocation(line: 0, scope: !80)
!93 = !DILocation(line: 31, column: 5, scope: !80)
!94 = !DILocation(line: 36, column: 13, scope: !66)
!95 = !DILocation(line: 36, column: 16, scope: !66)
!96 = !DILocation(line: 36, column: 21, scope: !66)
!97 = !DILocation(line: 36, column: 24, scope: !66)
!98 = !DILocation(line: 0, scope: !50, inlinedAt: !99)
!99 = distinct !DILocation(line: 36, column: 5, scope: !66)
!100 = !DILocation(line: 20, column: 9, scope: !50, inlinedAt: !99)
!101 = !DILocation(line: 22, column: 9, scope: !50, inlinedAt: !99)
!102 = !DILocation(line: 19, column: 12, scope: !50, inlinedAt: !99)
!103 = !DILocation(line: 0, scope: !82)
!104 = !DILocation(line: 38, column: 5, scope: !82)
!105 = !DILocation(line: 32, column: 16, scope: !106)
!106 = distinct !DILexicalBlock(scope: !107, file: !3, line: 31, column: 28)
!107 = distinct !DILexicalBlock(scope: !80, file: !3, line: 31, column: 5)
!108 = !DILocation(line: 32, column: 14, scope: !106)
!109 = !DILocation(line: 32, column: 7, scope: !106)
!110 = !DILocation(line: 32, column: 12, scope: !106)
!111 = !DILocation(line: 33, column: 7, scope: !106)
!112 = !DILocation(line: 33, column: 14, scope: !106)
!113 = !DILocation(line: 31, column: 19, scope: !107)
!114 = distinct !{!114, !93, !115, !48, !49}
!115 = !DILocation(line: 34, column: 5, scope: !80)
!116 = !DILocation(line: 41, column: 1, scope: !66)
!117 = !DILocation(line: 39, column: 52, scope: !118)
!118 = distinct !DILexicalBlock(scope: !119, file: !3, line: 38, column: 28)
!119 = distinct !DILexicalBlock(scope: !82, file: !3, line: 38, column: 5)
!120 = !DILocation(line: 39, column: 58, scope: !118)
!121 = !DILocation(line: 39, column: 70, scope: !118)
!122 = !DILocation(line: 39, column: 7, scope: !118)
!123 = !DILocation(line: 38, column: 24, scope: !119)
!124 = !DILocation(line: 38, column: 19, scope: !119)
!125 = distinct !{!125, !104, !126, !48, !49}
!126 = !DILocation(line: 40, column: 5, scope: !82)
; output.ll
; ModuleID = 'input.ll'
source_filename = "test.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@enzyme_dup = dso_local local_unnamed_addr global i32 0, align 4, !dbg !0
@enzyme_out = dso_local local_unnamed_addr global i32 0, align 4, !dbg !7
@enzyme_const = dso_local local_unnamed_addr global i32 0, align 4, !dbg !10
@.str = private unnamed_addr constant [34 x i8] c"t2 square(%f)=%f, dsquare(%f)=%f\0A\00", align 1

; Function Attrs: mustprogress nofree norecurse nosync nounwind uwtable
; square(x, y, size) as it appears in this module: for each i in [0, size)
; loads x[i], multiplies it by itself and stores the product to y[i].
define dso_local void @_Z6squarePKdPdi(double* nocapture noundef readonly %x, double* nocapture noundef writeonly %y, i32 noundef %size) #0 !dbg !17 {
entry:
  call void @llvm.dbg.value(metadata double* %x, metadata !25, metadata !DIExpression()), !dbg !30
  call void @llvm.dbg.value(metadata double* %y, metadata !26, metadata !DIExpression()), !dbg !30
  call void @llvm.dbg.value(metadata i32 %size, metadata !27, metadata !DIExpression()), !dbg !30
  call void @llvm.dbg.value(metadata i32 0, metadata !28, metadata !DIExpression()), !dbg !31
  ; Loop guard: the body is skipped entirely when size <= 0.
  %cmp11 = icmp sgt i32 %size, 0, !dbg !32
  br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup, !dbg !34

for.body.preheader:                               ; preds = %entry
  ; size is positive on this path; it is zero-extended to a 64-bit trip count.
  %wide.trip.count = zext i32 %size to i64, !dbg !32
  br label %for.body, !dbg !34

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void, !dbg !35

for.body:                                         ; preds = %for.body, %for.body.preheader
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !28, metadata !DIExpression()), !dbg !31
  %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv, !dbg !36
  %0 = load double, double* %arrayidx, align 8, !dbg !36, !tbaa !38
  ; x[i] is loaded once and used for both operands of the multiply.
  %mul = fmul double %0, %0, !dbg !42
  %arrayidx4 = getelementptr inbounds double, double* %y, i64 %indvars.iv, !dbg !43
  store double %mul, double* %arrayidx4, align 8, !dbg !44, !tbaa !38
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !45
  call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !28, metadata !DIExpression()), !dbg !31
  ; Exit once the incremented index equals the trip count.
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !32
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !34, !llvm.loop !46
}

; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2

; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2

; Function Attrs: mustprogress uwtable
; dsquare(x, x_b, y, y_b, size) in this module: calls @diffe_Z6squarePKdPdi
; directly with the four pointers and size; no __enzyme_autodiff call and no
; activity-marker arguments appear here. The definition of
; @diffe_Z6squarePKdPdi is not shown in this part of the module.
define dso_local void @_Z7dsquarePKdPdS1_S0_i(double* noundef %x, double* noundef %x_b, double* noundef %y, double* noundef %y_b, i32 noundef %size) local_unnamed_addr #3 !dbg !50 {
entry:
  call void @llvm.dbg.value(metadata double* %x, metadata !54, metadata !DIExpression()), !dbg !59
  call void @llvm.dbg.value(metadata double* %x_b, metadata !55, metadata !DIExpression()), !dbg !59
  call void @llvm.dbg.value(metadata double* %y, metadata !56, metadata !DIExpression()), !dbg !59
  call void @llvm.dbg.value(metadata double* %y_b, metadata !57, metadata !DIExpression()), !dbg !59
  call void @llvm.dbg.value(metadata i32 %size, metadata !58, metadata !DIExpression()), !dbg !59
  ; The marker globals are still loaded, but %0 and %1 have no users below.
  %0 = load i32, i32* @enzyme_dup, align 4, !dbg !60, !tbaa !61
  %1 = load i32, i32* @enzyme_const, align 4, !dbg !63, !tbaa !61
  call void @diffe_Z6squarePKdPdi(double* %x, double* %x_b, double* %y, double* %y_b, i32 %size), !dbg !64
  ret void, !dbg !65
}

declare dso_local void @_Z17__enzyme_autodiffIJiPKdPdiS2_S1_iiEEvPvDpT_(i8* noundef, i32 noundef, double* noundef, double* noundef, i32 noundef, double* noundef, double* noundef, i32 noundef, i32 noundef) local_unnamed_addr #4

; Function Attrs: mustprogress norecurse uwtable
; main() in this module: allocates four [5 x double] stack arrays, fills %x
; with 1..5 and %y_b with 1.0, calls @diffe_Z6squarePKdPdi on them with size 5,
; then prints x[i], y[i], x[i], x_b[i] for i = 0..4 and returns 0.
; NOTE(review): nothing in this function stores to %x_b before it is handed to
; @diffe_Z6squarePKdPdi, and its elements are later passed to printf as
; `double noundef`.
define dso_local noundef i32 @main() local_unnamed_addr #5 !dbg !66 {
entry:
  %x = alloca [5 x double], align 16
  %x_b = alloca [5 x double], align 16
  %y = alloca [5 x double], align 16
  %y_b = alloca [5 x double], align 16
  call void @llvm.dbg.value(metadata i32 5, metadata !70, metadata !DIExpression()), !dbg !83
  %0 = bitcast [5 x double]* %x to i8*, !dbg !84
  call void @llvm.lifetime.start.p0i8(i64 40, i8* nonnull %0) #8, !dbg !84
  call void @llvm.dbg.declare(metadata [5 x double]* %x, metadata !72, metadata !DIExpression()), !dbg !85
  %1 = bitcast [5 x double]* %x_b to i8*, !dbg !86
  call void @llvm.lifetime.start.p0i8(i64 40, i8* nonnull %1) #8, !dbg !86
  call void @llvm.dbg.declare(metadata [5 x double]* %x_b, metadata !76, metadata !DIExpression()), !dbg !87
  %2 = bitcast [5 x double]* %y to i8*, !dbg !88
  call void @llvm.lifetime.start.p0i8(i64 40, i8* nonnull %2) #8, !dbg !88
  call void @llvm.dbg.declare(metadata [5 x double]* %y, metadata !77, metadata !DIExpression()), !dbg !89
  %3 = bitcast [5 x double]* %y_b to i8*, !dbg !90
  call void @llvm.lifetime.start.p0i8(i64 40, i8* nonnull %3) #8, !dbg !90
  call void @llvm.dbg.declare(metadata [5 x double]* %y_b, metadata !78, metadata !DIExpression()), !dbg !91
  call void @llvm.dbg.value(metadata i32 0, metadata !79, metadata !DIExpression()), !dbg !92
  br label %for.body, !dbg !93

for.cond.cleanup:                                 ; preds = %for.body
  %arraydecay = getelementptr inbounds [5 x double], [5 x double]* %x, i64 0, i64 0, !dbg !94
  %arraydecay3 = getelementptr inbounds [5 x double], [5 x double]* %x_b, i64 0, i64 0, !dbg !95
  %arraydecay4 = getelementptr inbounds [5 x double], [5 x double]* %y, i64 0, i64 0, !dbg !96
  %arraydecay5 = getelementptr inbounds [5 x double], [5 x double]* %y_b, i64 0, i64 0, !dbg !97
  call void @llvm.dbg.value(metadata double* %arraydecay, metadata !54, metadata !DIExpression()), !dbg !98
  call void @llvm.dbg.value(metadata double* %arraydecay3, metadata !55, metadata !DIExpression()), !dbg !98
  call void @llvm.dbg.value(metadata double* %arraydecay4, metadata !56, metadata !DIExpression()), !dbg !98
  call void @llvm.dbg.value(metadata double* %arraydecay5, metadata !57, metadata !DIExpression()), !dbg !98
  call void @llvm.dbg.value(metadata i32 5, metadata !58, metadata !DIExpression()), !dbg !98
  ; The marker globals are still loaded, but %4 and %5 have no users below.
  %4 = load i32, i32* @enzyme_dup, align 4, !dbg !100, !tbaa !61
  %5 = load i32, i32* @enzyme_const, align 4, !dbg !101, !tbaa !61
  ; Direct call to the diffe_ function with (x, x_b, y, y_b, 5).
  call void @diffe_Z6squarePKdPdi(double* %arraydecay, double* %arraydecay3, double* %arraydecay4, double* %arraydecay5, i32 5), !dbg !102
  call void @llvm.dbg.value(metadata i32 0, metadata !81, metadata !DIExpression()), !dbg !103
  br label %for.body10, !dbg !104

for.body:                                         ; preds = %for.body, %entry
  ; Initialization loop: x[i] = (double)(i + 1) and y_b[i] = 1.0 for i = 0..4.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !79, metadata !DIExpression()), !dbg !92
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !105
  %6 = trunc i64 %indvars.iv.next to i32, !dbg !108
  %conv = sitofp i32 %6 to double, !dbg !108
  %arrayidx = getelementptr inbounds [5 x double], [5 x double]* %x, i64 0, i64 %indvars.iv, !dbg !109
  store double %conv, double* %arrayidx, align 8, !dbg !110, !tbaa !38
  %arrayidx2 = getelementptr inbounds [5 x double], [5 x double]* %y_b, i64 0, i64 %indvars.iv, !dbg !111
  store double 1.000000e+00, double* %arrayidx2, align 8, !dbg !112, !tbaa !38
  call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !79, metadata !DIExpression()), !dbg !92
  %exitcond.not = icmp eq i64 %indvars.iv.next, 5, !dbg !113
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !93, !llvm.loop !114

for.cond.cleanup9:                                ; preds = %for.body10
  ; Lifetimes are ended in reverse order of their start, then main returns 0.
  call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %3) #8, !dbg !116
  call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %2) #8, !dbg !116
  call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %1) #8, !dbg !116
  call void @llvm.lifetime.end.p0i8(i64 40, i8* nonnull %0) #8, !dbg !116
  ret i32 0, !dbg !116

for.body10:                                       ; preds = %for.body10, %for.cond.cleanup
  ; Print loop: loads x[i], y[i] and x_b[i] and passes them to printf.
  %indvars.iv37 = phi i64 [ 0, %for.cond.cleanup ], [ %indvars.iv.next38, %for.body10 ]
  call void @llvm.dbg.value(metadata i64 %indvars.iv37, metadata !81, metadata !DIExpression()), !dbg !103
  %arrayidx12 = getelementptr inbounds [5 x double], [5 x double]* %x, i64 0, i64 %indvars.iv37, !dbg !117
  %7 = load double, double* %arrayidx12, align 8, !dbg !117, !tbaa !38
  %arrayidx14 = getelementptr inbounds [5 x double], [5 x double]* %y, i64 0, i64 %indvars.iv37, !dbg !120
  %8 = load double, double* %arrayidx14, align 8, !dbg !120, !tbaa !38
  %arrayidx18 = getelementptr inbounds [5 x double], [5 x double]* %x_b, i64 0, i64 %indvars.iv37, !dbg !121
  %9 = load double, double* %arrayidx18, align 8, !dbg !121, !tbaa !38
  %call = call i32 (i8*, ...) @printf(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([34 x i8], [34 x i8]* @.str, i64 0, i64 0), double noundef %7, double noundef %8, double noundef %7, double noundef %9), !dbg !122
  %indvars.iv.next38 = add nuw nsw i64 %indvars.iv37, 1, !dbg !123
  call void @llvm.dbg.value(metadata i64 %indvars.iv.next38, metadata !81, metadata !DIExpression()), !dbg !103
  %exitcond40.not = icmp eq i64 %indvars.iv.next38, 5, !dbg !124
  br i1 %exitcond40.not, label %for.cond.cleanup9, label %for.body10, !dbg !104, !llvm.loop !125
}

; C library printf; called from the printing loop in main's for.body10 block.
; Function Attrs: nofree nounwind
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr #6

; Debug-info intrinsic that the llvm.dbg.value call sites in this module refer to.
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare void @llvm.dbg.value(metadata, metadata, metadata) #1

; Copy of square(x, y, size); its debug info (!127) still names it "square".
; NOTE(review): the "preprocess_" prefix presumably marks the clone Enzyme makes
; before differentiating - confirm against Enzyme's naming.
; Stores x[i] * x[i] into y[i] for every i in [0, size); does nothing when
; size <= 0.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn uwtable
define dso_local void @preprocess__Z6squarePKdPdi(double* nocapture noundef readonly %x, double* nocapture noundef writeonly %y, i32 noundef %size) #7 !dbg !127 {
entry:
  call void @llvm.dbg.value(metadata double* %x, metadata !129, metadata !DIExpression()) #9, !dbg !134
  call void @llvm.dbg.value(metadata double* %y, metadata !130, metadata !DIExpression()) #9, !dbg !134
  call void @llvm.dbg.value(metadata i32 %size, metadata !131, metadata !DIExpression()) #9, !dbg !134
  call void @llvm.dbg.value(metadata i32 0, metadata !132, metadata !DIExpression()) #9, !dbg !135
  ; Guard: the loop is entered only when size > 0.
  %cmp11 = icmp sgt i32 %size, 0, !dbg !136
  br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup, !dbg !138

for.body.preheader:                               ; preds = %entry
  ; size is known positive on this path, so zero-extension yields the trip count.
  %wide.trip.count = zext i32 %size to i64, !dbg !136
  br label %for.body, !dbg !138

for.cond.cleanup.loopexit:                        ; preds = %for.body
  br label %for.cond.cleanup, !dbg !139

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  ret void, !dbg !139

for.body:                                         ; preds = %for.body, %for.body.preheader
  ; %iv is the 64-bit loop index, starting at 0 and stepping by 1.
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %for.body.preheader ]
  call void @llvm.dbg.value(metadata i64 %iv, metadata !132, metadata !DIExpression()) #9, !dbg !135
  %iv.next = add nuw nsw i64 %iv, 1, !dbg !140
  ; y[iv] = x[iv] * x[iv]
  %arrayidx = getelementptr inbounds double, double* %x, i64 %iv, !dbg !140
  %0 = load double, double* %arrayidx, align 8, !dbg !140, !tbaa !38
  %mul = fmul double %0, %0, !dbg !142
  %arrayidx4 = getelementptr inbounds double, double* %y, i64 %iv, !dbg !143
  store double %mul, double* %arrayidx4, align 8, !dbg !144, !tbaa !38
  call void @llvm.dbg.value(metadata i64 %iv.next, metadata !132, metadata !DIExpression()) #9, !dbg !135
  ; Leave the loop once the next index equals the trip count.
  %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count, !dbg !136
  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !dbg !138, !llvm.loop !145
}

; Gradient of square(x, y, size) with shadow buffers x' and y'.
; Forward sweep: y[i] = x[i] * x[i], and each x[i] is saved into a heap cache.
; Reverse sweep (i from size-1 down to 0): reads d = y'[i], zeroes y'[i], and
; adds d*x[i] + d*x[i] into x'[i]; the cache is freed once i reaches 0.
; When size <= 0 neither sweep runs and nothing is allocated.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn uwtable
define internal void @diffe_Z6squarePKdPdi(double* nocapture noundef readonly %x, double* nocapture %"x'", double* nocapture noundef writeonly %y, double* nocapture %"y'", i32 noundef %size) #7 !dbg !147 {
entry:
  ; Stack slots: reverse-loop counter, accumulator for the derivative of %mul,
  ; pointer to the heap cache, and the accumulator that is flushed into x'[i].
  ; Both accumulators start at 0.0.
  %"iv'ac" = alloca i64, align 8
  %"mul'de" = alloca double, align 8
  store double 0.000000e+00, double* %"mul'de", align 8
  %_cache = alloca double*, align 8
  %"'de" = alloca double, align 8
  store double 0.000000e+00, double* %"'de", align 8
  ; Guard shared by both sweeps: loops run only when size > 0.
  %cmp11 = icmp sgt i32 %size, 0, !dbg !154
  br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup, !dbg !156

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %size to i64, !dbg !154
  ; (trip count - 1) + 1 is computed twice with different wrap flags; %1 has no
  ; users, %2 feeds the allocation size.
  %0 = add nsw i64 %wide.trip.count, -1, !dbg !156
  %1 = add nuw nsw i64 %0, 1, !dbg !156
  %2 = add nuw i64 %0, 1, !dbg !156
  ; Cache size in bytes: 8 (one double) per loop iteration.
  %mallocsize = mul nuw nsw i64 %2, 8
  ; The call site marks the result nonnull; no null check follows.
  %malloccall = tail call noalias nonnull i8* @malloc(i64 %mallocsize)
  %_malloccache = bitcast i8* %malloccall to double*
  ; Publish the cache pointer through its stack slot for later reloads.
  store double* %_malloccache, double** %_cache, align 8, !dbg !156, !invariant.group !157
  br label %for.body, !dbg !156

for.cond.cleanup.loopexit:                        ; preds = %for.body
  br label %for.cond.cleanup, !dbg !158

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  ; End of the forward sweep; continue with the reverse sweep entry.
  br label %invertfor.cond.cleanup, !dbg !158

for.body:                                         ; preds = %for.body, %for.body.preheader
  ; Forward loop body: same computation as the original square loop.
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %for.body.preheader ]
  %iv.next = add nuw nsw i64 %iv, 1, !dbg !159
  ; %"arrayidx'ipg" (and %"arrayidx4'ipg" below) have no users in this function;
  ; the reverse sweep recomputes the shadow addresses as *_unwrap values.
  %"arrayidx'ipg" = getelementptr inbounds double, double* %"x'", i64 %iv, !dbg !159
  %arrayidx = getelementptr inbounds double, double* %x, i64 %iv, !dbg !159
  %3 = load double, double* %arrayidx, align 8, !dbg !159, !tbaa !38
  %mul = fmul double %3, %3, !dbg !161
  %"arrayidx4'ipg" = getelementptr inbounds double, double* %"y'", i64 %iv, !dbg !162
  %arrayidx4 = getelementptr inbounds double, double* %y, i64 %iv, !dbg !162
  store double %mul, double* %arrayidx4, align 8, !dbg !163, !tbaa !38
  ; Save x[iv] into cache[iv] so the reverse sweep can read it back.
  %4 = load double*, double** %_cache, align 8, !dbg !154, !dereferenceable !164, !invariant.group !157
  %5 = getelementptr inbounds double, double* %4, i64 %iv, !dbg !154
  store double %3, double* %5, align 8, !dbg !154, !invariant.group !165
  %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count, !dbg !154
  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !dbg !156, !llvm.loop !166

invertentry:                                      ; preds = %invertfor.cond.cleanup, %invertfor.body.preheader
  ; Single exit of the function.
  ret void

invertfor.body.preheader:                         ; preds = %invertfor.body
  ; Reached after the reverse iteration for index 0: release the cache.
  ; %6 has no users.
  %6 = load i64, i64* %"iv'ac", align 8
  %forfree = load double*, double** %_cache, align 8, !dereferenceable !164, !invariant.group !157
  %7 = bitcast double* %forfree to i8*
  tail call void @free(i8* nonnull %7), !dbg !168
  br label %invertentry

invertfor.cond.cleanup.loopexit:                  ; preds = %invertfor.cond.cleanup
  ; Starting index of the reverse sweep: size - 1.
  %wide.trip.count_unwrap = zext i32 %size to i64
  %_unwrap = add nsw i64 %wide.trip.count_unwrap, -1
  br label %mergeinvertfor.body_for.cond.cleanup.loopexit

mergeinvertfor.body_for.cond.cleanup.loopexit:    ; preds = %invertfor.cond.cleanup.loopexit
  ; Initialise the reverse-loop counter.
  store i64 %_unwrap, i64* %"iv'ac", align 8
  br label %invertfor.body

invertfor.cond.cleanup:                           ; preds = %for.cond.cleanup
  ; The reverse sweep runs only if the forward loop ran (same %cmp11 guard).
  br i1 %cmp11, label %invertfor.cond.cleanup.loopexit, label %invertentry

invertfor.body:                                   ; preds = %incinvertfor.body, %mergeinvertfor.body_for.cond.cleanup.loopexit
  ; Take y'[i] and reset it to zero in memory.
  %8 = load i64, i64* %"iv'ac", align 8
  %"arrayidx4'ipg_unwrap" = getelementptr inbounds double, double* %"y'", i64 %8
  %9 = load double, double* %"arrayidx4'ipg_unwrap", align 8
  store double 0.000000e+00, double* %"arrayidx4'ipg_unwrap", align 8
  ; Add it to the %mul accumulator and read the total back (%12).
  %10 = load double, double* %"mul'de", align 8
  %11 = fadd fast double %10, %9
  store double %11, double* %"mul'de", align 8
  %12 = load double, double* %"mul'de", align 8
  %13 = load i64, i64* %"iv'ac", align 8
  ; Trip count recomputed here; %14 has no users.
  %wide.trip.count_unwrap1 = zext i32 %size to i64
  %_unwrap2 = add nsw i64 %wide.trip.count_unwrap1, -1
  %14 = add nuw i64 %_unwrap2, 1
  ; Reload the cached x[i] saved by the forward sweep.
  %15 = load double*, double** %_cache, align 8, !dereferenceable !164, !invariant.group !157
  %16 = getelementptr inbounds double, double* %15, i64 %13
  %17 = load double, double* %16, align 8, !invariant.group !165
  ; One product per operand of x[i] * x[i]; both operands are the same value,
  ; so the two products are identical.
  %m0diffe = fmul fast double %12, %17
  %m1diffe = fmul fast double %12, %17
  store double 0.000000e+00, double* %"mul'de", align 8
  ; Sum both products in the second accumulator, then read and clear it.
  %18 = load double, double* %"'de", align 8
  %19 = fadd fast double %18, %m0diffe
  store double %19, double* %"'de", align 8
  %20 = load double, double* %"'de", align 8
  %21 = fadd fast double %20, %m1diffe
  store double %21, double* %"'de", align 8
  %22 = load double, double* %"'de", align 8
  store double 0.000000e+00, double* %"'de", align 8
  ; x'[i] += accumulated value.
  %23 = load i64, i64* %"iv'ac", align 8
  %"arrayidx'ipg_unwrap" = getelementptr inbounds double, double* %"x'", i64 %23
  %24 = load double, double* %"arrayidx'ipg_unwrap", align 8
  %25 = fadd fast double %24, %22
  store double %25, double* %"arrayidx'ipg_unwrap", align 8
  ; Stop after processing index 0; %28 (the negated test) has no users.
  %26 = load i64, i64* %"iv'ac", align 8
  %27 = icmp eq i64 %26, 0
  %28 = xor i1 %27, true
  br i1 %27, label %invertfor.body.preheader, label %incinvertfor.body

incinvertfor.body:                                ; preds = %invertfor.body
  ; Step the reverse-loop counter down by one and iterate again.
  %29 = load i64, i64* %"iv'ac", align 8
  %30 = add nsw i64 %29, -1
  store i64 %30, i64* %"iv'ac", align 8
  br label %invertfor.body
}

; Allocator called by diffe_Z6squarePKdPdi to obtain its forward-sweep cache.
declare noalias i8* @malloc(i64)

; Called by diffe_Z6squarePKdPdi to release that cache at the end of the reverse sweep.
declare void @free(i8*)

; Attribute groups. Users visible in this part of the module:
;   #1 -> the llvm.dbg.value declaration
;   #6 -> the printf declaration
;   #7 -> preprocess__Z6squarePKdPdi and diffe_Z6squarePKdPdi
;   #8 -> the llvm.lifetime.end call sites
;   #9 -> the llvm.dbg.value call sites inside preprocess__Z6squarePKdPdi
; Users of #0 and #2-#5 are not in this part of the module.
attributes #0 = { mustprogress nofree norecurse nosync nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
attributes #2 = { argmemonly nocallback nofree nosync nounwind willreturn }
attributes #3 = { mustprogress uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #4 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #5 = { mustprogress norecurse uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #6 = { nofree nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #7 = { mustprogress nofree norecurse nosync nounwind willreturn uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #8 = { nounwind }
attributes #9 = { mustprogress willreturn }

!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!12, !13, !14, !15}
!llvm.ident = !{!16}

!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "enzyme_dup", scope: !2, file: !3, line: 4, type: !9, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 15.0.0 (git@github.com:jdoerfert/llvm-project b2d4937701a3ddc99fae0c988cd69e6d0744112c)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !4, globals: !6, splitDebugInlining: false, nameTableKind: None)
!3 = !DIFile(filename: "test.cpp", directory: "/home/wmoses/git/Enzyme/enzyme/buildomp", checksumkind: CSK_MD5, checksum: "3cac590f176080abbb52b794dd22086d")
!4 = !{!5}
!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
!6 = !{!0, !7, !10}
!7 = !DIGlobalVariableExpression(var: !8, expr: !DIExpression())
!8 = distinct !DIGlobalVariable(name: "enzyme_out", scope: !2, file: !3, line: 5, type: !9, isLocal: false, isDefinition: true)
!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression())
!11 = distinct !DIGlobalVariable(name: "enzyme_const", scope: !2, file: !3, line: 6, type: !9, isLocal: false, isDefinition: true)
!12 = !{i32 7, !"Dwarf Version", i32 5}
!13 = !{i32 2, !"Debug Info Version", i32 3}
!14 = !{i32 1, !"wchar_size", i32 4}
!15 = !{i32 7, !"uwtable", i32 2}
!16 = !{!"clang version 15.0.0 (git@github.com:jdoerfert/llvm-project b2d4937701a3ddc99fae0c988cd69e6d0744112c)"}
!17 = distinct !DISubprogram(name: "square", linkageName: "_Z6squarePKdPdi", scope: !3, file: !3, line: 11, type: !18, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !24)
!18 = !DISubroutineType(types: !19)
!19 = !{null, !20, !23, !9}
!20 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !21, size: 64)
!21 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !22)
!22 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float)
!23 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64)
!24 = !{!25, !26, !27, !28}
!25 = !DILocalVariable(name: "x", arg: 1, scope: !17, file: !3, line: 11, type: !20)
!26 = !DILocalVariable(name: "y", arg: 2, scope: !17, file: !3, line: 11, type: !23)
!27 = !DILocalVariable(name: "size", arg: 3, scope: !17, file: !3, line: 11, type: !9)
!28 = !DILocalVariable(name: "i", scope: !29, file: !3, line: 12, type: !9)
!29 = distinct !DILexicalBlock(scope: !17, file: !3, line: 12, column: 3)
!30 = !DILocation(line: 0, scope: !17)
!31 = !DILocation(line: 0, scope: !29)
!32 = !DILocation(line: 12, column: 20, scope: !33)
!33 = distinct !DILexicalBlock(scope: !29, file: !3, line: 12, column: 3)
!34 = !DILocation(line: 12, column: 3, scope: !29)
!35 = !DILocation(line: 15, column: 1, scope: !17)
!36 = !DILocation(line: 13, column: 12, scope: !37)
!37 = distinct !DILexicalBlock(scope: !33, file: !3, line: 12, column: 36)
!38 = !{!39, !39, i64 0}
!39 = !{!"double", !40, i64 0}
!40 = !{!"omnipotent char", !41, i64 0}
!41 = !{!"Simple C++ TBAA"}
!42 = !DILocation(line: 13, column: 17, scope: !37)
!43 = !DILocation(line: 13, column: 5, scope: !37)
!44 = !DILocation(line: 13, column: 10, scope: !37)
!45 = !DILocation(line: 12, column: 30, scope: !33)
!46 = distinct !{!46, !34, !47, !48, !49}
!47 = !DILocation(line: 14, column: 3, scope: !29)
!48 = !{!"llvm.loop.mustprogress"}
!49 = !{!"llvm.loop.unroll.disable"}
!50 = distinct !DISubprogram(name: "dsquare", linkageName: "_Z7dsquarePKdPdS1_S0_i", scope: !3, file: !3, line: 17, type: !51, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !53)
!51 = !DISubroutineType(types: !52)
!52 = !{null, !20, !23, !23, !20, !9}
!53 = !{!54, !55, !56, !57, !58}
!54 = !DILocalVariable(name: "x", arg: 1, scope: !50, file: !3, line: 17, type: !20)
!55 = !DILocalVariable(name: "x_b", arg: 2, scope: !50, file: !3, line: 17, type: !23)
!56 = !DILocalVariable(name: "y", arg: 3, scope: !50, file: !3, line: 17, type: !23)
!57 = !DILocalVariable(name: "y_b", arg: 4, scope: !50, file: !3, line: 17, type: !20)
!58 = !DILocalVariable(name: "size", arg: 5, scope: !50, file: !3, line: 17, type: !9)
!59 = !DILocation(line: 0, scope: !50)
!60 = !DILocation(line: 20, column: 9, scope: !50)
!61 = !{!62, !62, i64 0}
!62 = !{!"int", !40, i64 0}
!63 = !DILocation(line: 22, column: 9, scope: !50)
!64 = !DILocation(line: 19, column: 12, scope: !50)
!65 = !DILocation(line: 19, column: 5, scope: !50)
!66 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 24, type: !67, scopeLine: 24, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !69)
!67 = !DISubroutineType(types: !68)
!68 = !{!9}
!69 = !{!70, !72, !76, !77, !78, !79, !81}
!70 = !DILocalVariable(name: "n", scope: !66, file: !3, line: 25, type: !71)
!71 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9)
!72 = !DILocalVariable(name: "x", scope: !66, file: !3, line: 26, type: !73)
!73 = !DICompositeType(tag: DW_TAG_array_type, baseType: !22, size: 320, elements: !74)
!74 = !{!75}
!75 = !DISubrange(count: 5)
!76 = !DILocalVariable(name: "x_b", scope: !66, file: !3, line: 27, type: !73)
!77 = !DILocalVariable(name: "y", scope: !66, file: !3, line: 28, type: !73)
!78 = !DILocalVariable(name: "y_b", scope: !66, file: !3, line: 29, type: !73)
!79 = !DILocalVariable(name: "i", scope: !80, file: !3, line: 31, type: !9)
!80 = distinct !DILexicalBlock(scope: !66, file: !3, line: 31, column: 5)
!81 = !DILocalVariable(name: "i", scope: !82, file: !3, line: 38, type: !9)
!82 = distinct !DILexicalBlock(scope: !66, file: !3, line: 38, column: 5)
!83 = !DILocation(line: 0, scope: !66)
!84 = !DILocation(line: 26, column: 5, scope: !66)
!85 = !DILocation(line: 26, column: 12, scope: !66)
!86 = !DILocation(line: 27, column: 5, scope: !66)
!87 = !DILocation(line: 27, column: 12, scope: !66)
!88 = !DILocation(line: 28, column: 5, scope: !66)
!89 = !DILocation(line: 28, column: 12, scope: !66)
!90 = !DILocation(line: 29, column: 5, scope: !66)
!91 = !DILocation(line: 29, column: 12, scope: !66)
!92 = !DILocation(line: 0, scope: !80)
!93 = !DILocation(line: 31, column: 5, scope: !80)
!94 = !DILocation(line: 36, column: 13, scope: !66)
!95 = !DILocation(line: 36, column: 16, scope: !66)
!96 = !DILocation(line: 36, column: 21, scope: !66)
!97 = !DILocation(line: 36, column: 24, scope: !66)
!98 = !DILocation(line: 0, scope: !50, inlinedAt: !99)
!99 = distinct !DILocation(line: 36, column: 5, scope: !66)
!100 = !DILocation(line: 20, column: 9, scope: !50, inlinedAt: !99)
!101 = !DILocation(line: 22, column: 9, scope: !50, inlinedAt: !99)
!102 = !DILocation(line: 19, column: 12, scope: !50, inlinedAt: !99)
!103 = !DILocation(line: 0, scope: !82)
!104 = !DILocation(line: 38, column: 5, scope: !82)
!105 = !DILocation(line: 32, column: 16, scope: !106)
!106 = distinct !DILexicalBlock(scope: !107, file: !3, line: 31, column: 28)
!107 = distinct !DILexicalBlock(scope: !80, file: !3, line: 31, column: 5)
!108 = !DILocation(line: 32, column: 14, scope: !106)
!109 = !DILocation(line: 32, column: 7, scope: !106)
!110 = !DILocation(line: 32, column: 12, scope: !106)
!111 = !DILocation(line: 33, column: 7, scope: !106)
!112 = !DILocation(line: 33, column: 14, scope: !106)
!113 = !DILocation(line: 31, column: 19, scope: !107)
!114 = distinct !{!114, !93, !115, !48, !49}
!115 = !DILocation(line: 34, column: 5, scope: !80)
!116 = !DILocation(line: 41, column: 1, scope: !66)
!117 = !DILocation(line: 39, column: 52, scope: !118)
!118 = distinct !DILexicalBlock(scope: !119, file: !3, line: 38, column: 28)
!119 = distinct !DILexicalBlock(scope: !82, file: !3, line: 38, column: 5)
!120 = !DILocation(line: 39, column: 58, scope: !118)
!121 = !DILocation(line: 39, column: 70, scope: !118)
!122 = !DILocation(line: 39, column: 7, scope: !118)
!123 = !DILocation(line: 38, column: 24, scope: !119)
!124 = !DILocation(line: 38, column: 19, scope: !119)
!125 = distinct !{!125, !104, !126, !48, !49}
!126 = !DILocation(line: 40, column: 5, scope: !82)
!127 = distinct !DISubprogram(name: "square", linkageName: "_Z6squarePKdPdi", scope: !3, file: !3, line: 11, type: !18, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !128)
!128 = !{!129, !130, !131, !132}
!129 = !DILocalVariable(name: "x", arg: 1, scope: !127, file: !3, line: 11, type: !20)
!130 = !DILocalVariable(name: "y", arg: 2, scope: !127, file: !3, line: 11, type: !23)
!131 = !DILocalVariable(name: "size", arg: 3, scope: !127, file: !3, line: 11, type: !9)
!132 = !DILocalVariable(name: "i", scope: !133, file: !3, line: 12, type: !9)
!133 = distinct !DILexicalBlock(scope: !127, file: !3, line: 12, column: 3)
!134 = !DILocation(line: 0, scope: !127)
!135 = !DILocation(line: 0, scope: !133)
!136 = !DILocation(line: 12, column: 20, scope: !137)
!137 = distinct !DILexicalBlock(scope: !133, file: !3, line: 12, column: 3)
!138 = !DILocation(line: 12, column: 3, scope: !133)
!139 = !DILocation(line: 15, column: 1, scope: !127)
!140 = !DILocation(line: 13, column: 12, scope: !141)
!141 = distinct !DILexicalBlock(scope: !137, file: !3, line: 12, column: 36)
!142 = !DILocation(line: 13, column: 17, scope: !141)
!143 = !DILocation(line: 13, column: 5, scope: !141)
!144 = !DILocation(line: 13, column: 10, scope: !141)
!145 = distinct !{!145, !138, !146, !48, !49}
!146 = !DILocation(line: 14, column: 3, scope: !133)
!147 = distinct !DISubprogram(name: "square", linkageName: "_Z6squarePKdPdi", scope: !3, file: !3, line: 11, type: !18, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !148)
!148 = !{!149, !150, !151, !152}
!149 = !DILocalVariable(name: "x", arg: 1, scope: !147, file: !3, line: 11, type: !20)
!150 = !DILocalVariable(name: "y", arg: 2, scope: !147, file: !3, line: 11, type: !23)
!151 = !DILocalVariable(name: "size", arg: 3, scope: !147, file: !3, line: 11, type: !9)
!152 = !DILocalVariable(name: "i", scope: !153, file: !3, line: 12, type: !9)
!153 = distinct !DILexicalBlock(scope: !147, file: !3, line: 12, column: 3)
!154 = !DILocation(line: 12, column: 20, scope: !155)
!155 = distinct !DILexicalBlock(scope: !153, file: !3, line: 12, column: 3)
!156 = !DILocation(line: 12, column: 3, scope: !153)
; !invariant.group tag on the store to and the loads from the %_cache slot in diffe_Z6squarePKdPdi.
!157 = distinct !{}
!158 = !DILocation(line: 15, column: 1, scope: !147)
!159 = !DILocation(line: 13, column: 12, scope: !160)
!160 = distinct !DILexicalBlock(scope: !155, file: !3, line: 12, column: 36)
!161 = !DILocation(line: 13, column: 17, scope: !160)
!162 = !DILocation(line: 13, column: 5, scope: !160)
!163 = !DILocation(line: 13, column: 10, scope: !160)
; !dereferenceable byte count (8) attached to the cache-pointer loads in diffe_Z6squarePKdPdi.
!164 = !{i64 8}
; !invariant.group tag on the cache element store (forward sweep) and load (reverse sweep).
!165 = distinct !{}
!166 = distinct !{!166, !156, !167, !48, !49}
!167 = !DILocation(line: 14, column: 3, scope: !153)
!168 = !DILocation(line: 0, scope: !147)
ZuseZ4 commented 2 years ago

Any updates here?