EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
453 stars 63 forks source link

Enzyme.jl hits an unknown intrinsic error when diffing through sincos. #219

Closed sefffal closed 2 years ago

sefffal commented 2 years ago

I came across this while trying to differentiate through AstroLib's Kepler solver.

Here's a MWE:

"""
    test5(theta)

Return the sum of the sine and cosine of `theta`, with both values obtained
from a single `sincos` call.
"""
function test5(theta)
    sine, cosine = sincos(theta)
    return sine + cosine
end
Enzyme.autodiff(test5, Active(1.0))

Gives:

...
cannot handle (augmented) unknown intrinsic
  %58 = call i128 @llvm.abs.i128(i128 %54, i1 noundef false), !dbg !159
ERROR: LLVM error: (augmented) unknown intrinsic
Stacktrace:
...
Expand for full stack trace
julia> Enzyme.autodiff(test5, Active(1.0))
; Function Attrs: nosync uwtable
define internal fastcc void @preprocess_julia_paynehanek_23245({ i64, [2 x double] }* noalias nocapture noundef nonnull writeonly sret({ i64, [2 x double] }) align 8 dereferenceable(24) %0, double %1) unnamed_addr #7 !dbg !1662 {        
top:
  %2 = call {}*** @julia.get_pgcstack()
  %3 = bitcast double %1 to i64, !dbg !1663
  %4 = and i64 %3, 4503599627370495, !dbg !1665
  %5 = or i64 %4, 4503599627370496, !dbg !1667
  %6 = lshr i64 %3, 52, !dbg !1668
  %7 = and i64 %6, 2047, !dbg !1668
  %8 = add nsw i64 %7, -1075, !dbg !1671
  %9 = ashr i64 %8, 6, !dbg !1673
  %10 = and i64 %8, 63, !dbg !1676
  %.not = icmp eq i64 %10, 0, !dbg !1678
  br i1 %.not, label %L28, label %L38, !dbg !1679

L28:                                              ; preds = %top
  %11 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* addrspacecast (i64 addrspace(13)** inttoptr (i64 346915328 to i64 addrspace(13)**) to i64 addrspace(13)* addrspace(11)*), align 512, !dbg !1680, !tbaa !566, !nonnull !4
  %12 = getelementptr inbounds i64, i64 addrspace(13)* %11, i64 %9, !dbg !1680
  %13 = load i64, i64 addrspace(13)* %12, align 8, !dbg !1680, !tbaa !571
  %14 = add nsw i64 %9, 1, !dbg !1682
  %15 = getelementptr inbounds i64, i64 addrspace(13)* %11, i64 %14, !dbg !1682
  %16 = load i64, i64 addrspace(13)* %15, align 8, !dbg !1682, !tbaa !571
  %17 = add nsw i64 %9, 2, !dbg !1684
  %18 = getelementptr inbounds i64, i64 addrspace(13)* %11, i64 %17, !dbg !1684
  %19 = load i64, i64 addrspace(13)* %18, align 8, !dbg !1684, !tbaa !571
  br label %L108, !dbg !1685

L38:                                              ; preds = %top
  %20 = icmp ugt i64 %7, 1074, !dbg !1686
  br i1 %20, label %L41, label %L38.L51_crit_edge, !dbg !1687

L38.L51_crit_edge:                                ; preds = %L38
  %.pre = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* addrspacecast (i64 addrspace(13)** inttoptr (i64 346915328 to i64 addrspace(13)**) to i64 addrspace(13)* addrspace(11)*), align 512, !dbg !1688, !tbaa !566
  br label %L51, !dbg !1687

L41:                                              ; preds = %L38
  %21 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* addrspacecast (i64 addrspace(13)** inttoptr (i64 346915328 to i64 addrspace(13)**) to i64 addrspace(13)* addrspace(11)*), align 512, !dbg !1688, !tbaa !566, !nonnull !4
  %22 = getelementptr inbounds i64, i64 addrspace(13)* %21, i64 %9, !dbg !1688
  %23 = load i64, i64 addrspace(13)* %22, align 8, !dbg !1688, !tbaa !571
  %24 = shl i64 %23, %10, !dbg !1689
  br label %L51, !dbg !1690

L51:                                              ; preds = %L41, %L38.L51_crit_edge
  %25 = phi i64 addrspace(13)* [ %21, %L41 ], [ %.pre, %L38.L51_crit_edge ], !dbg !1688
  %value_phi5 = phi i64 [ %24, %L41 ], [ 0, %L38.L51_crit_edge ]
  %26 = add nsw i64 %9, 1, !dbg !1688
  %27 = getelementptr inbounds i64, i64 addrspace(13)* %25, i64 %26, !dbg !1688
  %28 = load i64, i64 addrspace(13)* %27, align 8, !dbg !1688, !tbaa !571
  %29 = sub nuw nsw i64 64, %10, !dbg !1691
  %30 = lshr i64 %28, %29, !dbg !1692
  %31 = or i64 %30, %value_phi5, !dbg !1694
  %32 = add nsw i64 %9, 2, !dbg !1695
  %33 = getelementptr inbounds i64, i64 addrspace(13)* %25, i64 %32, !dbg !1695
  %34 = load i64, i64 addrspace(13)* %33, align 8, !dbg !1695, !tbaa !571
  %35 = call i64 @llvm.fshl.i64(i64 %28, i64 %34, i64 %10), !dbg !1697
  %36 = add nsw i64 %9, 3, !dbg !1698
  %37 = getelementptr inbounds i64, i64 addrspace(13)* %25, i64 %36, !dbg !1698
  %38 = load i64, i64 addrspace(13)* %37, align 8, !dbg !1698, !tbaa !571
  %39 = call i64 @llvm.fshl.i64(i64 %34, i64 %38, i64 %10), !dbg !1700
  br label %L108, !dbg !1700

L108:                                             ; preds = %L51, %L28
  %value_phi = phi i64 [ %19, %L28 ], [ %39, %L51 ]
  %value_phi1 = phi i64 [ %16, %L28 ], [ %35, %L51 ]
  %value_phi2 = phi i64 [ %13, %L28 ], [ %31, %L51 ]
  %40 = mul i64 %value_phi2, %5, !dbg !1701
  %41 = zext i64 %40 to i128, !dbg !1703
  %42 = shl nuw i128 %41, 64, !dbg !1705
  %43 = zext i64 %5 to i128, !dbg !1707
  %44 = zext i64 %value_phi1 to i128, !dbg !1707
  %45 = mul nuw nsw i128 %44, %43, !dbg !1713
  %46 = zext i64 %value_phi to i128, !dbg !1714
  %47 = mul nuw nsw i128 %46, %43, !dbg !1720
  %48 = lshr i128 %47, 64, !dbg !1721
  %49 = add nuw nsw i128 %48, %45, !dbg !1723
  %50 = add i128 %49, %42, !dbg !1723
  %51 = icmp sgt i64 %3, -1, !dbg !1726
  %52 = sub i128 0, %50, !dbg !1731
  %53 = select i1 %51, i128 %50, i128 %52, !dbg !1729
  %54 = shl i128 %53, 2, !dbg !1732
  %.not10 = icmp eq i128 %54, 0, !dbg !1735
  br i1 %.not10, label %L218, label %L150, !dbg !1737

L150:                                             ; preds = %L108
  %55 = lshr i128 %53, 62, !dbg !1739
  %56 = trunc i128 %55 to i64, !dbg !1739
  %57 = and i64 %56, -9223372036854775808, !dbg !1739
  %58 = call i128 @llvm.abs.i128(i128 %54, i1 noundef false), !dbg !1742
  %59 = call i128 @llvm.ctlz.i128(i128 %58, i1 noundef false), !dbg !1745, !range !658
  %60 = trunc i128 %59 to i64, !dbg !1747
  %.neg = add nsw i64 %60, -102, !dbg !1748
  %61 = sub nsw i64 102, %60, !dbg !1748
  %62 = icmp ugt i64 %60, 102, !dbg !1750
  %63 = zext i64 %61 to i128, !dbg !1752
  %64 = lshr i128 %58, %63, !dbg !1752
  %65 = icmp ugt i64 %61, 127, !dbg !1752
  %66 = select i1 %65, i128 0, i128 %64, !dbg !1752
  %67 = zext i64 %.neg to i128, !dbg !1753
  %68 = shl i128 %58, %67, !dbg !1753
  %69 = icmp ugt i64 %.neg, 127, !dbg !1753
  %70 = select i1 %69, i128 0, i128 %68, !dbg !1753
  %71 = select i1 %62, i128 %70, i128 %66, !dbg !1751
  %72 = trunc i128 %71 to i64, !dbg !1754
  %73 = shl i64 %72, 27, !dbg !1755
  %.neg11 = mul nsw i64 %60, -4503599627370496, !dbg !1757
  %74 = add nsw i64 %.neg11, 4598175219545276416, !dbg !1757
  %75 = add i64 %74, %73, !dbg !1760
  %76 = or i64 %75, %57, !dbg !1762
  %77 = bitcast i64 %76 to double, !dbg !1763
  %78 = zext i64 %73 to i128, !dbg !1764
  %.neg12 = add nsw i64 %60, -75, !dbg !1767
  %79 = sub nsw i64 75, %60, !dbg !1767
  %80 = icmp ugt i64 %60, 75, !dbg !1768
  %81 = zext i64 %79 to i128, !dbg !1770
  %82 = shl i128 %78, %81, !dbg !1770
  %83 = icmp ugt i64 %79, 127, !dbg !1770
  %84 = select i1 %83, i128 0, i128 %82, !dbg !1770
  %85 = zext i64 %.neg12 to i128, !dbg !1771
  %86 = lshr i128 %78, %85, !dbg !1771
  %87 = icmp ugt i64 %.neg12, 127, !dbg !1771
  %88 = select i1 %87, i128 0, i128 %86, !dbg !1771
  %89 = select i1 %80, i128 %88, i128 %84, !dbg !1769
  %90 = sub i128 %58, %89, !dbg !1767
  %.not13 = icmp eq i128 %90, 0, !dbg !1772
  br i1 %.not13, label %L218, label %L196, !dbg !1775

L196:                                             ; preds = %L150
  %91 = call i128 @llvm.ctlz.i128(i128 %90, i1 noundef true), !dbg !1776, !range !658
  %92 = trunc i128 %91 to i64, !dbg !1778
  %.neg14 = add nsw i64 %92, -75, !dbg !1779
  %93 = sub nsw i64 75, %92, !dbg !1779
  %94 = icmp ugt i64 %92, 75, !dbg !1781
  %95 = zext i64 %93 to i128, !dbg !1783
  %96 = lshr i128 %90, %95, !dbg !1783
  %97 = icmp ugt i64 %93, 127, !dbg !1783
  %98 = select i1 %97, i128 0, i128 %96, !dbg !1783
  %99 = zext i64 %.neg14 to i128, !dbg !1784
  %100 = shl i128 %90, %99, !dbg !1784
  %101 = icmp ugt i64 %.neg14, 127, !dbg !1784
  %102 = select i1 %101, i128 0, i128 %100, !dbg !1784
  %103 = select i1 %94, i128 %102, i128 %98, !dbg !1782
  %104 = trunc i128 %103 to i64, !dbg !1785
  %.neg15 = mul nsw i64 %92, -4503599627370496, !dbg !1786
  %105 = add nsw i64 %.neg15, 4598175219545276416, !dbg !1786
  %106 = add i64 %105, %104, !dbg !1789
  %107 = or i64 %106, %57, !dbg !1791
  %108 = bitcast i64 %107 to double, !dbg !1792
  br label %L218, !dbg !1793

L218:                                             ; preds = %L196, %L150, %L108
  %value_phi3 = phi double [ %77, %L196 ], [ 0.000000e+00, %L108 ], [ %77, %L150 ]
  %value_phi4 = phi double [ %108, %L196 ], [ 0.000000e+00, %L108 ], [ 0.000000e+00, %L150 ]
  %109 = lshr i128 %53, 125, !dbg !1794
  %110 = trunc i128 %109 to i64, !dbg !1797
  %111 = add nuw nsw i64 %110, 1, !dbg !1798
  %112 = lshr i64 %111, 1, !dbg !1799
  %113 = fadd double %value_phi3, %value_phi4, !dbg !1800
  %114 = fmul double %113, 0x3FF921FB54442D18, !dbg !1802
  %115 = fmul double %value_phi3, 0x3FF921FB58000000, !dbg !1803
  %116 = fsub double %115, %114, !dbg !1805
  %117 = fmul double %value_phi3, 0x3E4DDE973DCB3B3A, !dbg !1803
  %118 = fsub double %116, %117, !dbg !1806
  %119 = fmul double %value_phi4, 0x3FF921FB58000000, !dbg !1803
  %120 = fadd double %119, %118, !dbg !1806
  %121 = fmul double %value_phi4, 0x3E4DDE973DCB3B3A, !dbg !1803
  %122 = fsub double %120, %121, !dbg !1806
  %.sroa.0.0..sroa_idx = getelementptr inbounds { i64, [2 x double] }, { i64, [2 x double] }* %0, i64 0, i32 0, !dbg !1807
  store i64 %112, i64* %.sroa.0.0..sroa_idx, align 8, !dbg !1807
  %.sroa.2.sroa.0.0..sroa.2.0..sroa_cast.sroa_idx = getelementptr inbounds { i64, [2 x double] }, { i64, [2 x double] }* %0, i64 0, i32 1, i64 0, !dbg !1807
  store double %114, double* %.sroa.2.sroa.0.0..sroa.2.0..sroa_cast.sroa_idx, align 8, !dbg !1807
  %.sroa.2.sroa.2.0..sroa.2.0..sroa_cast.sroa_idx9 = getelementptr inbounds { i64, [2 x double] }, { i64, [2 x double] }* %0, i64 0, i32 1, i64 1, !dbg !1807
  store double %122, double* %.sroa.2.sroa.2.0..sroa.2.0..sroa_cast.sroa_idx9, align 8, !dbg !1807
  ret void, !dbg !1807
}

; Function Attrs: nosync uwtable
define internal fastcc { i8* } @fakeaugmented_julia_paynehanek_23245({ i64, [2 x double] }* noalias nocapture noundef nonnull writeonly sret({ i64, [2 x double] }) align 8 dereferenceable(24) %0, { i64, [2 x double] }* nocapture %"'", double %1) unnamed_addr #7 !dbg !1808 {
top:
  %_replacementA = phi {}***
  %2 = bitcast double %1 to i64, !dbg !1809
  %3 = and i64 %2, 4503599627370495, !dbg !1811
  %4 = or i64 %3, 4503599627370496, !dbg !1813
  %5 = lshr i64 %2, 52, !dbg !1814
  %6 = and i64 %5, 2047, !dbg !1814
  %7 = add nsw i64 %6, -1075, !dbg !1817
  %8 = ashr i64 %7, 6, !dbg !1819
  %9 = and i64 %7, 63, !dbg !1822
  %.not = icmp eq i64 %9, 0, !dbg !1824
  br i1 %.not, label %L28, label %L38, !dbg !1825

L28:                                              ; preds = %top
  %10 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* addrspacecast (i64 addrspace(13)** inttoptr (i64 346915328 to i64 addrspace(13)**) to i64 addrspace(13)* addrspace(11)*), align 512, !dbg !1826, !tbaa !566, !nonnull !4  
  %11 = getelementptr inbounds i64, i64 addrspace(13)* %10, i64 %8, !dbg !1826
  %12 = load i64, i64 addrspace(13)* %11, align 8, !dbg !1826, !tbaa !571
  %13 = add nsw i64 %8, 1, !dbg !1828
  %14 = getelementptr inbounds i64, i64 addrspace(13)* %10, i64 %13, !dbg !1828
  %15 = load i64, i64 addrspace(13)* %14, align 8, !dbg !1828, !tbaa !571
  %16 = add nsw i64 %8, 2, !dbg !1830
  %17 = getelementptr inbounds i64, i64 addrspace(13)* %10, i64 %16, !dbg !1830
  %18 = load i64, i64 addrspace(13)* %17, align 8, !dbg !1830, !tbaa !571
  br label %L108, !dbg !1831

L38:                                              ; preds = %top
  %19 = icmp ugt i64 %6, 1074, !dbg !1832
  br i1 %19, label %L41, label %L38.L51_crit_edge, !dbg !1833

L38.L51_crit_edge:                                ; preds = %L38
  %.pre = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* addrspacecast (i64 addrspace(13)** inttoptr (i64 346915328 to i64 addrspace(13)**) to i64 addrspace(13)* addrspace(11)*), align 512, !dbg !1834, !tbaa !566
  br label %L51, !dbg !1833

L41:                                              ; preds = %L38
  %20 = load i64 addrspace(13)*, i64 addrspace(13)* addrspace(11)* addrspacecast (i64 addrspace(13)** inttoptr (i64 346915328 to i64 addrspace(13)**) to i64 addrspace(13)* addrspace(11)*), align 512, !dbg !1834, !tbaa !566, !nonnull !4  
  %21 = getelementptr inbounds i64, i64 addrspace(13)* %20, i64 %8, !dbg !1834
  %22 = load i64, i64 addrspace(13)* %21, align 8, !dbg !1834, !tbaa !571
  %23 = shl i64 %22, %9, !dbg !1835
  br label %L51, !dbg !1836

L51:                                              ; preds = %L41, %L38.L51_crit_edge
  %24 = phi i64 addrspace(13)* [ %20, %L41 ], [ %.pre, %L38.L51_crit_edge ], !dbg !1834
  %value_phi5 = phi i64 [ %23, %L41 ], [ 0, %L38.L51_crit_edge ]
  %25 = add nsw i64 %8, 1, !dbg !1834
  %26 = getelementptr inbounds i64, i64 addrspace(13)* %24, i64 %25, !dbg !1834
  %27 = load i64, i64 addrspace(13)* %26, align 8, !dbg !1834, !tbaa !571
  %28 = sub nuw nsw i64 64, %9, !dbg !1837
  %29 = lshr i64 %27, %28, !dbg !1838
  %30 = or i64 %29, %value_phi5, !dbg !1840
  %31 = add nsw i64 %8, 2, !dbg !1841
  %32 = getelementptr inbounds i64, i64 addrspace(13)* %24, i64 %31, !dbg !1841
  %33 = load i64, i64 addrspace(13)* %32, align 8, !dbg !1841, !tbaa !571
  %34 = call i64 @llvm.fshl.i64(i64 %27, i64 %33, i64 %9), !dbg !1843
  %35 = add nsw i64 %8, 3, !dbg !1844
  %36 = getelementptr inbounds i64, i64 addrspace(13)* %24, i64 %35, !dbg !1844
  %37 = load i64, i64 addrspace(13)* %36, align 8, !dbg !1844, !tbaa !571
  %38 = call i64 @llvm.fshl.i64(i64 %33, i64 %37, i64 %9), !dbg !1846
  br label %L108, !dbg !1846

L108:                                             ; preds = %L51, %L28
  %value_phi = phi i64 [ %18, %L28 ], [ %38, %L51 ]
  %value_phi1 = phi i64 [ %15, %L28 ], [ %34, %L51 ]
  %value_phi2 = phi i64 [ %12, %L28 ], [ %30, %L51 ]
  %39 = mul i64 %value_phi2, %4, !dbg !1847
  %40 = zext i64 %39 to i128, !dbg !1849
  %41 = shl nuw i128 %40, 64, !dbg !1851
  %42 = zext i64 %4 to i128, !dbg !1853
  %43 = zext i64 %value_phi1 to i128, !dbg !1853
  %44 = mul nuw nsw i128 %43, %42, !dbg !1859
  %45 = zext i64 %value_phi to i128, !dbg !1860
  %46 = mul nuw nsw i128 %45, %42, !dbg !1866
  %47 = lshr i128 %46, 64, !dbg !1867
  %48 = add nuw nsw i128 %47, %44, !dbg !1869
  %49 = add i128 %48, %41, !dbg !1869
  %50 = icmp sgt i64 %2, -1, !dbg !1872
  %51 = sub i128 0, %49, !dbg !1877
  %52 = select i1 %50, i128 %49, i128 %51, !dbg !1875
  %53 = shl i128 %52, 2, !dbg !1878
  %.not10 = icmp eq i128 %53, 0, !dbg !1881
  br i1 %.not10, label %L218, label %L150, !dbg !1883

L150:                                             ; preds = %L108
  %54 = lshr i128 %52, 62, !dbg !1885
  %55 = trunc i128 %54 to i64, !dbg !1885
  %56 = and i64 %55, -9223372036854775808, !dbg !1885
  %57 = call i128 @llvm.abs.i128(i128 %53, i1 noundef false), !dbg !1888
  %58 = call i128 @llvm.ctlz.i128(i128 %57, i1 noundef false), !dbg !1891, !range !658
  %59 = trunc i128 %58 to i64, !dbg !1893
  %.neg = add nsw i64 %59, -102, !dbg !1894
  %60 = sub nsw i64 102, %59, !dbg !1894
  %61 = icmp ugt i64 %59, 102, !dbg !1896
  %62 = zext i64 %60 to i128, !dbg !1898
  %63 = lshr i128 %57, %62, !dbg !1898
  %64 = icmp ugt i64 %60, 127, !dbg !1898
  %65 = select i1 %64, i128 0, i128 %63, !dbg !1898
  %66 = zext i64 %.neg to i128, !dbg !1899
  %67 = shl i128 %57, %66, !dbg !1899
  %68 = icmp ugt i64 %.neg, 127, !dbg !1899
  %69 = select i1 %68, i128 0, i128 %67, !dbg !1899
  %70 = select i1 %61, i128 %69, i128 %65, !dbg !1897
  %71 = trunc i128 %70 to i64, !dbg !1900
  %72 = shl i64 %71, 27, !dbg !1901
  %.neg11 = mul nsw i64 %59, -4503599627370496, !dbg !1903
  %73 = add nsw i64 %.neg11, 4598175219545276416, !dbg !1903
  %74 = add i64 %73, %72, !dbg !1906
  %75 = or i64 %74, %56, !dbg !1908
  %76 = bitcast i64 %75 to double, !dbg !1909
  %77 = zext i64 %72 to i128, !dbg !1910
  %.neg12 = add nsw i64 %59, -75, !dbg !1913
  %78 = sub nsw i64 75, %59, !dbg !1913
  %79 = icmp ugt i64 %59, 75, !dbg !1914
  %80 = zext i64 %78 to i128, !dbg !1916
  %81 = shl i128 %77, %80, !dbg !1916
  %82 = icmp ugt i64 %78, 127, !dbg !1916
  %83 = select i1 %82, i128 0, i128 %81, !dbg !1916
  %84 = zext i64 %.neg12 to i128, !dbg !1917
  %85 = lshr i128 %77, %84, !dbg !1917
  %86 = icmp ugt i64 %.neg12, 127, !dbg !1917
  %87 = select i1 %86, i128 0, i128 %85, !dbg !1917
  %88 = select i1 %79, i128 %87, i128 %83, !dbg !1915
  %89 = sub i128 %57, %88, !dbg !1913
  %.not13 = icmp eq i128 %89, 0, !dbg !1918
  br i1 %.not13, label %L218, label %L196, !dbg !1921

L196:                                             ; preds = %L150
  %90 = call i128 @llvm.ctlz.i128(i128 %89, i1 noundef true), !dbg !1922, !range !658
  %91 = trunc i128 %90 to i64, !dbg !1924
  %.neg14 = add nsw i64 %91, -75, !dbg !1925
  %92 = sub nsw i64 75, %91, !dbg !1925
  %93 = icmp ugt i64 %91, 75, !dbg !1927
  %94 = zext i64 %92 to i128, !dbg !1929
  %95 = lshr i128 %89, %94, !dbg !1929
  %96 = icmp ugt i64 %92, 127, !dbg !1929
  %97 = select i1 %96, i128 0, i128 %95, !dbg !1929
  %98 = zext i64 %.neg14 to i128, !dbg !1930
  %99 = shl i128 %89, %98, !dbg !1930
  %100 = icmp ugt i64 %.neg14, 127, !dbg !1930
  %101 = select i1 %100, i128 0, i128 %99, !dbg !1930
  %102 = select i1 %93, i128 %101, i128 %97, !dbg !1928
  %103 = trunc i128 %102 to i64, !dbg !1931
  %.neg15 = mul nsw i64 %91, -4503599627370496, !dbg !1932
  %104 = add nsw i64 %.neg15, 4598175219545276416, !dbg !1932
  %105 = add i64 %104, %103, !dbg !1935
  %106 = or i64 %105, %56, !dbg !1937
  %107 = bitcast i64 %106 to double, !dbg !1938
  br label %L218, !dbg !1939

L218:                                             ; preds = %L196, %L150, %L108
  %value_phi3 = phi double [ %76, %L196 ], [ 0.000000e+00, %L108 ], [ %76, %L150 ]
  %value_phi4 = phi double [ %107, %L196 ], [ 0.000000e+00, %L108 ], [ 0.000000e+00, %L150 ]
  %108 = lshr i128 %52, 125, !dbg !1940
  %109 = trunc i128 %108 to i64, !dbg !1943
  %110 = add nuw nsw i64 %109, 1, !dbg !1944
  %111 = lshr i64 %110, 1, !dbg !1945
  %112 = fadd double %value_phi3, %value_phi4, !dbg !1946
  %113 = fmul double %112, 0x3FF921FB54442D18, !dbg !1948
  %114 = fmul double %value_phi3, 0x3FF921FB58000000, !dbg !1949
  %115 = fsub double %114, %113, !dbg !1951
  %116 = fmul double %value_phi3, 0x3E4DDE973DCB3B3A, !dbg !1949
  %117 = fsub double %115, %116, !dbg !1952
  %118 = fmul double %value_phi4, 0x3FF921FB58000000, !dbg !1949
  %119 = fadd double %118, %117, !dbg !1952
  %120 = fmul double %value_phi4, 0x3E4DDE973DCB3B3A, !dbg !1949
  %121 = fsub double %119, %120, !dbg !1952
  %.sroa.0.0..sroa_idx = getelementptr inbounds { i64, [2 x double] }, { i64, [2 x double] }* %0, i64 0, i32 0, !dbg !1953
  store i64 %111, i64* %.sroa.0.0..sroa_idx, align 8, !dbg !1953
  %.sroa.2.sroa.0.0..sroa.2.0..sroa_cast.sroa_idx = getelementptr inbounds { i64, [2 x double] }, { i64, [2 x double] }* %0, i64 0, i32 1, i64 0, !dbg !1953
  store double %113, double* %.sroa.2.sroa.0.0..sroa.2.0..sroa_cast.sroa_idx, align 8, !dbg !1953
  %.sroa.2.sroa.2.0..sroa.2.0..sroa_cast.sroa_idx9 = getelementptr inbounds { i64, [2 x double] }, { i64, [2 x double] }* %0, i64 0, i32 1, i64 1, !dbg !1953
  store double %121, double* %.sroa.2.sroa.2.0..sroa.2.0..sroa_cast.sroa_idx9, align 8, !dbg !1953
  ret { i8* } undef, !dbg !1953

allocsForInversion:                               ; No predecessors!
}

cannot handle (augmented) unknown intrinsic
  %58 = call i128 @llvm.abs.i128(i128 %54, i1 noundef false), !dbg !159
ERROR: LLVM error: (augmented) unknown intrinsic
Stacktrace:
  [1] handle_error(reason::Cstring)
    @ LLVM C:\Users\William\.julia\packages\LLVM\vQ98J\src\core\context.jl:105
  [2] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, topLevel::Bool, additionalArg::Ptr{Nothing}, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, augmented::Ptr{Nothing}, atomicAdd::Bool, postOpt::Bool)
    @ Enzyme.API C:\Users\William\.julia\packages\Enzyme\i3uGf\src\api.jl:108
  [3] enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(test5), Tuple{Float64}}}, mod::LLVM.Module, primalf::LLVM.Function, adjoint::GPUCompiler.FunctionSpec{typeof(test5), Tuple{Active{Float64}}}, split::Bool, parallel::Bool, actualRetType::Type, dupClosure::Bool)
    @ Enzyme.Compiler C:\Users\William\.julia\packages\Enzyme\i3uGf\src\compiler.jl:1740
  [4] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(test5), Tuple{Float64}}}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler C:\Users\William\.julia\packages\Enzyme\i3uGf\src\compiler.jl:2282
  [5] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(test5), Tuple{Float64}}})
    @ Enzyme.Compiler C:\Users\William\.julia\packages\Enzyme\i3uGf\src\compiler.jl:2623
  [6] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(Enzyme.Compiler._thunk), linker::typeof(Enzyme.Compiler._link))
    @ GPUCompiler C:\Users\William\.julia\packages\GPUCompiler\1Ajz2\src\cache.jl:90
  [7] thunk(f::typeof(test5), df::Nothing, ::Type{Active{Float64}}, tt::Type{Tuple{Active{Float64}}}, ::Val{false})
    @ Enzyme.Compiler C:\Users\William\.julia\packages\Enzyme\i3uGf\src\compiler.jl:2675
  [8] autodiff
    @ C:\Users\William\.julia\packages\Enzyme\i3uGf\src\Enzyme.jl:186 [inlined]
  [9] autodiff(f::typeof(test5), args::Active{Float64})
    @ Enzyme C:\Users\William\.julia\packages\Enzyme\i3uGf\src\Enzyme.jl:216
 [10] top-level scope
    @ REPL[157]:1

Output of versioninfo():

Julia Version 1.7.0
Commit 3bf9d17731 (2021-11-30 12:12 UTC)
Platform Info:
  OS: Windows (x86_64-w64-mingw32)
  CPU: Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-12.0.1 (ORCJIT, skylake)
Environment:
  JULIA_NUM_THREADS = 1
  JULIA_EDITOR = code
vchuravy commented 2 years ago

Fixed by #224