pauldreik / random_foreach

code for randomly generating all numbers in a range exactly once
Boost Software License 1.0
3 stars 0 forks source link

illegal instruction - crash #3

Closed oschonrock closed 4 years ago

oschonrock commented 4 years ago

I am getting crashes for many of the binaries (see bottom). Some print random error messages; others say "illegal instruction".

I am using clang-9.01 on ubuntu 19.10.

Initially thought: Related to AES intrinsics?

I do not have the newest CPU: https://ark.intel.com/content/www/us/en/ark/products/52213/intel-core-i7-2600-processor-8m-cache-up-to-3-80-ghz.html

But it does have

cat /proc/cpuinfo
...
aes
...

here it is running in gdb

gdb ./shootout
...
(gdb) run aes_feistel
Starting program: random_foreach/build/shootout aes_feistel

Program received signal SIGILL, Illegal instruction.
GenericFeistel<Aes32<2>, unsigned int, unsigned short>::GenericFeistel (this=0x7fffffffdbd0, Nbits=30) at random_foreach/GenericFeistel.h:79
79       : m_mask((1ULL << (Nbits / 2)) - 1)

(gdb) bt
#0  GenericFeistel<Aes32<2>, unsigned int, unsigned short>::GenericFeistel (this=0x7fffffffdbd0, Nbits=30) at random_foreach/GenericFeistel.h:79
#1  Aes32<2>::Aes32 (this=0x7fffffffdbd0, Nbits=30) at random_foreach/AesFunc.h:26
#2  feistel_for_each<Aes32<2>, unsigned int, std::random_device, main::$_0&> (M=<optimised out>, rng=..., cb=...) at random_foreach/shootout.cpp:183
#3  main::$_16::operator() (this=<optimised out>) at random_foreach/shootout.cpp:295
#4  std::_Function_handler<void (), main::$_16>::_M_invoke(std::_Any_data const&) (__functor=...) at /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/std_function.h:300
#5  0x00000000004024d6 in std::function<void ()>::operator()() const (this=0xbf7fc0831aac4a80) at /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/std_function.h:690
#6  main (argc=<optimised out>, argv=<optimised out>) at random_foreach/shootout.cpp:337

sequential*, xor, dowhile, and random* don't crash. The rest all seem to crash in exactly the same place as above.

It seems it's not just AES, as even Fnv1aCiphers.h (which doesn't seem to use any intrinsics) crashes in the same place.

I also compiled with gcc 9.2.1 (the default compiler for my system), and that has the same symptoms.

oschonrock commented 4 years ago

OK, it was about my old CPU and the intrinsics, just not the AES ones. I am missing the "avx2", "bmi2" and "sha" features. I had to comment out some of the compile flags in CMakeLists.txt; then compile errors started to appear, and I could comment out the relevant parts of the source.

Basically, it seems your code requires Haswell (I think that gets you everything) and I have Sandy Bridge.

Here are the changes I needed to make to make it compile and run without crashes:

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 28f7e48..30520bf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@ project(feistel)

 #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -maes -std=c++2a -O3 -g")
 # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -maes -std=c++2a  -g")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mavx2 -maes -msha -mbmi2 -std=c++1z -O3 -ggdb -fno-omit-frame-pointer -DNDEBUG=")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mavx -maes -std=c++17 -O3 -ggdb -fno-omit-frame-pointer -DNDEBUG=")

 # performance testing
@@ -11,9 +11,9 @@ add_executable(shootout
     shootout.cpp
     LazyFisherYates.h
     AesFunc.h
-    ShaFeistel.h
+    # ShaFeistel.h
     Fnv1aCiphers.h
-    simdfeistel.h
+    # simdfeistel.h
     donothing.cpp)

 # statistical testing
@@ -22,12 +22,12 @@ add_executable(binaryrng

 # statistical testing with big crush
 # http://www.pcg-random.org/posts/how-to-test-with-testu01.html
-add_executable(runbigcrush
-    PlaygroundFeistel.h
-    GenericFeistel.h
-    bigcrush.cpp)
-target_link_libraries(runbigcrush
-    PRIVATE
-   testu01
-   testu01probdist
-   testu01mylib)
+# add_executable(runbigcrush
+#     PlaygroundFeistel.h
+#     GenericFeistel.h
+#     bigcrush.cpp)
+# target_link_libraries(runbigcrush
+#     PRIVATE
+#    testu01
+#    testu01probdist
+#    testu01mylib)
diff --git a/shootout.cpp b/shootout.cpp
index d58cf54..0a5f6f1 100644
--- a/shootout.cpp
+++ b/shootout.cpp
@@ -20,9 +20,9 @@
 #include "GenericFeistel.h"
 #include "LazyFisherYates.h"
 #include "PlaygroundFeistel.h"
-#include "ShaFeistel.h"
+// #include "ShaFeistel.h"
 #include "XoroFeistel.h"
-#include "simdfeistel.h"
+// #include "simdfeistel.h"

 // empty test function in another translation unit
 void
@@ -197,41 +197,41 @@ feistel_for_each(Integer M, URBG&& rng, Callback&& cb)
   std::abort();
 }

-/**
- * like feistel_for_each, but simd parallelized
- */
-template<typename Integer, typename URBG, typename Callback>
-void
-simdfeistel_for_each(Integer M, URBG&& rng, Callback&& cb)
-{
-  // how many bits do we need?
-  int bitsneeded = static_cast<int>(std::ceil(std::log2(M)));
-  // round up to even
-  bitsneeded /= 2;
-  bitsneeded *= 2;
+// /**
+//  * like feistel_for_each, but simd parallelized
+//  */
+// template<typename Integer, typename URBG, typename Callback>
+// void
+// simdfeistel_for_each(Integer M, URBG&& rng, Callback&& cb)
+// {
+//   // how many bits do we need?
+//   int bitsneeded = static_cast<int>(std::ceil(std::log2(M)));
+//   // round up to even
+//   bitsneeded /= 2;
+//   bitsneeded *= 2;

-  if (bitsneeded <= 32) {
-    ParallelFeistel cipher(bitsneeded);
-    cipher.seed(rng);
-    ManyU32 II(0, 1, 2, 3, 4, 5, 6, 7);
-    const ManyU32 ones(1);
-    for (Integer count = 0; count < M; II += ones) {
-      auto ea = cipher.encrypt(II).toArray();
-      for (auto encrypted : ea) {
-        if (encrypted < M) {
-          cb(encrypted);
-          ++count;
-          if (count >= M) {
-            return;
-          }
-        }
-      }
-    }
-    return;
-  }
-  std::puts("implement switching to 64 bit");
-  std::abort();
-}
+//   if (bitsneeded <= 32) {
+//     ParallelFeistel cipher(bitsneeded);
+//     cipher.seed(rng);
+//     ManyU32 II(0, 1, 2, 3, 4, 5, 6, 7);
+//     const ManyU32 ones(1);
+//     for (Integer count = 0; count < M; II += ones) {
+//       auto ea = cipher.encrypt(II).toArray();
+//       for (auto encrypted : ea) {
+//         if (encrypted < M) {
+//           cb(encrypted);
+//           ++count;
+//           if (count >= M) {
+//             return;
+//           }
+//         }
+//       }
+//     }
+//     return;
+//   }
+//   std::puts("implement switching to 64 bit");
+//   std::abort();
+// }

 int
 main(int argc, char* argv[])
@@ -297,9 +297,9 @@ main(int argc, char* argv[])
   functions["aes_feistel_rounds4"] = [&]() {
     feistel_for_each<Aes32<4>>(N, std::random_device{}, work);
   };
-  functions["sha1_feistel"] = [&]() {
-    feistel_for_each<ShaFeistel32<2>>(N, std::random_device{}, work);
-  };
+  // functions["sha1_feistel"] = [&]() {
+  //   feistel_for_each<ShaFeistel32<2>>(N, std::random_device{}, work);
+  // };

   functions["xoro_feistel"] = [&]() {
     feistel_for_each<XoroFeistel32<2>>(N, std::random_device{}, work);
@@ -308,9 +308,9 @@ main(int argc, char* argv[])
     feistel_for_each<PlaygroundFeistel<2>>(N, std::random_device{}, work);
   };

-  functions["simd_feistel"] = [&]() {
-    simdfeistel_for_each(N, std::random_device{}, work);
-  };
+  // functions["simd_feistel"] = [&]() {
+  //   simdfeistel_for_each(N, std::random_device{}, work);
+  // };
   if (algoname == "--list") {
     for (auto& e : functions) {
       std::puts(e.first.c_str());

This gave me the following performance:

time ./shootout sequential
real 0m1.750s

time ./shootout aes_feistel
real 0m5.281s

time ./shootout aes_feistel_rounds4
real 0m9.936s
oschonrock commented 4 years ago

Good to clarify that Haswell is required as a minimum, but ultimately there is no issue with the code (except perhaps for the potential of adding feature detection).

So I am closing this.

pauldreik commented 4 years ago

Thanks for your input! I should probably describe this a bit better, or perhaps add some kind of toggle to disable those features. Your speed run is interesting; AES got faster on later CPUs!