MRChemSoft / mrchem

MultiResolution Chemistry
GNU Lesser General Public License v3.0

Tasks #318

Closed gitpeterwind closed 3 years ago

gitpeterwind commented 4 years ago

Complete reorganization of the parallelization of the loops over orbitals. The matrices computed to build the Fock matrix ( <Orb_a| O | Orb_b> ) are organized in blocks, and the list of blocks is treated as a task list that is distributed among MPI workers. (There is no "ownership" of orbitals in this approach, although ownership is still used indirectly.) The main new parts are: 1) rotation among orbitals, 2) overlaps, which are also used for the kinetic energy and other parts, and 3) exchange, although this is less important.
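
As a rough illustration of the task-list idea (a minimal sketch only, not the mrchem API; the function and variable names are hypothetical), the N x N matrix of elements <Orb_a| O | Orb_b> can be partitioned into blocks and every (i-block, j-block) pair registered as one task:

// Illustrative sketch (hypothetical names, not the mrchem API): the N x N
// matrix of elements <Orb_a| O | Orb_b> is partitioned into blocks, and each
// (ib, jb) block becomes one entry in a task list that MPI workers claim
// one at a time from the task manager.
#include <utility>
#include <vector>

std::vector<std::pair<int, int>> make_block_tasks(int N, int block_size) {
    int nblocks = (N + block_size - 1) / block_size; // ceiling division
    std::vector<std::pair<int, int>> tasks;
    for (int ib = 0; ib < nblocks; ib++)
        for (int jb = 0; jb < nblocks; jb++) tasks.push_back({ib, jb});
    return tasks; // handed out one by one by the task manager
}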

There are some other improvements too. The localization routine is now much faster.

The tasks are distributed by a task manager: one MPI process is reserved solely to hand out tasks and to keep track of what has been done (this process is taken from among the "banks"). The banks are now used much more, and a significant amount of time is spent moving orbitals to and from the bank. The number of banks must therefore be much larger, maybe 1/3 to 1/2 of all MPI processes. It is observed that it is more effective to place banks and workers on the same compute nodes (it is not clear why). This can be achieved with, for example:

mpirun --rank-by node -map-by numa -bind-to numa mrchem.x gram.json

The size of the blocks is assigned dynamically. Large blocks are often beneficial because orbitals can be reused, reducing the total communication time; if the blocks are too large, however, they may run into memory limits. The total memory available is now estimated as 2 GB per thread.
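
For reference, a minimal sketch of how a cap on the block size can follow from the 2 GB-per-thread estimate (illustrative only; the actual expression lives in the orbital_utils.cpp patch below, and avg_orb_size_mb is an assumed input here):

// Sketch of the block-size cap implied by ~2 GB per OpenMP thread, where at
// most half of that budget is used for temporarily stored orbitals.
#include <algorithm>

int max_block_size(int n_threads, int avg_orb_size_mb) {
    int budget_mb = n_threads * 2000 / 2;            // half of 2 GB per thread, in MB
    return std::max(1, budget_mb / avg_orb_size_mb); // number of orbitals per block
}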

MinazoBot commented 4 years ago
1 Error
:no_entry_sign: Code style violations detected.
1 Warning
:warning: There are code changes, but no corresponding tests. Please include tests if this PR introduces any modifications in behavior.

Code Style Check


Code style violations detected in the following files:

Execute one of the following actions and commit again:

  1. Run clang-format on the offending files
  2. Apply the suggested patches with git apply patch.

src/parallel.cpp

--- src/parallel.cpp
+++ src/parallel.cpp
@@ -23,9 +23,9 @@
  * <https://mrchem.readthedocs.io/>
  */

-#include <bits/stdc++.h>
 #include <MRCPP/Printer>
 #include <MRCPP/Timer>
+#include <bits/stdc++.h>

 #include "parallel.h"
 #include "qmfunctions/ComplexFunction.h"
@@ -135,23 +135,23 @@
     MPI_Comm_rank(mpi::comm_orb, &mpi::orb_rank);
     MPI_Comm_size(mpi::comm_orb, &mpi::orb_size);

-    //if bank_size is large enough, we reserve one as "task manager"
+    // if bank_size is large enough, we reserve one as "task manager"
     mpi::orb_bank_size = mpi::bank_size;
-    mpi::task_bank=-1;
-    if(mpi::bank_size == 1){
-        //use the bank as task manager
-        mpi::task_bank=mpi::bankmaster[0];
-    } else if(mpi::bank_size > 1){
+    mpi::task_bank = -1;
+    if (mpi::bank_size == 1) {
+        // use the bank as task manager
+        mpi::task_bank = mpi::bankmaster[0];
+    } else if (mpi::bank_size > 1) {
         // reserve one bank for task management only
-        mpi::orb_bank_size = mpi::bank_size-1;
-        mpi::task_bank=mpi::bankmaster[mpi::orb_bank_size];
+        mpi::orb_bank_size = mpi::bank_size - 1;
+        mpi::task_bank = mpi::bankmaster[mpi::orb_bank_size];
     }

     void *val;
     int flag;
     MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &val, &flag); // max value allowed by MPI for tags
-    id_shift = *(int*)val / 2; // half is reserved for non orbital. Maybe not necessary
-    if(mpi::world_rank==0) std::cout<<" max tag value "<<*(int*)val<<std::endl;
+    id_shift = *(int *)val / 2;                                 // half is reserved for non orbital. Maybe not necessary
+    if (mpi::world_rank == 0) std::cout << " max tag value " << *(int *)val << std::endl;
     if (mpi::is_bank) {
         // define rank among bankmasters
         mpi::bank_rank = mpi::world_rank % mpi::bank_size;
@@ -168,9 +168,9 @@

 void mpi::finalize() {
 #ifdef HAVE_MPI
-    if (mpi::bank_size > 0 and mpi::grand_master()){
+    if (mpi::bank_size > 0 and mpi::grand_master()) {
         auto plevel = Printer::getPrintLevel();
-        if (plevel > 1) std::cout<<" max data in bank "<<mpi::orb_bank.get_maxtotalsize()<<" MB "<<std::endl;
+        if (plevel > 1) std::cout << " max data in bank " << mpi::orb_bank.get_maxtotalsize() << " MB " << std::endl;
         mpi::orb_bank.close();
     }
     MPI_Barrier(MPI_COMM_WORLD); // to ensure everybody got here
@@ -424,13 +424,12 @@
 struct s_ix {
     long long ix, value;
 };
-bool operator<(s_ix  a, s_ix  b) {
+bool operator<(s_ix a, s_ix b) {
     return a.value < b.value;
 }
 std::set<s_ix> priority;
 std::map<int, s_ix> rank2q;

-
 // Sort banks in a linked list.
 // Can take the first, the last, or extract a specific bank and put it in first or last position
 // Very fast, O(1)
@@ -442,15 +441,16 @@
     std::vector<std::list<int>::iterator> br2pos;
     std::vector<long long> timestamp;
     long long first_timestamp, last_timestamp;
-    SimpleQueue(int nbanks) : nbanks(nbanks) {
+    SimpleQueue(int nbanks)
+            : nbanks(nbanks) {
         assert(nbanks > 0);
         for (int i = 0; i < nbanks; i++) {
             order.push_back(i);
             timestamp.push_back(i);
-            br2pos.push_back( prev(order.end()) );
+            br2pos.push_back(prev(order.end()));
         }
         first_timestamp = 0;
-        last_timestamp = nbanks-1;
+        last_timestamp = nbanks - 1;
     }
     void moveToBack(int bank_rank) { // back is newest timestamp. Show banks which recently got a task
         auto pos = br2pos[bank_rank];
@@ -464,20 +464,11 @@
         first_timestamp--;
         timestamp[bank_rank] = first_timestamp;
     }
-    int getFront() {
-        return order.front();
-    }
-    int getBack() {
-        return order.back();
-    }
-    long long getTimestamp(int bank_rank) {
-        return timestamp[bank_rank];
-    }
+    int getFront() { return order.front(); }
+    int getBack() { return order.back(); }
+    long long getTimestamp(int bank_rank) { return timestamp[bank_rank]; }
 };

-
-
-
 void Bank::open() {
 #ifdef HAVE_MPI
     MPI_Status status;
@@ -506,28 +497,29 @@
         Timer t_timer;
         MPI_Recv(&message, 1, MPI_INTEGER, MPI_ANY_SOURCE, MPI_ANY_TAG, mpi::comm_bank, &status);
         //        if (printinfo or (int)((float)t_timer.elapsed()* 1000)>150)
-        //  std::cout << mpi::world_rank << " got message " << message <<" from " << status.MPI_SOURCE <<" "<<(int)((float)t_timer.elapsed()* 1000)<<std::endl;
+        //  std::cout << mpi::world_rank << " got message " << message <<" from " << status.MPI_SOURCE <<"
+        //  "<<(int)((float)t_timer.elapsed()* 1000)<<std::endl;
         if (message == CLOSE_BANK) {
             if (mpi::is_bankmaster and printinfo) std::cout << "Bank is closing" << std::endl;
             this->clear_bank();
-       readytasks.clear();
+            readytasks.clear();
             break; // close bank, i.e stop listening for incoming messages
         }
         if (message == CLEAR_BANK) {
             this->clear_bank();
-       readytasks.clear();
+            readytasks.clear();
             banks_fromid.clear();
             // send message that it is ready (value of message is not used)
             MPI_Ssend(&message, 1, MPI_INTEGER, status.MPI_SOURCE, 77, mpi::comm_bank);
         }
         if (message == GETMAXTOTDATA) {
-            int maxsize_int = maxsize/1024; // convert into MB
+            int maxsize_int = maxsize / 1024; // convert into MB
             MPI_Send(&maxsize_int, 1, MPI_INTEGER, status.MPI_SOURCE, 1171, mpi::comm_bank);
-   }
+        }
         if (message == GETTOTDATA) {
-            int maxsize_int = currentsize/1024; // convert into MB
+            int maxsize_int = currentsize / 1024; // convert into MB
             MPI_Send(&maxsize_int, 1, MPI_INTEGER, status.MPI_SOURCE, 1172, mpi::comm_bank);
-   }
+        }
         if (message == GET_ORBITAL or message == GET_ORBITAL_AND_WAIT or message == GET_ORBITAL_AND_DELETE or
             message == GET_FUNCTION or message == GET_DATA) {
             // withdrawal
@@ -582,7 +574,8 @@
             // make a new deposit
             int exist_flag = 0;
             if (id2ix[status.MPI_TAG]) {
-         std::cout<<"WARNING: id "<<status.MPI_TAG<<" exists already"<<" "<<status.MPI_SOURCE<<std::endl;
+                std::cout << "WARNING: id " << status.MPI_TAG << " exists already"
+                          << " " << status.MPI_SOURCE << std::endl;
                 ix = id2ix[status.MPI_TAG]; // the deposit exist from before. Will be overwritten
                 exist_flag = 1;
                 if (message == SAVE_DATA and !deposits[ix].hasdata) {
@@ -593,9 +586,7 @@
             } else {
                 ix = deposits.size(); // NB: ix is now index of last element + 1
                 deposits.resize(ix + 1);
-                if (message == SAVE_ORBITAL or message == SAVE_FUNCTION) {
-                    deposits[ix].orb = new Orbital(0);
-                }
+                if (message == SAVE_ORBITAL or message == SAVE_FUNCTION) { deposits[ix].orb = new Orbital(0); }
                 if (message == SAVE_DATA) {
                     deposits[ix].data = new double[datasize];
                     deposits[ix].hasdata = true;
@@ -624,14 +615,15 @@
                          deposits[ix].id,
                          mpi::comm_bank,
                          &status);
-                this->currentsize += datasize/128; // converted into kB
+                this->currentsize += datasize / 128; // converted into kB
                 this->maxsize = std::max(this->currentsize, this->maxsize);
             }
             if (id2qu[deposits[ix].id] != 0) {
                 // someone is waiting for those data. Send to them
                 int iq = id2qu[deposits[ix].id];
                 if (deposits[ix].id != queue[iq].id)
-                    std::cout << ix << " Bank queue accounting ERROR " << deposits[ix].id<<" "<<queue[iq].id<<std::endl;
+                    std::cout << ix << " Bank queue accounting ERROR " << deposits[ix].id << " " << queue[iq].id
+                              << std::endl;
                 for (int iqq : queue[iq].clients) {
                     if (message == SAVE_ORBITAL) {
                         mpi::send_orbital(*deposits[ix].orb, iqq, queue[iq].id, mpi::comm_bank);
@@ -643,7 +635,8 @@
                         MPI_Send(deposits[ix].data, datasize, MPI_DOUBLE, iqq, queue[iq].id, mpi::comm_bank);
                     }
                 }
-                queue[iq].clients.clear(); // cannot erase entire queue[iq], because that would require to shift all the id2qu value larger than iq
+                queue[iq].clients.clear(); // cannot erase entire queue[iq], because that would require to shift all the
+                                           // id2qu value larger than iq
                 queue[iq].id = -1;
                 id2qu.erase(deposits[ix].id);
             }
@@ -665,89 +658,91 @@

         if (message == INIT_TASKS) {
             MPI_Recv(&tot_ntasksij, 2, MPI_INTEGER, status.MPI_SOURCE, status.MPI_TAG, mpi::comm_bank, &status);
-       tot_ntasks = tot_ntasksij[0]*tot_ntasksij[1];
-       tot_ntasks_2D.resize(tot_ntasksij[1]);
-       for (int i = 0; i < tot_ntasksij[1]; i++) tot_ntasks_2D[i] = tot_ntasksij[0]; // each j has  tot_ntasksij[0] i blocks
+            tot_ntasks = tot_ntasksij[0] * tot_ntasksij[1];
+            tot_ntasks_2D.resize(tot_ntasksij[1]);
+            for (int i = 0; i < tot_ntasksij[1]; i++)
+                tot_ntasks_2D[i] = tot_ntasksij[0]; // each j has  tot_ntasksij[0] i blocks
             next_task = 0;
         }
         if (message == GET_NEXTTASK) {
             int task = next_task;
-            if(next_task >= tot_ntasks) task = -1; // flag to show all tasks are assigned
+            if (next_task >= tot_ntasks) task = -1; // flag to show all tasks are assigned
             MPI_Send(&task, 1, MPI_INTEGER, status.MPI_SOURCE, 843, mpi::comm_bank);
             next_task++;
         }
         if (message == GET_NEXTTASK_2D) {
             int task = next_task;
             int task_2D[2];
-            if(next_task >= tot_ntasks) {
+            if (next_task >= tot_ntasks) {
                 task = -1; // flag to show all tasks are assigned
                 task_2D[0] = -1;
                 task_2D[1] = -1;
-       } else {
-         //if possible, give a task with same j (=status.MPI_TAG)
-         if(tot_ntasks_2D[status.MPI_TAG]>0){
-                  tot_ntasks_2D[status.MPI_TAG]--; // next i for that j
-                  task = status.MPI_TAG * tot_ntasksij[0] + tot_ntasks_2D[status.MPI_TAG];
-                  task_2D[0] = tot_ntasks_2D[status.MPI_TAG];
-                  task_2D[1] = status.MPI_TAG; // same j as asked
-         } else {
-                  //find any available task
-                  int err=1;
-                  for (int j = 0; j<tot_ntasks_2D.size(); j++) {
-                      int jj = (j+status.MPI_TAG)%tot_ntasks_2D.size();// we want to spread among i
-                      if(tot_ntasks_2D[jj]>0){
-                          tot_ntasks_2D[jj]--;
-                          task_2D[0] = tot_ntasks_2D[jj];
-                          task_2D[1] = jj;
-                          err = 0;
-                          break;
-                      }
-                  }
-                  if (err) std::cout<<"ERROR find 2Dtask"<<std::endl;
-         }
-       }
+            } else {
+                // if possible, give a task with same j (=status.MPI_TAG)
+                if (tot_ntasks_2D[status.MPI_TAG] > 0) {
+                    tot_ntasks_2D[status.MPI_TAG]--; // next i for that j
+                    task = status.MPI_TAG * tot_ntasksij[0] + tot_ntasks_2D[status.MPI_TAG];
+                    task_2D[0] = tot_ntasks_2D[status.MPI_TAG];
+                    task_2D[1] = status.MPI_TAG; // same j as asked
+                } else {
+                    // find any available task
+                    int err = 1;
+                    for (int j = 0; j < tot_ntasks_2D.size(); j++) {
+                        int jj = (j + status.MPI_TAG) % tot_ntasks_2D.size(); // we want to spread among i
+                        if (tot_ntasks_2D[jj] > 0) {
+                            tot_ntasks_2D[jj]--;
+                            task_2D[0] = tot_ntasks_2D[jj];
+                            task_2D[1] = jj;
+                            err = 0;
+                            break;
+                        }
+                    }
+                    if (err) std::cout << "ERROR find 2Dtask" << std::endl;
+                }
+            }
             MPI_Send(task_2D, 2, MPI_INTEGER, status.MPI_SOURCE, 853, mpi::comm_bank);
             next_task++;
         }
         if (message == PUT_READYTASK) {
-       int iready;
+            int iready;
             MPI_Recv(&iready, 1, MPI_INTEGER, status.MPI_SOURCE, status.MPI_TAG, mpi::comm_bank, &status);
-       readytasks[iready].push_back(status.MPI_TAG); // status.MPI_TAG gives the id
-            if(readytasks[iready][0]==0)std::cout<<"ERROR putreadytask"<<std::endl;
-   }
+            readytasks[iready].push_back(status.MPI_TAG); // status.MPI_TAG gives the id
+            if (readytasks[iready][0] == 0) std::cout << "ERROR putreadytask" << std::endl;
+        }
         if (message == DEL_READYTASK) {
-       int iready; // corresponding orbital index given by client
+            int iready; // corresponding orbital index given by client
             MPI_Recv(&iready, 1, MPI_INTEGER, status.MPI_SOURCE, status.MPI_TAG, mpi::comm_bank, &status);
-            for (int i=0; i< readytasks[iready].size(); i++) {
-                if(readytasks[iready][i] == status.MPI_TAG){
+            for (int i = 0; i < readytasks[iready].size(); i++) {
+                if (readytasks[iready][i] == status.MPI_TAG) {
                     readytasks[iready].erase(readytasks[iready].begin() + i); // status.MPI_TAG gives the id
                     break;
                 }
             }
-   }
+        }
         if (message == GET_READYTASK) {
-       int iready = status.MPI_TAG; // the value of the index is sent through the tag
-       int nready = readytasks[iready].size();
-            if(nready>0 and readytasks[iready][0]==0)std::cout<<"ERROR getreadytask"<<std::endl;
+            int iready = status.MPI_TAG; // the value of the index is sent through the tag
+            int nready = readytasks[iready].size();
+            if (nready > 0 and readytasks[iready][0] == 0) std::cout << "ERROR getreadytask" << std::endl;
             MPI_Send(&nready, 1, MPI_INTEGER, status.MPI_SOURCE, 844, mpi::comm_bank);
             MPI_Send(readytasks[iready].data(), nready, MPI_INTEGER, status.MPI_SOURCE, 845, mpi::comm_bank);
-   }
+        }
         if (message == GET_READYTASK_DEL) {
-       int iready = status.MPI_TAG; // the value of the index is sent through the tag
-       int nready = readytasks[iready].size();
+            int iready = status.MPI_TAG; // the value of the index is sent through the tag
+            int nready = readytasks[iready].size();
             MPI_Send(&nready, 1, MPI_INTEGER, status.MPI_SOURCE, 844, mpi::comm_bank);
-            if(nready>0 and readytasks[iready][0]==0)std::cout<<"ERROR del getreadytask"<<std::endl;
+            if (nready > 0 and readytasks[iready][0] == 0) std::cout << "ERROR del getreadytask" << std::endl;
             MPI_Send(readytasks[iready].data(), nready, MPI_INTEGER, status.MPI_SOURCE, 845, mpi::comm_bank);
             readytasks[iready].clear();
-   }
+        }
         if (message == PUT_ORB_N) {
             int nbanks;
-            int id=status.MPI_TAG;
+            int id = status.MPI_TAG;
             MPI_Recv(&nbanks, 1, MPI_INTEGER, status.MPI_SOURCE, id, mpi::comm_bank, &status);
             // make a list of n banks that will store the orbital id.
             std::vector<int> banks(nbanks); // the ranks of the banks to whom to send the orbital
-            if(banks_fromid[id].size()>0){
-         std::cout<<mpi::orb_rank<<"WARNING: bank id "<<id<<" is already defined "<<banks_fromid[id].size()<<" "<<status.MPI_SOURCE<<std::endl;
+            if (banks_fromid[id].size() > 0) {
+                std::cout << mpi::orb_rank << "WARNING: bank id " << id << " is already defined "
+                          << banks_fromid[id].size() << " " << status.MPI_SOURCE << std::endl;
             }
             banks_fromid[id].clear();
             for (int i = 0; i < nbanks; i++) {
@@ -755,35 +750,42 @@
                 banks[i] = bank_queue.getFront();
                 // also banks which are on the same node are marked as "busy"
                 //                std::cout<<" put "<<(banks[i])<<std::endl;
-                //                for (int ii = 0; ii < 8; ii++) if(banks[i]-banks[i]%8 + ii==mpi::orb_bank_size)std::cout<<" error "<<banks[i]-banks[i]%8 + ii<<" "<<mpi::orb_bank_size<<" tbank"<<mpi::task_bank <<std::endl;
-                //      for (int ii = 0; ii < 8; ii++) if(banks[i]-banks[i]%8 + ii!=0) bank_queue.moveToBack(banks[i]-banks[i]%8 + ii-1); // asummes 8 mpi per node, ranks starting from a multiple of 8
-               bank_queue.moveToBack(banks[i]);// mark last-> busiest
-               //banks[i] = rand()% mpi::orb_bank_size;
-               //if(i>0)banks[i] = (banks[0]+ 1 + rand()% (mpi::orb_bank_size-1))% mpi::orb_bank_size;
+                //                for (int ii = 0; ii < 8; ii++) if(banks[i]-banks[i]%8 +
+                //                ii==mpi::orb_bank_size)std::cout<<" error "<<banks[i]-banks[i]%8 + ii<<"
+                //                "<<mpi::orb_bank_size<<" tbank"<<mpi::task_bank <<std::endl;
+                //      for (int ii = 0; ii < 8; ii++) if(banks[i]-banks[i]%8 + ii!=0)
+                //      bank_queue.moveToBack(banks[i]-banks[i]%8 + ii-1); // asummes 8 mpi per node, ranks starting
+                //      from a multiple of 8
+                bank_queue.moveToBack(banks[i]); // mark last-> busiest
+                                                 // banks[i] = rand()% mpi::orb_bank_size;
+                // if(i>0)banks[i] = (banks[0]+ 1 + rand()% (mpi::orb_bank_size-1))% mpi::orb_bank_size;
                 banks_fromid[id].push_back(banks[i]); // list of banks that hold the orbital id
-          }
+            }
             MPI_Send(banks.data(), nbanks, MPI_INTEGER, status.MPI_SOURCE, 846, mpi::comm_bank);
         }
         if (message == GET_ORB_N) {
             // find an available bank
-            int id=status.MPI_TAG;
+            int id = status.MPI_TAG;
             int b_rank = (id) % mpi::orb_bank_size;
-            long long mn=1e18;
+            long long mn = 1e18;
             for (int rank : banks_fromid[id]) {
-                if(bank_queue.getTimestamp(rank)<mn) {
+                if (bank_queue.getTimestamp(rank) < mn) {
                     //                if(queue_n[rank]<mn) {
                     //                    mn = queue_n[rank];
                     mn = bank_queue.getTimestamp(rank);
                     b_rank = rank;
                 }
             }
-            //            std::cout<<" giving bank "<<b_rank<<" score "<<queue_n[b_rank]<<" Timestamp "<<bank_queue.getTimestamp(b_rank) <<" "<<queue_n[b_rank]<<std::endl;
+            //            std::cout<<" giving bank "<<b_rank<<" score "<<queue_n[b_rank]<<" Timestamp
+            //            "<<bank_queue.getTimestamp(b_rank) <<" "<<queue_n[b_rank]<<std::endl;
             queue_n[b_rank] = request_counter++;
             // also banks which are on the same node are marked as "busy"
-            //            for (int ii = 0; ii < 8; ii++)  if(b_rank-b_rank%8+ii!=0) bank_queue.moveToBack(b_rank-b_rank%8+ii-1); // asummes 8 mpi per node, ranks starting from a multiple of 8
+            //            for (int ii = 0; ii < 8; ii++)  if(b_rank-b_rank%8+ii!=0)
+            //            bank_queue.moveToBack(b_rank-b_rank%8+ii-1); // asummes 8 mpi per node, ranks starting from a
+            //            multiple of 8
             bank_queue.moveToBack(b_rank); // mark last-> busiest
             MPI_Send(&b_rank, 1, MPI_INTEGER, status.MPI_SOURCE, 847, mpi::comm_bank);
-       }
+        }
     }
 #endif
 }
@@ -792,7 +794,8 @@
 int Bank::put_orb(int id, Orbital &orb) {
 #ifdef HAVE_MPI
     // for now we distribute according to id
-  if (id > id_shift) std::cout<<"WARNING: Bank id should be less than id_shift ("<<id_shift<<"), found id="<<id<<std::endl;
+    if (id > id_shift)
+        std::cout << "WARNING: Bank id should be less than id_shift (" << id_shift << "), found id=" << id << std::endl;
     if (id > id_shift) MSG_WARN("Bank id should be less than id_shift");
     MPI_Send(&SAVE_ORBITAL, 1, MPI_INTEGER, mpi::bankmaster[id % mpi::orb_bank_size], id, mpi::comm_bank);
     mpi::send_orbital(orb, mpi::bankmaster[id % mpi::orb_bank_size], id, mpi::comm_bank);
@@ -920,7 +923,7 @@
     return maxtot;
 }

-std::vector<int>  Bank::get_totalsize() {
+std::vector<int> Bank::get_totalsize() {
     std::vector<int> tot;
 #ifdef HAVE_MPI
     MPI_Status status;
@@ -934,8 +937,6 @@
     return tot;
 }

-
-
 // remove all deposits
 // NB:: collective call. All clients must call this
 void Bank::clear_all(int iclient, MPI_Comm comm) {
@@ -943,7 +944,7 @@
     comm = mpi::comm_orb;
     // 1) wait until all clients are ready
     mpi::barrier(comm);
-   // master send signal to bank
+    // master send signal to bank
     if (iclient == 0) {
         for (int i = 0; i < mpi::bank_size; i++) {
             MPI_Send(&CLEAR_BANK, 1, MPI_INTEGER, mpi::bankmaster[i], 0, mpi::comm_bank);
@@ -955,7 +956,7 @@
             MPI_Recv(&message, 1, MPI_INTEGER, mpi::bankmaster[i], 77, mpi::comm_bank, &status);
         }
     }
-    mpi::barrier(comm); //NB: MUST wait until cleared finished before starting other things
+    mpi::barrier(comm); // NB: MUST wait until cleared finished before starting other things
 #endif
 }

@@ -983,16 +984,16 @@
     mpi::barrier(comm); // NB: everybody must be done before init
     int dest = mpi::task_bank;
     int ntasks[2];
-    ntasks[0]=ntasksi;
-    ntasks[1]=ntasksj;
+    ntasks[0] = ntasksi;
+    ntasks[1] = ntasksj;
     // NB: Must be Synchronous send (SSend), because others must not
     // be able to start
-    if(rank == 0) {
+    if (rank == 0) {
         MPI_Ssend(&INIT_TASKS, 1, MPI_INTEGER, dest, 0, mpi::comm_bank);
         MPI_Ssend(ntasks, 2, MPI_INTEGER, dest, 0, mpi::comm_bank);
     }
     mpi::barrier(comm); // NB: nobody must start before init is ready
- #endif
+#endif
 }

 void Bank::init_tasks(int ntaskstot, int rank, MPI_Comm comm) {
@@ -1004,12 +1005,12 @@
     ntasks[1] = 1;
     // NB: Must be Synchronous send (SSend), because others must not
     // be able to start
-    if(rank == 0) {
+    if (rank == 0) {
         MPI_Ssend(&INIT_TASKS, 1, MPI_INTEGER, dest, 0, mpi::comm_bank);
         MPI_Ssend(ntasks, 2, MPI_INTEGER, dest, 0, mpi::comm_bank);
     }
     mpi::barrier(comm); // NB: nobody must start before init is ready
- #endif
+#endif
 }

 void Bank::get_task(int *task_2D, int i) {
@@ -1021,7 +1022,6 @@
 #endif
 }

-
 void Bank::get_task(int *task) {
 #ifdef HAVE_MPI
     MPI_Status status;
@@ -1055,14 +1055,14 @@
 #ifdef HAVE_MPI
     MPI_Status status;
     int task = GET_READYTASK;
-    if(del == 1) task = GET_READYTASK_DEL;
+    if (del == 1) task = GET_READYTASK_DEL;

     MPI_Send(&task, 1, MPI_INTEGER, mpi::task_bank, i, mpi::comm_bank);
     int nready;
     MPI_Recv(&nready, 1, MPI_INTEGER, mpi::task_bank, 844, mpi::comm_bank, &status);
     readytasks.resize(nready);
     MPI_Recv(readytasks.data(), nready, MPI_INTEGER, mpi::task_bank, 845, mpi::comm_bank, &status);
-    if(nready>0 and readytasks[0]==0)std::cout<<"ERROR get_readytask "<<nready<<std::endl;
+    if (nready > 0 and readytasks[0] == 0) std::cout << "ERROR get_readytask " << nready << std::endl;

 #endif
     return readytasks;
@@ -1072,7 +1072,7 @@
 int Bank::put_orb_n(int id, Orbital &orb, int n) {
 #ifdef HAVE_MPI
     MPI_Status status;
-    //the task manager assigns n bank to whom the orbital is sent
+    // the task manager assigns n bank to whom the orbital is sent
     int dest = mpi::task_bank;
     MPI_Send(&PUT_ORB_N, 1, MPI_INTEGER, dest, id, mpi::comm_bank);
     MPI_Send(&n, 1, MPI_INTEGER, dest, id, mpi::comm_bank);
@@ -1089,14 +1089,14 @@
 int Bank::get_orb_n(int id, Orbital &orb, int del) {
 #ifdef HAVE_MPI
     MPI_Status status;
-    //the task manager tells from whom the orbital is to be fetched
+    // the task manager tells from whom the orbital is to be fetched
     int dest = mpi::task_bank;
     MPI_Send(&GET_ORB_N, 1, MPI_INTEGER, dest, id, mpi::comm_bank);
     int b_rank;
     MPI_Recv(&b_rank, 1, MPI_INTEGER, dest, 847, mpi::comm_bank, &status);
     /*MPI_Send(&GET_ORBITAL, 1, MPI_INTEGER, bank_rank, id, mpi::comm_bank);
     mpi::recv_orbital(orb, bank_rank, id, mpi::comm_bank);*/
-     if(del == 1){
+    if (del == 1) {
         MPI_Status status;
         MPI_Send(&GET_ORBITAL_AND_DELETE, 1, MPI_INTEGER, mpi::bankmaster[b_rank], id, mpi::comm_bank);
         int found;
@@ -1107,11 +1107,12 @@
         } else {
             return 0;
         }
-     } else {
-         Timer tt;
-         MPI_Send(&GET_ORBITAL_AND_WAIT, 1, MPI_INTEGER, mpi::bankmaster[b_rank], id, mpi::comm_bank);
-         mpi::recv_orbital(orb, mpi::bankmaster[b_rank], id, mpi::comm_bank);
-         //         if(mpi::world_rank==0)std::cout << mpi::world_rank << "waited " << (int)((float)tt.elapsed()* 1000)<<" for "<<orb.getSizeNodes(NUMBER::Total)/1000<<" MB from bank:"<<b_rank<<std::endl;
+    } else {
+        Timer tt;
+        MPI_Send(&GET_ORBITAL_AND_WAIT, 1, MPI_INTEGER, mpi::bankmaster[b_rank], id, mpi::comm_bank);
+        mpi::recv_orbital(orb, mpi::bankmaster[b_rank], id, mpi::comm_bank);
+        //         if(mpi::world_rank==0)std::cout << mpi::world_rank << "waited " << (int)((float)tt.elapsed()*
+        //         1000)<<" for "<<orb.getSizeNodes(NUMBER::Total)/1000<<" MB from bank:"<<b_rank<<std::endl;
     }
 #endif
     return 1;

src/parallel.h

--- src/parallel.h
+++ src/parallel.h
@@ -170,7 +170,7 @@
     std::map<int, int> id2qu;
     std::vector<bank::queue_struct> queue;
     long long currentsize = 0; // total deposited data size (without containers)
-    long long maxsize = 0; // max total deposited data size (without containers)
+    long long maxsize = 0;     // max total deposited data size (without containers)

     void clear_bank();
 };

src/qmfunctions/QMFunction.cpp

--- src/qmfunctions/QMFunction.cpp
+++ src/qmfunctions/QMFunction.cpp
@@ -85,7 +85,7 @@
         if (hasImag()) delete this->func_ptr->im;
         this->func_ptr->im = nullptr;
     }
-    if(isShared() and !hasImag() and !hasReal()) this->func_ptr->shared_mem->clear();
+    if (isShared() and !hasImag() and !hasReal()) this->func_ptr->shared_mem->clear();
 }

 /** @brief Returns the orbital meta data

src/qmfunctions/orbital_utils.cpp

--- src/qmfunctions/orbital_utils.cpp
+++ src/qmfunctions/orbital_utils.cpp
@@ -226,8 +226,8 @@
 OrbitalVector orbital::rotate(OrbitalVector &Phi, const ComplexMatrix &U, double prec) {

     //    if(mpi::bank_size > 0 and Phi.size() >= sqrt(8*mpi::orb_size)) {
-        return orbital::rotate_bank(Phi, U, prec);
-        //
+    return orbital::rotate_bank(Phi, U, prec);
+    //

     // Get all out orbitals belonging to this MPI
     auto inter_prec = (mpi::numerically_exact) ? -1.0 : prec;
@@ -260,7 +260,6 @@
     return out;
 }

-
 /** @brief Orbital transformation out_j = sum_i inp_i*U_ij
  *
  * NOTE: OrbitalVector is considered a ROW vector, so rotation
@@ -270,13 +269,11 @@
  *
  */

-
-
 OrbitalVector orbital::rotate_bank(OrbitalVector &Phi, const ComplexMatrix &U, double prec) {

     mpi::barrier(mpi::comm_orb); // for testing

-    Timer t_tot,t_bankr,t_bankrx,t_bankw,t_add,t_task,t_last;
+    Timer t_tot, t_bankr, t_bankrx, t_bankw, t_add, t_task, t_last;
     t_bankw.stop();
     t_bankr.stop();
     t_bankrx.stop();
@@ -287,10 +284,10 @@
     auto out = orbital::param_copy(Phi);

     mpi::orb_bank.clear_all(mpi::orb_rank, mpi::comm_orb); // needed!
-    mpi::barrier(mpi::comm_orb); // for testing
-    IntVector orb_sizesvec(N); // store the size of the orbitals
-    std::set<std::pair<int, int>> orb_sizes; // store the size of the orbitals, and their indices
-    for(int i = 0 ; i < N; i++) orb_sizesvec[i] = 0;
+    mpi::barrier(mpi::comm_orb);                           // for testing
+    IntVector orb_sizesvec(N);                             // store the size of the orbitals
+    std::set<std::pair<int, int>> orb_sizes;               // store the size of the orbitals, and their indices
+    for (int i = 0; i < N; i++) orb_sizesvec[i] = 0;
     t_bankw.resume();
     // save all orbitals in bank
     for (int i = 0; i < N; i++) {
@@ -302,63 +299,71 @@
     mpi::allreduce_vector(orb_sizesvec, mpi::comm_orb);
     for (int i = 0; i < N; i++) orb_sizes.insert({orb_sizesvec[i], i});
     int totorbsize = 0;
-    for (int i = 0; i < N; i++) totorbsize += orb_sizesvec[i]/1024; // NB: use MB to avoid overflow
-    int avorbsize = totorbsize/N;
+    for (int i = 0; i < N; i++) totorbsize += orb_sizesvec[i] / 1024; // NB: use MB to avoid overflow
+    int avorbsize = totorbsize / N;

     // we do not want to store temporarily more than 1/2 of the total memory for orbitals.
-    int maxBlockSize = omp::n_threads * 2000 / 2 / avorbsize; //assumes 2GB available per thread
-    if(mpi::orb_rank==0)std::cout<<mpi::orb_rank<<" Time bank write all orb " << (int)((float)t_bankw.elapsed() * 1000) <<" av size:"<<avorbsize<<"MB. Maxblocksize:"<<maxBlockSize<<std::endl;
+    int maxBlockSize = omp::n_threads * 2000 / 2 / avorbsize; // assumes 2GB available per thread
+    if (mpi::orb_rank == 0)
+        std::cout << mpi::orb_rank << " Time bank write all orb " << (int)((float)t_bankw.elapsed() * 1000)
+                  << " av size:" << avorbsize << "MB. Maxblocksize:" << maxBlockSize << std::endl;

     // first divide into a fixed number of tasks
     int block_size; // U is partitioned in blocks with size block_size x block_size
-    block_size = std::min(maxBlockSize, static_cast<int>(N/sqrt(2*mpi::orb_size) + 0.5)); // each MPI process will have about 2 tasks
+    block_size = std::min(
+        maxBlockSize, static_cast<int>(N / sqrt(2 * mpi::orb_size) + 0.5)); // each MPI process will have about 2 tasks
     block_size = std::max(block_size, 1);
-    //int iblocks =  (N + block_size -1)/block_size;
+    // int iblocks =  (N + block_size -1)/block_size;

-    // we assume that jorbitals are 5 times more memory demanding than an average input orbital (because they are the linear combinations of many i orbitals.)
-    //and we demand that  iblock_size+jblock_size < maxBlockSize
-    int jblock_size = std::max(1,(maxBlockSize-3)/5);
-    int iblock_size = ((maxBlockSize-3)/5)*4;
-    int iblocks = (N + iblock_size -1)/iblock_size;
-    int jblocks = (N + jblock_size -1)/jblock_size;
+    // we assume that jorbitals are 5 times more memory demanding than an average input orbital (because they are the
+    // linear combinations of many i orbitals.)
+    // and we demand that  iblock_size+jblock_size < maxBlockSize
+    int jblock_size = std::max(1, (maxBlockSize - 3) / 5);
+    int iblock_size = ((maxBlockSize - 3) / 5) * 4;
+    int iblocks = (N + iblock_size - 1) / iblock_size;
+    int jblocks = (N + jblock_size - 1) / jblock_size;
     int ntasks = iblocks * jblocks;
-    if(mpi::orb_rank==0)std::cout<<" block sizes " <<iblock_size<<" x "<<jblock_size<<" iblocks="<<iblocks<<" jblocks="<<jblocks<<" maxBlockSize="<<maxBlockSize<<std::endl;
-    if( ntasks <  3*mpi::orb_size){ // at least 3 tasks per mpi process
-      //too few tasks
-      // reduce jblock_size until satisfactory
-      while(ntasks <  3*mpi::orb_size and jblock_size > 4 and iblock_size > 15){
-   jblock_size--;
-   iblock_size-=4;
-        //iblock_size = std::max(1,jblock_size*5);
-        iblocks = (N + iblock_size -1)/iblock_size;
-   jblocks = (N + jblock_size -1)/jblock_size;
-   ntasks = iblocks * jblocks;
-      }
-      if(mpi::orb_rank==0)std::cout<<"new block sizes " <<iblock_size<<" x "<<jblock_size<<" iblocks="<<iblocks<<" jblocks="<<jblocks<<" maxBlockSize="<<maxBlockSize<<std::endl;
+    if (mpi::orb_rank == 0)
+        std::cout << " block sizes " << iblock_size << " x " << jblock_size << " iblocks=" << iblocks
+                  << " jblocks=" << jblocks << " maxBlockSize=" << maxBlockSize << std::endl;
+    if (ntasks < 3 * mpi::orb_size) { // at least 3 tasks per mpi process
+        // too few tasks
+        // reduce jblock_size until satisfactory
+        while (ntasks < 3 * mpi::orb_size and jblock_size > 4 and iblock_size > 15) {
+            jblock_size--;
+            iblock_size -= 4;
+            // iblock_size = std::max(1,jblock_size*5);
+            iblocks = (N + iblock_size - 1) / iblock_size;
+            jblocks = (N + jblock_size - 1) / jblock_size;
+            ntasks = iblocks * jblocks;
+        }
+        if (mpi::orb_rank == 0)
+            std::cout << "new block sizes " << iblock_size << " x " << jblock_size << " iblocks=" << iblocks
+                      << " jblocks=" << jblocks << " maxBlockSize=" << maxBlockSize << std::endl;
     }

     // fill blocks with orbitals, so that they become roughly evenly distributed
-    std::vector<std::pair<int,int>> orb_map[iblocks]; // orbitals that belong to each block
-    int b = 0; // block index
-    for (auto i: orb_sizes) { // orb_sizes is ordered in increasing orbital sizes
-   b %= iblocks;
-   orb_map[b].push_back(i);
-   b++;
+    std::vector<std::pair<int, int>> orb_map[iblocks]; // orbitals that belong to each block
+    int b = 0;                                         // block index
+    for (auto i : orb_sizes) {                         // orb_sizes is ordered in increasing orbital sizes
+        b %= iblocks;
+        orb_map[b].push_back(i);
+        b++;
     }

-    int task = 0; // task rank
+    int task = 0;                                  // task rank
     std::vector<std::vector<int>> itasks(iblocks); // the i values (orbitals) of each block
     std::vector<std::vector<int>> jtasks(jblocks); // the j values (orbitals) of each block

-     for (int ib = 0; ib < iblocks; ib++) {
-       int isize = orb_map[ib].size();
-      // we fetch orbitals within the same iblocks in different orders in order to avoid overloading the bank
-       for (int i = 0; i < isize; i++) itasks[ib].push_back(orb_map[ib][(i+mpi::orb_rank)%isize].second);
-     }
-
-     for (int jb = 0; jb < jblocks; jb++) {
-      // we simply take the orbitals in the original order
-         for (int j = jb*jblock_size; j < (jb+1)*jblock_size and j<N; j++) jtasks[jb].push_back(j);
+    for (int ib = 0; ib < iblocks; ib++) {
+        int isize = orb_map[ib].size();
+        // we fetch orbitals within the same iblocks in different orders in order to avoid overloading the bank
+        for (int i = 0; i < isize; i++) itasks[ib].push_back(orb_map[ib][(i + mpi::orb_rank) % isize].second);
+    }
+
+    for (int jb = 0; jb < jblocks; jb++) {
+        // we simply take the orbitals in the original order
+        for (int j = jb * jblock_size; j < (jb + 1) * jblock_size and j < N; j++) jtasks[jb].push_back(j);
     }

     // make sum_i inp_i*U_ij within each block, and store result in bank
@@ -374,20 +379,22 @@
     int nodesize = 0;
     int maxjsize = 0;
     OrbitalVector jorb_vec;
-    while(true) { // fetch new tasks until all are completed
-   int task_2D[2];
-   t_task.resume();
-   //        mpi::orb_bank.get_task(&it);
-        mpi::orb_bank.get_task(task_2D, mpi::orb_rank%jblocks);
-   int jb = task_2D[1];
-   int ib = (task_2D[0]+jb)%iblocks; // NB: the blocks are shifted diagonally, so that not all processes start with the first iblock
-   t_task.stop();
-        if(jb<0){
-            if(previous_ib >= 0){
-              // we are done but still need to store the results form previous block
-               for (int j = 0; j<jorb_vec.size(); j++){
+    while (true) { // fetch new tasks until all are completed
+        int task_2D[2];
+        t_task.resume();
+        //        mpi::orb_bank.get_task(&it);
+        mpi::orb_bank.get_task(task_2D, mpi::orb_rank % jblocks);
+        int jb = task_2D[1];
+        int ib =
+            (task_2D[0] + jb) %
+            iblocks; // NB: the blocks are shifted diagonally, so that not all processes start with the first iblock
+        t_task.stop();
+        if (jb < 0) {
+            if (previous_ib >= 0) {
+                // we are done but still need to store the results form previous block
+                for (int j = 0; j < jorb_vec.size(); j++) {
                     int jorb = jtasks[previous_jb][j];
-                    int id = N+itasks[previous_ib][0]*N+jorb;
+                    int id = N + itasks[previous_ib][0] * N + jorb;
                     t_bankw.resume();
                     mpi::orb_bank.put_orb(id, jorb_vec[j]);
                     t_bankw.stop();
@@ -399,27 +406,30 @@
             }
             break;
         }
-   count++;
+        count++;
         t_last.start();
         QMFunctionVector ifunc_vec;
-        nodesize=0;
-   t_bankr.resume();
+        nodesize = 0;
+        t_bankr.resume();
         for (int i = 0; i < itasks[ib].size(); i++) {
             int iorb = itasks[ib][i];
             Orbital phi_i;
             mpi::orb_bank.get_orb_n(iorb, phi_i, 0);
             ifunc_vec.push_back(phi_i);
-            nodesize+=phi_i.getSizeNodes(NUMBER::Total);
+            nodesize += phi_i.getSizeNodes(NUMBER::Total);
         }
-        if(mpi::orb_rank==0)std::cout<<mpi::orb_rank<<" "<<ib<<" "<<jb<<" fetched total " <<nodesize/1024  <<" MB for "<<itasks[ib].size()<<" orbitals "<<" time read "<<(int)((float)t_bankr.elapsed() * 1000) <<std::endl;
-   t_bankr.stop();
+        if (mpi::orb_rank == 0)
+            std::cout << mpi::orb_rank << " " << ib << " " << jb << " fetched total " << nodesize / 1024 << " MB for "
+                      << itasks[ib].size() << " orbitals "
+                      << " time read " << (int)((float)t_bankr.elapsed() * 1000) << std::endl;
+        t_bankr.stop();

-   if(previous_jb != jb and previous_jb >= 0){
+        if (previous_jb != jb and previous_jb >= 0) {
             // we have got a new j and need to store the results form previous block
-            for (int j = 0; j<jorb_vec.size(); j++){
-         if(previous_jb<0 or j>jtasks[previous_jb].size())std::cout<<ib<<" ERROR jtasks"<<std::endl;
+            for (int j = 0; j < jorb_vec.size(); j++) {
+                if (previous_jb < 0 or j > jtasks[previous_jb].size()) std::cout << ib << " ERROR jtasks" << std::endl;
                 int jorb = jtasks[previous_jb][j];
-                int id = N+itasks[previous_ib][0]*N+jorb;
+                int id = N + itasks[previous_ib][0] * N + jorb;
                 t_bankw.resume();
                 mpi::orb_bank.put_orb(id, jorb_vec[j]);
                 t_bankw.stop();
@@ -428,13 +438,13 @@
                 t_task.stop();
             }
             jorb_vec.clear();
-   } else {
+        } else {
             // same jblock as before. We include results from previous block
-            if(mpi::orb_rank==0 and previous_jb >= 0)std::cout<<ib<<" "<<jb<<" reusing j" <<std::endl;
-   }
+            if (mpi::orb_rank == 0 and previous_jb >= 0) std::cout << ib << " " << jb << " reusing j" << std::endl;
+        }

         for (int j = 0; j < jtasks[jb].size(); j++) {
-            ComplexVector coef_vec(itasks[ib].size()+iblocks);
+            ComplexVector coef_vec(itasks[ib].size() + iblocks);
             int jorb = jtasks[jb][j];
             for (int i = 0; i < itasks[ib].size(); i++) {
                 int iorb = itasks[ib][i];
@@ -443,41 +453,47 @@
             // include also block results which may be ready
             t_task.resume();
             std::vector<int> jvec = mpi::orb_bank.get_readytasks(jorb, 1);
-            if(iblock_size + jblock_size + jvec.size() >  maxBlockSize ){
+            if (iblock_size + jblock_size + jvec.size() > maxBlockSize) {
                 // Security mechanism to avoid the possibility of ifunc_vec getting too large
-                for (int ix = maxBlockSize-iblock_size-jblock_size; ix < jvec.size(); ix++ ) {
-                    //define as ready again (i.e. "undelete")
-                    if(mpi::orb_rank==0)std::cout<<jvec[ix]<<" undelete "<<jvec.size()<<" "<<ix<<jorb<<std::endl;
+                for (int ix = maxBlockSize - iblock_size - jblock_size; ix < jvec.size(); ix++) {
+                    // define as ready again (i.e. "undelete")
+                    if (mpi::orb_rank == 0)
+                        std::cout << jvec[ix] << " undelete " << jvec.size() << " " << ix << jorb << std::endl;
                     mpi::orb_bank.put_readytask(jvec[ix], jorb);
                     countxok++;
                 }
-                jvec.resize(maxBlockSize-iblock_size-jblock_size);
+                jvec.resize(maxBlockSize - iblock_size - jblock_size);
             }
             t_task.stop();
-            if(jvec.size() > 0) {
+            if (jvec.size() > 0) {
                 for (int id : jvec) {
-         Orbital phi_i;
+                    Orbital phi_i;
                     t_bankrx.resume();
-                    if(id==0){
-                        std::cout<<mpi::orb_rank<<" got id=0 for"<<jorb<<std::endl;
-                        for (int ix = 0; ix < jvec.size(); ix++ ) std::cout<<ix<<" id= "<<id<<std::endl;
-                   }
+                    if (id == 0) {
+                        std::cout << mpi::orb_rank << " got id=0 for" << jorb << std::endl;
+                        for (int ix = 0; ix < jvec.size(); ix++) std::cout << ix << " id= " << id << std::endl;
+                    }
                     int ok = mpi::orb_bank.get_orb_n(id, phi_i, 1);
                     countx++;
                     t_bankrx.stop();
                     if (ok) {
-           if(ifunc_vec.size()>itasks[ib].size()+iblocks)std::cout<<mpi::orb_rank<<" ERROR too large "<<ifunc_vec.size()<<" "<<ib<<" "<<itasks[ib].size()<<" "<<iblocks<<" "<<jvec.size()<<std::endl;
+                        if (ifunc_vec.size() > itasks[ib].size() + iblocks)
+                            std::cout << mpi::orb_rank << " ERROR too large " << ifunc_vec.size() << " " << ib << " "
+                                      << itasks[ib].size() << " " << iblocks << " " << jvec.size() << std::endl;
                         coef_vec(ifunc_vec.size()) = 1.0;
                         ifunc_vec.push_back(phi_i);
                     } else {
-                        std::cout<<mpi::orb_rank<<"did not find contribution for  " << jorb<<" expected "<<jvec.size()<<" id "<<id<<std::endl;
+                        std::cout << mpi::orb_rank << "did not find contribution for  " << jorb << " expected "
+                                  << jvec.size() << " id " << id << std::endl;
                         MSG_ABORT("did not find contribution for my orbitals");
                     }
-               }
+                }
             }
-            if(previous_jb == jb) {
+            if (previous_jb == jb) {
                 // include result from previous block
-         if(ifunc_vec.size()>itasks[ib].size()+iblocks)std::cout<<mpi::orb_rank<<" BERROR too large "<<ifunc_vec.size()<<" "<<ib<<" "<<itasks[ib].size()<<" "<<iblocks<<" "<<jvec.size()<<std::endl;
+                if (ifunc_vec.size() > itasks[ib].size() + iblocks)
+                    std::cout << mpi::orb_rank << " BERROR too large " << ifunc_vec.size() << " " << ib << " "
+                              << itasks[ib].size() << " " << iblocks << " " << jvec.size() << std::endl;
                 coef_vec(ifunc_vec.size()) = 1.0;
                 ifunc_vec.push_back(jorb_vec[j]);
             }
@@ -485,29 +501,38 @@
             t_add.resume();
             qmfunction::linear_combination(tmp_j, coef_vec, ifunc_vec, priv_prec);
             t_add.stop();
-       maxjsize = std::max(maxjsize, tmp_j.getSizeNodes(NUMBER::Total)/1024);
-            if(previous_jb == jb){
+            maxjsize = std::max(maxjsize, tmp_j.getSizeNodes(NUMBER::Total) / 1024);
+            if (previous_jb == jb) {
                 // results are stored in jorb_vec, replacing the old entries
-           //jorb_vec[j].free(NUMBER::Total);
+                // jorb_vec[j].free(NUMBER::Total);
                 jorb_vec[j] = tmp_j;
-                //tmp_j.release(); ?
-            }else{
+                // tmp_j.release(); ?
+            } else {
                 // old jorb_vec not in use, build new
                 jorb_vec.push_back(tmp_j);
             }
-       // remove block the additional results from the i vector
-       ifunc_vec.resize(itasks[ib].size());
+            // remove block the additional results from the i vector
+            ifunc_vec.resize(itasks[ib].size());
         }
-   ifunc_vec.resize(itasks[ib].size());
-       previous_ib = ib;
-   previous_jb = jb;
+        ifunc_vec.resize(itasks[ib].size());
+        previous_ib = ib;
+        previous_jb = jb;

         t_last.stop();
     }

-    if(mpi::orb_rank==0)std::cout<<mpi::orb_rank<<" Time rotate1 " << (int)((float)t_tot.elapsed() * 1000) << " ms "<<" Time bank read " << (int)((float)t_bankr.elapsed() * 1000) <<" Time bank read xtra " << (int)((float)t_bankrx.elapsed() * 1000) <<" xorb="<<countx<<" xundeleted="<<countxok<<" Time bank write " << (int)((float)t_bankw.elapsed() * 1000) <<" Time add " << (int)((float)t_add.elapsed() * 1000) <<" Time task manager " << (int)((float)t_task.elapsed() * 1000) <<" Time last task " << (int)((float)t_last.elapsed() * 1000) <<" block size "<<iblock_size<<"x"<<jblock_size<<" ntasks executed: "<<count<<" max j size "<<maxjsize<<std::endl;
+    if (mpi::orb_rank == 0)
+        std::cout << mpi::orb_rank << " Time rotate1 " << (int)((float)t_tot.elapsed() * 1000) << " ms "
+                  << " Time bank read " << (int)((float)t_bankr.elapsed() * 1000) << " Time bank read xtra "
+                  << (int)((float)t_bankrx.elapsed() * 1000) << " xorb=" << countx << " xundeleted=" << countxok
+                  << " Time bank write " << (int)((float)t_bankw.elapsed() * 1000) << " Time add "
+                  << (int)((float)t_add.elapsed() * 1000) << " Time task manager "
+                  << (int)((float)t_task.elapsed() * 1000) << " Time last task "
+                  << (int)((float)t_last.elapsed() * 1000) << " block size " << iblock_size << "x" << jblock_size
+                  << " ntasks executed: " << count << " max j size " << maxjsize << std::endl;
     mpi::barrier(mpi::comm_orb);
-    if(mpi::orb_rank==0)std::cout<<" Time rotate1 all " << (int)((float)t_tot.elapsed() * 1000) << " ms "<<std::endl;
+    if (mpi::orb_rank == 0)
+        std::cout << " Time rotate1 all " << (int)((float)t_tot.elapsed() * 1000) << " ms " << std::endl;

     // by now most of the operations are finished. We add only contributions to own orbitals.
     count = 0;
@@ -515,12 +540,13 @@
         if (not mpi::my_orb(Phi[jorb])) continue;
         QMFunctionVector ifunc_vec;
         t_task.resume();
-        std::vector<int> jvec = mpi::orb_bank.get_readytasks(jorb, 1); // Only jorb will use those contributions: delete when done.
+        std::vector<int> jvec =
+            mpi::orb_bank.get_readytasks(jorb, 1); // Only jorb will use those contributions: delete when done.
         t_task.stop();

         ComplexVector coef_vec(jvec.size());
         int i = 0;
-        int idsave=-1;
+        int idsave = -1;
         for (int id : jvec) {
             Orbital phi_i;
             t_bankr.resume();
@@ -529,25 +555,31 @@
             t_bankr.stop();

             if (ok) {
-           if(ifunc_vec.size()>jvec.size())std::cout<<" ERRORifunc "<<std::endl;
+                if (ifunc_vec.size() > jvec.size()) std::cout << " ERRORifunc " << std::endl;
                 coef_vec(ifunc_vec.size()) = 1.0;
                 ifunc_vec.push_back(phi_i);
                 count++;
             } else {
-                std::cout<<mpi::orb_rank<<"did not find contribution for  " << jorb<<" expected "<<jvec.size()<<" id "<<id<<std::endl;
+                std::cout << mpi::orb_rank << "did not find contribution for  " << jorb << " expected " << jvec.size()
+                          << " id " << id << std::endl;
                 MSG_ABORT("did not find contribution for my orbitals");
             }
-            //if (ifunc_vec.size() >= 2 * block_size ) break; // we do not want too long vectors
+            // if (ifunc_vec.size() >= 2 * block_size ) break; // we do not want too long vectors
         }
         // write result in out
         t_add.resume();
         qmfunction::linear_combination(out[jorb], coef_vec, ifunc_vec, priv_prec);
         t_add.stop();
     }
-    if(mpi::orb_rank==0)std::cout<<" Time total " << (int)((float)t_tot.elapsed() * 1000) << " ms "<<" Time bankr2 " << (int)((float)t_bankr.elapsed() * 1000) <<" Time bankw2 " << (int)((float)t_bankw.elapsed() * 1000) <<" Time add " << (int)((float)t_add.elapsed() * 1000) <<" Time task manager " << (int)((float)t_task.elapsed() * 1000) <<" added "<<count<<std::endl;
+    if (mpi::orb_rank == 0)
+        std::cout << " Time total " << (int)((float)t_tot.elapsed() * 1000) << " ms "
+                  << " Time bankr2 " << (int)((float)t_bankr.elapsed() * 1000) << " Time bankw2 "
+                  << (int)((float)t_bankw.elapsed() * 1000) << " Time add " << (int)((float)t_add.elapsed() * 1000)
+                  << " Time task manager " << (int)((float)t_task.elapsed() * 1000) << " added " << count << std::endl;

     mpi::barrier(mpi::comm_orb);
-    if(mpi::orb_rank==0)std::cout<<" Time total all " << (int)((float)t_tot.elapsed() * 1000) << " ms "<<std::endl;
+    if (mpi::orb_rank == 0)
+        std::cout << " Time total all " << (int)((float)t_tot.elapsed() * 1000) << " ms " << std::endl;

     if (mpi::numerically_exact) {
         for (auto &phi : out) {
@@ -559,233 +591,238 @@

     return out;
 }
-  /*
+/*
 OrbitalVector orbital::rotate_bank(OrbitalVector &Phi, const ComplexMatrix &U, double prec) {

-    mpi::barrier(mpi::comm_orb); // for testing
-
-    Timer t_tot,t_bankr,t_bankrx,t_bankw,t_add,t_task,t_last;
-    t_bankw.stop();
-    t_bankr.stop();
-    t_bankrx.stop();
-    t_add.stop();
-    t_task.stop();
-    int N = Phi.size();
-    auto priv_prec = (mpi::numerically_exact) ? -1.0 : prec;
-    auto out = orbital::param_copy(Phi);
-
-    mpi::orb_bank.clear_all(mpi::orb_rank, mpi::comm_orb); // needed!
-    mpi::barrier(mpi::comm_orb); // for testing
-    IntVector orb_sizesvec(N); // store the size of the orbitals, for future fine tuning
-    std::set<std::pair<int, int>> orb_sizes; // store the size of the orbitals, and their indices
-    for(int i = 0 ; i < N; i++) orb_sizesvec[i] = 0;
-    t_bankw.resume();
-    // save all orbitals in bank
-    for (int i = 0; i < N; i++) {
-        if (not mpi::my_orb(Phi[i])) continue;
-        mpi::orb_bank.put_orb_n(i, Phi[i], 3);
-        orb_sizesvec[i] = Phi[i].getSizeNodes(NUMBER::Total);
-    }
-    t_bankw.stop();
-    mpi::allreduce_vector(orb_sizesvec, mpi::comm_orb);
-    for (int i = 0; i < N; i++) orb_sizes.insert({orb_sizesvec[i], i});
-    int totorbsize = 0;
-    for (int i = 0; i < N; i++) totorbsize += orb_sizesvec[i];
-    int avorbsize = totorbsize/N;
-
-    // we do not want to store temporarily more than 1/2 of the total memory for orbitals. (Note this is in an unlikely scenario where all orbitals on one node have maxsize)
-    int maxBlockSize = omp::n_threads * 2000000 / 3 / avorbsize; //assumes 2GB available per thread
-    if(mpi::orb_rank==0)std::cout<<mpi::orb_rank<<" Time bank write all orb " << (int)((float)t_bankw.elapsed() * 1000) <<" av size:"<<avorbsize/1024<<"MB. Maxblocksize:"<<maxBlockSize<<std::endl;
-
-    // first divide into a fixed number of tasks
-    int block_size; // U is partitioned in blocks with size block_size x block_size
-    block_size = std::min(maxBlockSize, static_cast<int>(N/sqrt(2*mpi::orb_size) + 0.5)); // each MPI process will have about 2 tasks
-    int iblocks =  (N + block_size -1)/block_size;
-
-    // fill blocks with orbitals, so that they become roughly evenly distributed
-    std::vector<std::pair<int,int>> orb_map[iblocks]; // orbitals that belong to each block
-    int b = 0; // block index
-    for (auto i: orb_sizes) {
-   b %= iblocks;
-   orb_map[b].push_back(i);
-   b++;
-    }
-
-    int task = 0; // task rank
-    int ntasks = iblocks * iblocks;
-    std::vector<std::vector<int>> itasks(ntasks); // the i values (orbitals) of each block
-    std::vector<std::vector<int>> jtasks(ntasks); // the j values (orbitals) of each block
-
-    // Task order are organised so that block along a diagonal are computed before starting next diagonal
-    // Taking blocks from same column would mean that the same orbitals are fetched from bank, taking
-    // blocks from same row would mean that all row results would be finished approximately together and
-    // all would be written out as partial results, meaning too partial results saved in bank.
-    //
-    // Results from previous blocks on the same row are ready they are also summed into current block
-    // This is because we do not want to store in bank too many block results at a time (~0(N^2))
-    //
-    int diag_shift = 0;
-    int ib = 0;
-    while (task < ntasks) {
-      // blocks move along diagonals, so as not to fetch identical orbitals, and not store too many orbitals from the same row simultaneously.
-      ib = ib%iblocks;
-      int jb = (ib + diag_shift) % iblocks;
-      int isize = orb_map[ib].size();
-      int jsize = orb_map[jb].size();
-      // we also fetch orbitals within the same i or j blocks in different orders
-      for (int i = 0; i < isize; i++) itasks[task].push_back(orb_map[ib][(i+mpi::orb_rank)%isize].second);
-      for (int j = 0; j < jsize; j++) jtasks[task].push_back(orb_map[jb][(j+mpi::orb_rank)%jsize].second);
-      task++;
-      ib++;
-      if (ib == iblocks) diag_shift++;
-    }
-
-    // make sum_i inp_i*U_ij within each block, and store result in bank
+  mpi::barrier(mpi::comm_orb); // for testing

-    t_task.resume();
-    mpi::orb_bank.init_tasks(ntasks, mpi::orb_rank, mpi::comm_orb);
-    t_task.stop();
-    int count = 0;
-    int countx = 0;
-    int countxok = 0;
-    while(true) { // fetch new tasks until all are completed
-        int it;
-   t_task.resume();
-        mpi::orb_bank.get_task(&it);
-   t_task.stop();
-        if(it<0)break;
-   count++;
-        t_last.start();
-        QMFunctionVector ifunc_vec;
-   t_bankr.resume();
-        int nodesize=0;
-        for (int i = 0; i < itasks[it].size(); i++) {
-            int iorb = itasks[it][i];
-            Orbital phi_i;
-            mpi::orb_bank.get_orb_n(iorb, phi_i, 0);
-            ifunc_vec.push_back(phi_i);
-            nodesize+=phi_i.getSizeNodes(NUMBER::Total);
-        }
-        if(mpi::orb_rank==0)std::cout<<it<<" fetched total " <<nodesize/1024  <<" MB for "<<itasks[it].size()<<" orbitals "<<" time read "<<(int)((float)t_bankr.elapsed() * 1000) <<std::endl;
-   t_bankr.stop();
-        for (int j = 0; j < jtasks[it].size(); j++) {
-            if(mpi::orb_rank==0)std::cout<<it<<" fetching " <<j <<" of "<<jtasks[it].size()-1<<std::endl;
-            ComplexVector coef_vec(itasks[it].size()+iblocks);
-            int jorb = jtasks[it][j];
-            for (int i = 0; i < itasks[it].size(); i++) {
-                int iorb = itasks[it][i];
-                coef_vec(i) = U(iorb, jorb);
-            }
-            // include also block results which may be ready
-            t_task.resume();
-            std::vector<int> jvec = mpi::orb_bank.get_readytasks(jorb, 1);
-            if(block_size + jvec.size() >  2*maxBlockSize ){
-                // Security mechanism to avoid the possibility of ifunc_vec getting too large
-                for (int ix = 2*maxBlockSize-block_size; ix < jvec.size(); ix++ ) {
-                    //define as ready again (i.e. "undelete")
-                    std::cout<<jvec[ix]<<" undelete "<<jvec.size()<<" "<<ix<<jorb<<std::endl;
-                    mpi::orb_bank.put_readytask(jvec[ix], jorb);
-                    countxok++;
-                }
-                jvec.resize(2*maxBlockSize-block_size);
-            }
-            t_task.stop();
-            if(jvec.size() > 0) {
-                int i = itasks[it].size();
-                for (int id : jvec) {
-                    Orbital phi_i;
-                    t_bankrx.resume();
-                    if(id==0){
-                        std::cout<<mpi::orb_rank<<" got id=0 for"<<jorb<<std::endl;
-                        for (int ix = 0; ix < jvec.size(); ix++ ) std::cout<<ix<<" id= "<<id<<std::endl;
-                   }
-                    //                        int ok = mpi::orb_bank.get_orb_del(id, phi_i);
-                    if(mpi::orb_rank==0)std::cout<<it<<" fetching xtra" <<id <<" of "<<jvec.size()<<std::endl;
-                    int ok = mpi::orb_bank.get_orb_n(id, phi_i, 1);
-                    countx++;
-                    t_bankrx.stop();
-                    if (ok) {
-                        ifunc_vec.push_back(phi_i);
-                        coef_vec(i++) = 1.0;
-                    } else {
-                        std::cout<<mpi::orb_rank<<"did not find contribution for  " << jorb<<" expected "<<jvec.size()<<" id "<<id<<std::endl;
-                        MSG_ABORT("did not find contribution for my orbitals");
-                    }
-               }
-            }
-            auto tmp_j = out[jorb].paramCopy();
-            t_add.resume();
-            qmfunction::linear_combination(tmp_j, coef_vec, ifunc_vec, priv_prec);
-            t_add.stop();
-            ifunc_vec.resize(itasks[it].size());
-            // save block result in bank
-            int id = N+it*N+jorb; // N first indices are reserved for original orbitals
-            id = N+itasks[it][0]*N+jorb;
-            t_bankw.resume();
-            //            mpi::orb_bank.put_orb_n(id, tmp_j, 1);
-            mpi::orb_bank.put_orb(id, tmp_j);
-            t_bankw.stop();
-            t_task.resume();
-            mpi::orb_bank.put_readytask(id, jorb);
-            t_task.stop();
-        }
-        t_last.stop();
-    }
-
-    if(mpi::orb_rank==0)std::cout<<mpi::orb_rank<<" Time rotate1 " << (int)((float)t_tot.elapsed() * 1000) << " ms "<<" Time bank read " << (int)((float)t_bankr.elapsed() * 1000) <<" Time bank read xtra " << (int)((float)t_bankrx.elapsed() * 1000) <<" xorb="<<countx<<" xundeleted="<<countxok<<" Time bank write " << (int)((float)t_bankw.elapsed() * 1000) <<" Time add " << (int)((float)t_add.elapsed() * 1000) <<" Time task manager " << (int)((float)t_task.elapsed() * 1000) <<" Time last task " << (int)((float)t_last.elapsed() * 1000) <<" block size "<<block_size<<" ntasks executed: "<<count<<std::endl;
-    mpi::barrier(mpi::comm_orb);
-    if(mpi::orb_rank==0)std::cout<<" Time rotate1 all " << (int)((float)t_tot.elapsed() * 1000) << " ms "<<std::endl;
-
-    // by now most of the operations are finished. We add only contributions to own orbitals.
-    count = 0;
-    for (int jorb = 0; jorb < N; jorb++) {
-        if (not mpi::my_orb(Phi[jorb])) continue;
-        QMFunctionVector ifunc_vec;
-        t_task.resume();
-        std::vector<int> jvec = mpi::orb_bank.get_readytasks(jorb, 1); // Only jorb will use those contributions: delete when done.
-        t_task.stop();
-
-        ComplexVector coef_vec(jvec.size());
-        int i = 0;
-        int idsave=-1;
-        for (int id : jvec) {
-            Orbital phi_i;
-            t_bankr.resume();
-            //            int ok = mpi::orb_bank.get_orb_n(id, phi_i, 1);
-            int ok = mpi::orb_bank.get_orb_del(id, phi_i);
-            t_bankr.stop();
-
-            if (ok) {
-                coef_vec(ifunc_vec.size()) = 1.0;
-                ifunc_vec.push_back(phi_i);
-                count++;
-            } else {
-                std::cout<<mpi::orb_rank<<"did not find contribution for  " << jorb<<" expected "<<jvec.size()<<" id "<<id<<std::endl;
-                MSG_ABORT("did not find contribution for my orbitals");
-            }
-            //if (ifunc_vec.size() >= 2 * block_size ) break; // we do not want too long vectors
-        }
-        // if ifunc_vec.size()==1, we must still resave the orbital, because we have deleted it
-        int id = idsave; // index guaranteed not to collide with previously defined indices
-        // write result in out
-        t_add.resume();
-        qmfunction::linear_combination(out[jorb], coef_vec, ifunc_vec, priv_prec);
-        t_add.stop();
-    }
-    if(mpi::orb_rank==0)std::cout<<" Time total " << (int)((float)t_tot.elapsed() * 1000) << " ms "<<" Time bankr2 " << (int)((float)t_bankr.elapsed() * 1000) <<" Time bankw2 " << (int)((float)t_bankw.elapsed() * 1000) <<" Time add " << (int)((float)t_add.elapsed() * 1000) <<" Time task manager " << (int)((float)t_task.elapsed() * 1000) <<" added "<<count<<" block size "<<block_size<<std::endl;
-
-    mpi::barrier(mpi::comm_orb);
-    if(mpi::orb_rank==0)std::cout<<" Time total all " << (int)((float)t_tot.elapsed() * 1000) << " ms "<<std::endl;
+  Timer t_tot,t_bankr,t_bankrx,t_bankw,t_add,t_task,t_last;
+  t_bankw.stop();
+  t_bankr.stop();
+  t_bankrx.stop();
+  t_add.stop();
+  t_task.stop();
+  int N = Phi.size();
+  auto priv_prec = (mpi::numerically_exact) ? -1.0 : prec;
+  auto out = orbital::param_copy(Phi);
+
+  mpi::orb_bank.clear_all(mpi::orb_rank, mpi::comm_orb); // needed!
+  mpi::barrier(mpi::comm_orb); // for testing
+  IntVector orb_sizesvec(N); // store the size of the orbitals, for future fine tuning
+  std::set<std::pair<int, int>> orb_sizes; // store the size of the orbitals, and their indices
+  for(int i = 0 ; i < N; i++) orb_sizesvec[i] = 0;
+  t_bankw.resume();
+  // save all orbitals in bank
+  for (int i = 0; i < N; i++) {
+      if (not mpi::my_orb(Phi[i])) continue;
+      mpi::orb_bank.put_orb_n(i, Phi[i], 3);
+      orb_sizesvec[i] = Phi[i].getSizeNodes(NUMBER::Total);
+  }
+  t_bankw.stop();
+  mpi::allreduce_vector(orb_sizesvec, mpi::comm_orb);
+  for (int i = 0; i < N; i++) orb_sizes.insert({orb_sizesvec[i], i});
+  int totorbsize = 0;
+  for (int i = 0; i < N; i++) totorbsize += orb_sizesvec[i];
+  int avorbsize = totorbsize/N;
+
+  // we do not want to store temporarily more than 1/2 of the total memory for orbitals. (Note this is in an unlikely scenario where all orbitals on one node have maxsize)
+  int maxBlockSize = omp::n_threads * 2000000 / 3 / avorbsize; //assumes 2GB available per thread
+  if(mpi::orb_rank==0)std::cout<<mpi::orb_rank<<" Time bank write all orb " << (int)((float)t_bankw.elapsed() * 1000) <<" av size:"<<avorbsize/1024<<"MB. Maxblocksize:"<<maxBlockSize<<std::endl;
+
+  // first divide into a fixed number of tasks
+  int block_size; // U is partitioned in blocks with size block_size x block_size
+  block_size = std::min(maxBlockSize, static_cast<int>(N/sqrt(2*mpi::orb_size) + 0.5)); // each MPI process will have about 2 tasks
+  int iblocks =  (N + block_size -1)/block_size;
+
+  // fill blocks with orbitals, so that they become roughly evenly distributed
+  std::vector<std::pair<int,int>> orb_map[iblocks]; // orbitals that belong to each block
+  int b = 0; // block index
+  for (auto i: orb_sizes) {
+      b %= iblocks;
+      orb_map[b].push_back(i);
+      b++;
+  }
+
+  int task = 0; // task rank
+  int ntasks = iblocks * iblocks;
+  std::vector<std::vector<int>> itasks(ntasks); // the i values (orbitals) of each block
+  std::vector<std::vector<int>> jtasks(ntasks); // the j values (orbitals) of each block
+
+  // Task order are organised so that block along a diagonal are computed before starting next diagonal
+  // Taking blocks from same column would mean that the same orbitals are fetched from bank, taking
+  // blocks from same row would mean that all row results would be finished approximately together and
+  // all would be written out as partial results, meaning too partial results saved in bank.
+  //
+  // Results from previous blocks on the same row are ready they are also summed into current block
+  // This is because we do not want to store in bank too many block results at a time (~0(N^2))
+  //
+  int diag_shift = 0;
+  int ib = 0;
+  while (task < ntasks) {
+    // blocks move along diagonals, so as not to fetch identical orbitals, and not store too many orbitals from the same row simultaneously.
+    ib = ib%iblocks;
+    int jb = (ib + diag_shift) % iblocks;
+    int isize = orb_map[ib].size();
+    int jsize = orb_map[jb].size();
+    // we also fetch orbitals within the same i or j blocks in different orders
+    for (int i = 0; i < isize; i++) itasks[task].push_back(orb_map[ib][(i+mpi::orb_rank)%isize].second);
+    for (int j = 0; j < jsize; j++) jtasks[task].push_back(orb_map[jb][(j+mpi::orb_rank)%jsize].second);
+    task++;
+    ib++;
+    if (ib == iblocks) diag_shift++;
+  }
+
+  // make sum_i inp_i*U_ij within each block, and store result in bank
+
+  t_task.resume();
+  mpi::orb_bank.init_tasks(ntasks, mpi::orb_rank, mpi::comm_orb);
+  t_task.stop();
+  int count = 0;
+  int countx = 0;
+  int countxok = 0;
+  while(true) { // fetch new tasks until all are completed
+      int it;
+      t_task.resume();
+      mpi::orb_bank.get_task(&it);
+      t_task.stop();
+      if(it<0)break;
+      count++;
+      t_last.start();
+      QMFunctionVector ifunc_vec;
+      t_bankr.resume();
+      int nodesize=0;
+      for (int i = 0; i < itasks[it].size(); i++) {
+          int iorb = itasks[it][i];
+          Orbital phi_i;
+          mpi::orb_bank.get_orb_n(iorb, phi_i, 0);
+          ifunc_vec.push_back(phi_i);
+          nodesize+=phi_i.getSizeNodes(NUMBER::Total);
+      }
+      if(mpi::orb_rank==0)std::cout<<it<<" fetched total " <<nodesize/1024  <<" MB for "<<itasks[it].size()<<" orbitals "<<" time read "<<(int)((float)t_bankr.elapsed() * 1000) <<std::endl;
+      t_bankr.stop();
+      for (int j = 0; j < jtasks[it].size(); j++) {
+          if(mpi::orb_rank==0)std::cout<<it<<" fetching " <<j <<" of "<<jtasks[it].size()-1<<std::endl;
+          ComplexVector coef_vec(itasks[it].size()+iblocks);
+          int jorb = jtasks[it][j];
+          for (int i = 0; i < itasks[it].size(); i++) {
+              int iorb = itasks[it][i];
+              coef_vec(i) = U(iorb, jorb);
+          }
+          // include also block results which may be ready
+          t_task.resume();
+          std::vector<int> jvec = mpi::orb_bank.get_readytasks(jorb, 1);
+          if(block_size + jvec.size() >  2*maxBlockSize ){
+              // Security mechanism to avoid the possibility of ifunc_vec getting too large
+              for (int ix = 2*maxBlockSize-block_size; ix < jvec.size(); ix++ ) {
+                  //define as ready again (i.e. "undelete")
+                  std::cout<<jvec[ix]<<" undelete "<<jvec.size()<<" "<<ix<<jorb<<std::endl;
+                  mpi::orb_bank.put_readytask(jvec[ix], jorb);
+                  countxok++;
+              }
+              jvec.resize(2*maxBlockSize-block_size);
+          }
+          t_task.stop();
+          if(jvec.size() > 0) {
+              int i = itasks[it].size();
+              for (int id : jvec) {
+                  Orbital phi_i;
+                  t_bankrx.resume();
+                  if(id==0){
+                      std::cout<<mpi::orb_rank<<" got id=0 for"<<jorb<<std::endl;
+                      for (int ix = 0; ix < jvec.size(); ix++ ) std::cout<<ix<<" id= "<<id<<std::endl;
+                 }
+                  //                        int ok = mpi::orb_bank.get_orb_del(id, phi_i);
+                  if(mpi::orb_rank==0)std::cout<<it<<" fetching xtra" <<id <<" of "<<jvec.size()<<std::endl;
+                  int ok = mpi::orb_bank.get_orb_n(id, phi_i, 1);
+                  countx++;
+                  t_bankrx.stop();
+                  if (ok) {
+                      ifunc_vec.push_back(phi_i);
+                      coef_vec(i++) = 1.0;
+                  } else {
+                      std::cout<<mpi::orb_rank<<"did not find contribution for  " << jorb<<" expected "<<jvec.size()<<" id "<<id<<std::endl;
+                      MSG_ABORT("did not find contribution for my orbitals");
+                  }
+             }
+          }
+          auto tmp_j = out[jorb].paramCopy();
+          t_add.resume();
+          qmfunction::linear_combination(tmp_j, coef_vec, ifunc_vec, priv_prec);
+          t_add.stop();
+          ifunc_vec.resize(itasks[it].size());
+          // save block result in bank
+          int id = N+it*N+jorb; // N first indices are reserved for original orbitals
+          id = N+itasks[it][0]*N+jorb;
+          t_bankw.resume();
+          //            mpi::orb_bank.put_orb_n(id, tmp_j, 1);
+          mpi::orb_bank.put_orb(id, tmp_j);
+          t_bankw.stop();
+          t_task.resume();
+          mpi::orb_bank.put_readytask(id, jorb);
+          t_task.stop();
+      }
+      t_last.stop();
+  }

-    if (mpi::numerically_exact) {
-        for (auto &phi : out) {
-            if (mpi::my_orb(phi)) phi.crop(prec);
-        }
-    }
-    mpi::orb_bank.clear_all(mpi::orb_rank, mpi::comm_orb);
-    mpi::barrier(mpi::comm_orb);
+  if(mpi::orb_rank==0)std::cout<<mpi::orb_rank<<" Time rotate1 " << (int)((float)t_tot.elapsed() * 1000) << " ms "<<"
+Time bank read " << (int)((float)t_bankr.elapsed() * 1000) <<" Time bank read xtra " << (int)((float)t_bankrx.elapsed()
+* 1000) <<" xorb="<<countx<<" xundeleted="<<countxok<<" Time bank write " << (int)((float)t_bankw.elapsed() * 1000) <<"
+Time add " << (int)((float)t_add.elapsed() * 1000) <<" Time task manager " << (int)((float)t_task.elapsed() * 1000) <<"
+Time last task " << (int)((float)t_last.elapsed() * 1000) <<" block size "<<block_size<<" ntasks executed:
+"<<count<<std::endl; mpi::barrier(mpi::comm_orb); if(mpi::orb_rank==0)std::cout<<" Time rotate1 all " <<
+(int)((float)t_tot.elapsed() * 1000) << " ms "<<std::endl;
+
+  // by now most of the operations are finished. We add only contributions to own orbitals.
+  count = 0;
+  for (int jorb = 0; jorb < N; jorb++) {
+      if (not mpi::my_orb(Phi[jorb])) continue;
+      QMFunctionVector ifunc_vec;
+      t_task.resume();
+      std::vector<int> jvec = mpi::orb_bank.get_readytasks(jorb, 1); // Only jorb will use those contributions: delete when done.
+      t_task.stop();
+
+      ComplexVector coef_vec(jvec.size());
+      int i = 0;
+      int idsave=-1;
+      for (int id : jvec) {
+          Orbital phi_i;
+          t_bankr.resume();
+          //            int ok = mpi::orb_bank.get_orb_n(id, phi_i, 1);
+          int ok = mpi::orb_bank.get_orb_del(id, phi_i);
+          t_bankr.stop();
+
+          if (ok) {
+              coef_vec(ifunc_vec.size()) = 1.0;
+              ifunc_vec.push_back(phi_i);
+              count++;
+          } else {
+              std::cout<<mpi::orb_rank<<"did not find contribution for  " << jorb<<" expected "<<jvec.size()<<" id "<<id<<std::endl;
+              MSG_ABORT("did not find contribution for my orbitals");
+          }
+          //if (ifunc_vec.size() >= 2 * block_size ) break; // we do not want too long vectors
+      }
+      // if ifunc_vec.size()==1, we must still resave the orbital, because we have deleted it
+      int id = idsave; // index guaranteed not to collide with previously defined indices
+      // write result in out
+      t_add.resume();
+      qmfunction::linear_combination(out[jorb], coef_vec, ifunc_vec, priv_prec);
+      t_add.stop();
+  }
+  if(mpi::orb_rank==0)std::cout<<" Time total " << (int)((float)t_tot.elapsed() * 1000) << " ms "<<" Time bankr2 " <<
+(int)((float)t_bankr.elapsed() * 1000) <<" Time bankw2 " << (int)((float)t_bankw.elapsed() * 1000) <<" Time add " <<
+(int)((float)t_add.elapsed() * 1000) <<" Time task manager " << (int)((float)t_task.elapsed() * 1000) <<" added
+"<<count<<" block size "<<block_size<<std::endl;
+
+  mpi::barrier(mpi::comm_orb);
+  if(mpi::orb_rank==0)std::cout<<" Time total all " << (int)((float)t_tot.elapsed() * 1000) << " ms "<<std::endl;
+
+  if (mpi::numerically_exact) {
+      for (auto &phi : out) {
+          if (mpi::my_orb(phi)) phi.crop(prec);
+      }
+  }
+  mpi::orb_bank.clear_all(mpi::orb_rank, mpi::comm_orb);
+  mpi::barrier(mpi::comm_orb);

-    return out;
+  return out;
 }
 */
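
A side note on the commented-out rotation routine above: its task list walks the block matrix diagonal by diagonal (ib advances, jb = (ib + diag_shift) % iblocks), so consecutive tasks neither fetch identical orbitals nor pile up too many partial results from one row. A minimal standalone sketch of that ordering, not part of the patch, with iblocks set to an arbitrary example value:

// Standalone sketch (not part of the patch): diagonal-by-diagonal block ordering,
// mirroring the loop in the commented-out rotation routine above.
#include <iostream>

int main() {
    const int iblocks = 4;                // arbitrary example number of block rows/columns
    const int ntasks = iblocks * iblocks; // one task per (ib, jb) block of U
    int diag_shift = 0;                   // index of the diagonal currently being filled
    int ib = 0;
    for (int task = 0; task < ntasks; task++) {
        ib = ib % iblocks;
        int jb = (ib + diag_shift) % iblocks; // blocks on one diagonal never share a column
        std::cout << "task " << task << " -> block (" << ib << ", " << jb << ")\n";
        ib++;
        if (ib == iblocks) diag_shift++; // diagonal finished, move to the next one
    }
    return 0;
}
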
 /** @brief Deep copy
@@ -1082,14 +1119,14 @@
     int N = NB + NK;
     if (sym and NB != NK) MSG_ERROR("Overlap: cannot use symmetry if matrix is not square");
     mpi::orb_bank.clear_all(mpi::orb_rank, mpi::comm_orb);
-    Timer t_bankr,t_bankw,timer,t_dot,t_red;
+    Timer t_bankr, t_bankw, timer, t_dot, t_red;
     t_bankr.stop();
     t_dot.stop();
     t_red.stop();
-    IntVector orb_sizesvec(N); // store the size of the orbitals, for fine tuning
+    IntVector orb_sizesvec(N);                // store the size of the orbitals, for fine tuning
     std::set<std::pair<int, int>> iorb_sizes; // store the size of the orbitals, and their indices
     std::set<std::pair<int, int>> jorb_sizes; // store the size of the orbitals, and their indices
-    for(int i = 0 ; i < N; i++) orb_sizesvec[i] = 0;
+    for (int i = 0; i < N; i++) orb_sizesvec[i] = 0;
     for (int i = 0; i < NB; i++) {
         Orbital &Phi_B = Bra[i];
         if (not mpi::my_orb(Phi_B)) continue;
@@ -1099,152 +1136,156 @@
     for (int j = 0; j < NK; j++) {
         Orbital &Phi_K = Ket[j];
         if (not mpi::my_orb(Phi_K)) continue;
-        mpi::orb_bank.put_orb_n(j+NB, Phi_K, 3);
-        orb_sizesvec[j+NB] = Phi_K.getSizeNodes(NUMBER::Total);
+        mpi::orb_bank.put_orb_n(j + NB, Phi_K, 3);
+        orb_sizesvec[j + NB] = Phi_K.getSizeNodes(NUMBER::Total);
     }
     t_bankw.stop();
     mpi::allreduce_vector(orb_sizesvec, mpi::comm_orb);
     for (int i = 0; i < NB; i++) iorb_sizes.insert({orb_sizesvec[i], i});
-    for (int j = 0; j < NK; j++) jorb_sizes.insert({orb_sizesvec[j+NB], j});
+    for (int j = 0; j < NK; j++) jorb_sizes.insert({orb_sizesvec[j + NB], j});
     int totorbsize = 0;
     int maxorbsize = 0;
-    for (int i = 0; i < N; i++) totorbsize += orb_sizesvec[i]/1024; // NB: use MB to avoid overflow
-    for(int i = 0 ; i < N; i++) maxorbsize = std::max(maxorbsize, orb_sizesvec[i]/1024);
-    int avorbsize = totorbsize/N;
+    for (int i = 0; i < N; i++) totorbsize += orb_sizesvec[i] / 1024; // NB: use MB to avoid overflow
+    for (int i = 0; i < N; i++) maxorbsize = std::max(maxorbsize, orb_sizesvec[i] / 1024);
+    int avorbsize = totorbsize / N;

-    for(int i = 0 ; i < N; i++) maxorbsize = std::max(maxorbsize, orb_sizesvec[i]/1024);
+    for (int i = 0; i < N; i++) maxorbsize = std::max(maxorbsize, orb_sizesvec[i] / 1024);
     // we do not want to store temporarily more than 1/2 of the total memory for orbitals.
-    int maxBlockSize = omp::n_threads * 2000 / 2 / avorbsize; //assumes 2GB available per thread
-    int ilargest=-1;
-    if(mpi::orb_rank==0){
-        for(int i = 0 ; i < N; i++) {
-            if(maxorbsize == orb_sizesvec[i]/1024)ilargest=i;
+    int maxBlockSize = omp::n_threads * 2000 / 2 / avorbsize; // assumes 2GB available per thread
+    int ilargest = -1;
+    if (mpi::orb_rank == 0) {
+        for (int i = 0; i < N; i++) {
+            if (maxorbsize == orb_sizesvec[i] / 1024) ilargest = i;
         }
     }
-    if(mpi::orb_rank==0)std::cout<<" bra, ket:maxBlockSize "<<maxBlockSize<<" avorb size "<<avorbsize<<" MB "<<" max "<<maxorbsize<<" MB "<<"(orb "<<ilargest<<")"<<std::endl;
+    if (mpi::orb_rank == 0)
+        std::cout << " bra, ket:maxBlockSize " << maxBlockSize << " avorb size " << avorbsize << " MB "
+                  << " max " << maxorbsize << " MB "
+                  << "(orb " << ilargest << ")" << std::endl;

     // divide into a fixed number of tasks
     int iblock_size, jblock_size; // S is partitioned in blocks with sizes iblock_size x jblock_size
-    iblock_size = std::min(maxBlockSize, static_cast<int>(NB/sqrt(3*mpi::orb_size) + 0.5 ));
-    jblock_size = std::min(maxBlockSize, static_cast<int>(NK/sqrt(3*mpi::orb_size) + 0.5 ));
+    iblock_size = std::min(maxBlockSize, static_cast<int>(NB / sqrt(3 * mpi::orb_size) + 0.5));
+    jblock_size = std::min(maxBlockSize, static_cast<int>(NK / sqrt(3 * mpi::orb_size) + 0.5));
     iblock_size = std::max(iblock_size, 1);
     jblock_size = std::max(jblock_size, 1);

     int task = 0; // task rank
-    int iblocks =  (NB + iblock_size -1)/iblock_size;
-    int jblocks =  (NK + jblock_size -1)/jblock_size;
+    int iblocks = (NB + iblock_size - 1) / iblock_size;
+    int jblocks = (NK + jblock_size - 1) / jblock_size;
     int ntasks = iblocks * jblocks; // NB: overwritten; will get less if sym

     // fill blocks with orbitals, so that they become roughly evenly distributed
-    std::vector<std::pair<int,int>> iorb_map[iblocks]; // i orbitals that belong to each block
-    int b = 0; // block index
-    for (auto i: iorb_sizes) { // iorb_sizes is ordered in increasing orbital sizes
-   b %= iblocks;
-   iorb_map[b].push_back(i);
-   b++;
-    }
-    std::vector<std::pair<int,int>> jorb_map[jblocks]; // j orbitals that belong to each block
-    b = 0; // block index
-    for (auto j: jorb_sizes) { // jorb_sizes is ordered in increasing orbital sizes
-   b %= jblocks;
-   jorb_map[b].push_back(j);
-   b++;
+    std::vector<std::pair<int, int>> iorb_map[iblocks]; // i orbitals that belong to each block
+    int b = 0;                                          // block index
+    for (auto i : iorb_sizes) {                         // iorb_sizes is ordered in increasing orbital sizes
+        b %= iblocks;
+        iorb_map[b].push_back(i);
+        b++;
+    }
+    std::vector<std::pair<int, int>> jorb_map[jblocks]; // j orbitals that belong to each block
+    b = 0;                                              // block index
+    for (auto j : jorb_sizes) {                         // jorb_sizes is ordered in increasing orbital sizes
+        b %= jblocks;
+        jorb_map[b].push_back(j);
+        b++;
     }

-
     std::vector<std::vector<int>> itasks(iblocks); // the i values (orbitals) of each block
     std::vector<std::vector<int>> jtasks(jblocks); // the j values (orbitals) of each block

-    if(mpi::orb_rank==0)std::cout<<ntasks<<" tasks = "<<iblocks<<" x "<<jblocks<<std::endl;
+    if (mpi::orb_rank == 0) std::cout << ntasks << " tasks = " << iblocks << " x " << jblocks << std::endl;

     for (int ib = 0; ib < iblocks; ib++) {
-   int isize = iorb_map[ib].size();
-   // we fetch orbitals within the same iblocks in different orders in order to avoid overloading the bank
-   for (int i = 0; i < isize; i++) itasks[ib].push_back(iorb_map[ib][(i+mpi::orb_rank)%isize].second);
+        int isize = iorb_map[ib].size();
+        // we fetch orbitals within the same iblocks in different orders in order to avoid overloading the bank
+        for (int i = 0; i < isize; i++) itasks[ib].push_back(iorb_map[ib][(i + mpi::orb_rank) % isize].second);
     }

-    if(sym) {
-      // we must have same mapping for i and j
-      for (int jb = 0; jb < jblocks; jb++) {
-   int isize = iorb_map[jb].size(); // NB: iorb
-   for (int i = 0; i < isize; i++) jtasks[jb].push_back(iorb_map[jb][(i+mpi::orb_rank)%isize].second);
-      }
-   } else {
-
-    for (int jb = 0; jb < jblocks; jb++) {
-   int jsize = jorb_map[jb].size();
-   // we fetch orbitals within the same jblocks in different orders in order to avoid overloading the bank
-   for (int j = 0; j < jsize; j++) jtasks[jb].push_back(jorb_map[jb][(j+mpi::orb_rank)%jsize].second);
-    }
+    if (sym) {
+        // we must have same mapping for i and j
+        for (int jb = 0; jb < jblocks; jb++) {
+            int isize = iorb_map[jb].size(); // NB: iorb
+            for (int i = 0; i < isize; i++) jtasks[jb].push_back(iorb_map[jb][(i + mpi::orb_rank) % isize].second);
+        }
+    } else {

+        for (int jb = 0; jb < jblocks; jb++) {
+            int jsize = jorb_map[jb].size();
+            // we fetch orbitals within the same jblocks in different orders in order to avoid overloading the bank
+            for (int j = 0; j < jsize; j++) jtasks[jb].push_back(jorb_map[jb][(j + mpi::orb_rank) % jsize].second);
+        }
     }
-    mpi::orb_bank.init_tasks(iblocks, jblocks, mpi::orb_rank, mpi::comm_orb);// note that the bank does not know about the symmetri. Symmetri must be taken care of in the loop
+    mpi::orb_bank.init_tasks(
+        iblocks, jblocks, mpi::orb_rank, mpi::comm_orb); // note that the bank does not know about the symmetri.
+                                                         // Symmetri must be taken care of in the loop

     int previous_ib = -1;
     int previous_jb = -1;
     int count = 0;
     OrbitalVector iorb_vec;
     OrbitalVector jorb_vec;
-    while(true) { // fetch new tasks until all are completed
-   int task_2D[2];
-        mpi::orb_bank.get_task(task_2D, mpi::orb_rank%jblocks);
-   int jb = task_2D[1];
-   int ib = (task_2D[0]+jb)%iblocks; // NB: the blocks are shifted diagonally, so that not all processes start with the first iblock
-
-   if(jb<0) break; // no more tasks in queue
-
-   if(sym) {
-     // block symmetry: we only take every second block. (Almost a checker board but one white diagonal is missing!)
-     // X0X0X0X
-     // XX0X0X0
-     // 0XX0X0X
-     // X0XX0X0
-     // 0X0XX0X
-     // X0X0XX0
-     // 0X0X0XX
-     if((ib<jb and (ib+jb)%2) or (ib>jb and (ib+jb+1)%2)) continue; //take exactly half of the off diagonal blocks and all diagonal blocks
-   }
-   count++;
-   if(previous_jb != jb){
+    while (true) { // fetch new tasks until all are completed
+        int task_2D[2];
+        mpi::orb_bank.get_task(task_2D, mpi::orb_rank % jblocks);
+        int jb = task_2D[1];
+        int ib =
+            (task_2D[0] + jb) %
+            iblocks; // NB: the blocks are shifted diagonally, so that not all processes start with the first iblock
+
+        if (jb < 0) break; // no more tasks in queue
+
+        if (sym) {
+            // block symmetry: we only take every second block. (Almost a checker board but one white diagonal is missing!)
+            // X0X0X0X
+            // XX0X0X0
+            // 0XX0X0X
+            // X0XX0X0
+            // 0X0XX0X
+            // X0X0XX0
+            // 0X0X0XX
+            if ((ib < jb and (ib + jb) % 2) or (ib > jb and (ib + jb + 1) % 2))
+                continue; // take exactly half of the off diagonal blocks and all diagonal blocks
+        }
+        count++;
+        if (previous_jb != jb) {
             // we have got a new jb and need to fetch new jb orbitals
-       jorb_vec.clear();
-       t_bankr.resume();
-       int nodesize=0;
-       for (int j = 0; j < jtasks[jb].size(); j++) {
-         int jorb = jtasks[jb][j];
-         Orbital phi_j;
-         mpi::orb_bank.get_orb_n(jorb + NB,  phi_j, 0);
-         jorb_vec.push_back(phi_j);
-         nodesize+=phi_j.getSizeNodes(NUMBER::Total);
-       }
-       if(mpi::orb_rank==0)std::cout<<ib<<" "<<jb<<" fetched total " <<nodesize/1024  <<" MB for "<<jtasks[jb].size()<<" orbitals "<<" time read "<<(int)((float)t_bankr.elapsed() * 1000) <<std::endl;
-       t_bankr.stop();
-   }else{
-     if(mpi::orb_rank==0)std::cout<<ib<<" "<<jb<<" reusing j orbitals " <<std::endl;
-   }
-
-   for (int i = 0; i < itasks[ib].size(); i++) {
-       int iorb = itasks[ib][i];
-       Orbital ket_i;
-       t_bankr.resume();
-       mpi::orb_bank.get_orb_n(iorb, ket_i, 0);
-       t_bankr.stop();
-       for (int j = 0; j < jtasks[jb].size(); j++) {
+            jorb_vec.clear();
+            t_bankr.resume();
+            int nodesize = 0;
+            for (int j = 0; j < jtasks[jb].size(); j++) {
+                int jorb = jtasks[jb][j];
+                Orbital phi_j;
+                mpi::orb_bank.get_orb_n(jorb + NB, phi_j, 0);
+                jorb_vec.push_back(phi_j);
+                nodesize += phi_j.getSizeNodes(NUMBER::Total);
+            }
+            if (mpi::orb_rank == 0)
+                std::cout << ib << " " << jb << " fetched total " << nodesize / 1024 << " MB for " << jtasks[jb].size()
+                          << " orbitals "
+                          << " time read " << (int)((float)t_bankr.elapsed() * 1000) << std::endl;
+            t_bankr.stop();
+        } else {
+            if (mpi::orb_rank == 0) std::cout << ib << " " << jb << " reusing j orbitals " << std::endl;
+        }
+
+        for (int i = 0; i < itasks[ib].size(); i++) {
+            int iorb = itasks[ib][i];
+            Orbital ket_i;
+            t_bankr.resume();
+            mpi::orb_bank.get_orb_n(iorb, ket_i, 0);
+            t_bankr.stop();
+            for (int j = 0; j < jtasks[jb].size(); j++) {
                 int jorb = jtasks[jb][j];
-                if( sym and ib==jb and i < j) continue; //only j<=i is computed in diagonal blocks
+                if (sym and ib == jb and i < j) continue; // only j<=i is computed in diagonal blocks
                 t_dot.resume();
-                //std::cout<<iorb<<" dot "<<jorb<<std::endl;
+                // std::cout<<iorb<<" dot "<<jorb<<std::endl;
                 S(jorb, iorb) = orbital::dot(jorb_vec[j], ket_i);
-                if(sym) S(iorb, jorb) = std::conj(S(jorb, iorb));
+                if (sym) S(iorb, jorb) = std::conj(S(jorb, iorb));
                 t_dot.stop();
-       //if(mpi::orb_rank==0)std::cout<<ib<<" "<<jb<<" dot " <<i<<" "<<j<<" "<<iorb<<" "<<jorb<<" "<<S(jorb, iorb)<<std::endl;
+                // if(mpi::orb_rank==0)std::cout<<ib<<" "<<jb<<" dot " <<i<<" "<<j<<" "<<iorb<<" "<<jorb<<" "<<S(jorb,
+                // iorb)<<std::endl;
             }
         }
-   previous_jb = jb;
-   previous_ib = ib;
+        previous_jb = jb;
+        previous_ib = ib;

-   /*
-   for (int j = 0; j < jtasks[it].size(); j++) {
+        /*
+        for (int j = 0; j < jtasks[it].size(); j++) {
             int jorb = jtasks[it][j];
             Orbital ket_j;
             t_bankr.resume();
@@ -1259,17 +1300,20 @@
                 if(sym) S(jorb, iorb) = std::conj(S(iorb, jorb));
                 t_dot.stop();
             }
-       }*/
+            }*/
     }

     // Assumes each tasks has only defined its block

     mpi::allreduce_matrix(S, mpi::comm_orb);
-    if(mpi::orb_rank==0)std::cout<<mpi::orb_rank<<" Time overlap " << (int)((float)timer.elapsed() * 1000) << " ms "<<" Time bank read " << (int)((float)t_bankr.elapsed() * 1000) <<" Time bank write " << (int)((float)t_bankw.elapsed() * 1000) <<" Time dot " << (int)((float)t_dot.elapsed() * 1000) <<" block size "<<iblock_size<<"x"<<jblock_size<<" ntasks executed: "<<count<<std::endl;
+    if (mpi::orb_rank == 0)
+        std::cout << mpi::orb_rank << " Time overlap " << (int)((float)timer.elapsed() * 1000) << " ms "
+                  << " Time bank read " << (int)((float)t_bankr.elapsed() * 1000) << " Time bank write "
+                  << (int)((float)t_bankw.elapsed() * 1000) << " Time dot " << (int)((float)t_dot.elapsed() * 1000)
+                  << " block size " << iblock_size << "x" << jblock_size << " ntasks executed: " << count << std::endl;
     return S;
 }
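
The symmetric overlap path above keeps every diagonal block and exactly half of the off-diagonal blocks, via the test (ib<jb and (ib+jb)%2) or (ib>jb and (ib+jb+1)%2). A minimal standalone sketch, not part of the patch, that prints the resulting pattern for a hypothetical 7x7 block grid, reproducing the diagram in the code comments:

// Standalone sketch (not part of the patch): which blocks survive the symmetry filter
// used in the overlap routines ('X' = computed, '0' = skipped).
#include <iostream>

int main() {
    const int nblocks = 7; // hypothetical grid size, matching the 7x7 diagram in the comments
    for (int ib = 0; ib < nblocks; ib++) {
        for (int jb = 0; jb < nblocks; jb++) {
            // keep all diagonal blocks and exactly half of the off-diagonal blocks
            bool skip = (ib < jb and (ib + jb) % 2) or (ib > jb and (ib + jb + 1) % 2);
            std::cout << (skip ? '0' : 'X');
        }
        std::cout << '\n';
    }
    return 0;
}

Each off-diagonal pair (ib, jb)/(jb, ib) is computed exactly once, and diagonal blocks only evaluate the lower triangle j<=i, which is why the conjugate element S(iorb, jorb) can be filled in directly.
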

-
 /** @brief Compute the overlap matrix S_ij = <bra_i|ket_j>
  *
  * The matrix is divided into blocks and the dot products within blocks
@@ -1284,84 +1328,87 @@
     ComplexMatrix S = ComplexMatrix::Zero(BraKet.size(), BraKet.size());
     int N = BraKet.size();
     mpi::orb_bank.clear_all(mpi::orb_rank, mpi::comm_orb);
-    Timer t_bankr,t_bankw,timer,t_dot;
+    Timer t_bankr, t_bankw, timer, t_dot;
     t_bankr.stop();
     t_dot.stop();
-    IntVector orb_sizesvec(N); // store the size of the orbitals
+    IntVector orb_sizesvec(N);               // store the size of the orbitals
     std::set<std::pair<int, int>> orb_sizes; // store the size of the orbitals, and their indices
-    for(int i = 0 ; i < N; i++) orb_sizesvec[i] = 0;
+    for (int i = 0; i < N; i++) orb_sizesvec[i] = 0;
     for (int i = 0; i < N; i++) {
         Orbital &Phi_i = BraKet[i];
         if (not mpi::my_orb(Phi_i)) continue;
         mpi::orb_bank.put_orb_n(i, Phi_i, 3);
         orb_sizesvec[i] = Phi_i.getSizeNodes(NUMBER::Total);
     }
-     t_bankw.stop();
+    t_bankw.stop();
     mpi::allreduce_vector(orb_sizesvec, mpi::comm_orb);
     for (int i = 0; i < N; i++) orb_sizes.insert({orb_sizesvec[i], i});
     int totorbsize = 0;
     int maxorbsize = 0;
-    for (int i = 0; i < N; i++) totorbsize += orb_sizesvec[i]/1024; // NB: use MB to avoid overflow
-    int avorbsize = totorbsize/N;
+    for (int i = 0; i < N; i++) totorbsize += orb_sizesvec[i] / 1024; // NB: use MB to avoid overflow
+    int avorbsize = totorbsize / N;

-    for(int i = 0 ; i < N; i++) maxorbsize = std::max(maxorbsize, orb_sizesvec[i]/1024);
+    for (int i = 0; i < N; i++) maxorbsize = std::max(maxorbsize, orb_sizesvec[i] / 1024);
     // we do not want to store temporarily more than 1/2 of the total memory for orbitals.
-    int maxBlockSize = omp::n_threads * 2000 / 2 / avorbsize; //assumes 2GB available per thread
-    int ilargest=-1;
-    if(mpi::orb_rank==0){
-        for(int i = 0 ; i < N; i++) {
-            if(maxorbsize == orb_sizesvec[i]/1024)ilargest=i;
+    int maxBlockSize = omp::n_threads * 2000 / 2 / avorbsize; // assumes 2GB available per thread
+    int ilargest = -1;
+    if (mpi::orb_rank == 0) {
+        for (int i = 0; i < N; i++) {
+            if (maxorbsize == orb_sizesvec[i] / 1024) ilargest = i;
         }
     }
-    if(mpi::orb_rank==0)std::cout<<" maxBlockSize "<<maxBlockSize<<" avorb size "<<avorbsize<<" MB "<<" max "<<maxorbsize<<" MB "<<"(orb "<<ilargest<<")"<<std::endl;
+    if (mpi::orb_rank == 0)
+        std::cout << " maxBlockSize " << maxBlockSize << " avorb size " << avorbsize << " MB "
+                  << " max " << maxorbsize << " MB "
+                  << "(orb " << ilargest << ")" << std::endl;
     // make a set of tasks
     // We use symmetry: each pair (i,j) must be used once only. Only j<i
     // Divide into square blocks, with the diagonal blocks taken at the end (because they are faster to compute)
-    int block_size = std::min(maxBlockSize, static_cast<int>(N/sqrt(3*mpi::orb_size) + 0.5 )); // each MPI process will have about 3(/2) tasks
+    int block_size =
+        std::min(maxBlockSize,
+                 static_cast<int>(N / sqrt(3 * mpi::orb_size) + 0.5)); // each MPI process will have about 3(/2) tasks
     int iblock_size = std::max(block_size, 1);
     int jblock_size = iblock_size; // we must have square blocks
-    int iblocks =  (N + block_size - 1) / block_size;
+    int iblocks = (N + block_size - 1) / block_size;
     int jblocks = iblocks;
-    int ntasks = ((iblocks+1) * iblocks) / 2;
+    int ntasks = ((iblocks + 1) * iblocks) / 2;

     // fill blocks with orbitals, so that they become roughly evenly distributed
-    std::vector<std::pair<int,int>> orb_map[iblocks]; // orbitals that belong to each block
-    int b = 0; // block index
-    for (auto i: orb_sizes) { // orb_sizes is ordered in increasing orbital sizes
-   b %= iblocks;
-   orb_map[b].push_back(i);
-   b++;
+    std::vector<std::pair<int, int>> orb_map[iblocks]; // orbitals that belong to each block
+    int b = 0;                                         // block index
+    for (auto i : orb_sizes) {                         // orb_sizes is ordered in increasing orbital sizes
+        b %= iblocks;
+        orb_map[b].push_back(i);
+        b++;
     }

     std::vector<std::vector<int>> itasks(iblocks); // the i values (orbitals) of each block
     std::vector<std::vector<int>> jtasks(jblocks); // the j values (orbitals) of each block

     for (int ib = 0; ib < iblocks; ib++) {
-      int isize = orb_map[ib].size();
-      // we fetch orbitals within the same iblocks in different orders in order to avoid overloading the bank
-      for (int i = 0; i < isize; i++) itasks[ib].push_back(orb_map[ib][(i+mpi::orb_rank)%isize].second);
+        int isize = orb_map[ib].size();
+        // we fetch orbitals within the same iblocks in different orders in order to avoid overloading the bank
+        for (int i = 0; i < isize; i++) itasks[ib].push_back(orb_map[ib][(i + mpi::orb_rank) % isize].second);
     }

     for (int jb = 0; jb < jblocks; jb++) {
-      // we must have square blocks, i.e. jsize=isize. Use the same as i
-      int isize = orb_map[jb].size();
-      for (int i = 0; i < isize; i++) jtasks[jb].push_back(orb_map[jb][(i+mpi::orb_rank)%isize].second);
+        // we must have square blocks, i.e. jsize=isize. Use the same as i
+        int isize = orb_map[jb].size();
+        for (int i = 0; i < isize; i++) jtasks[jb].push_back(orb_map[jb][(i + mpi::orb_rank) % isize].second);
     }

     /*int task = 0;
     for (int i = 0; i < N; i+=block_size) {
         for (int j = 0; j < i; j+=block_size) {
-       // save all i,j related to one block
+            // save all i,j related to one block
             for (int jj = 0; jj < block_size and j+jj < N; jj++) {
                 int jjj = jj + j;
-                if (j + block_size <= N) jjj = j + (jj+mpi::orb_rank)%block_size; // only for blocks with size block_size
-                jtasks[task].push_back(jjj);
+                if (j + block_size <= N) jjj = j + (jj+mpi::orb_rank)%block_size; // only for blocks with size block_size
+                jtasks[task].push_back(jjj);
             }
-       int ishift = (i+j)/block_size; // we want the tasks to be distributed among all i, so that we do not fetch the same orbitals
-            for (int ii = 0; ii < block_size; ii++) {
-                int iii = ii + i;
-                if(iii >= N) continue;
-                if (i + block_size < N) iii = i + (ii+ishift)%block_size; // only for blocks with size block_size
+            int ishift = (i+j)/block_size; // we want the tasks to be distributed among all i, so that we do not fetch the same orbitals
+            for (int ii = 0; ii < block_size; ii++) {
+                int iii = ii + i;
+                if(iii >= N) continue;
+                if (i + block_size < N) iii = i + (ii+ishift)%block_size; // only for blocks with size block_size
                 itasks[task].push_back(iii);
             }
             task++;
@@ -1381,78 +1428,91 @@
     }
     ntasks = task;
     */
-    if(mpi::orb_rank==0)std::cout<<ntasks<<" tasks = "<<iblocks<<" x ("<<jblocks<<"+1)/2"<<std::endl;
+    if (mpi::orb_rank == 0) std::cout << ntasks << " tasks = " << iblocks << " x (" << jblocks << "+1)/2" << std::endl;

-    //mpi::orb_bank.init_tasks(ntasks, mpi::orb_rank, mpi::comm_orb);
+    // mpi::orb_bank.init_tasks(ntasks, mpi::orb_rank, mpi::comm_orb);

-    mpi::orb_bank.init_tasks(iblocks, jblocks, mpi::orb_rank, mpi::comm_orb);// note that the bank does not know about the symmetri. Symmetri must be taken care of in the loop
+    mpi::orb_bank.init_tasks(
+        iblocks, jblocks, mpi::orb_rank, mpi::comm_orb); // note that the bank does not know about the symmetri.
+                                                         // Symmetri must be taken care of in the loop

     int previous_ib = -1;
     int previous_jb = -1;
     int count = 0;
     OrbitalVector iorb_vec;
     OrbitalVector jorb_vec;
-    while(true) { // fetch new tasks until all are completed
-   int task_2D[2];
-        mpi::orb_bank.get_task(task_2D, mpi::orb_rank%jblocks);
-   int jb = task_2D[1];
-   int ib = (task_2D[0]+jb)%iblocks; // NB: the blocks are shifted diagonally, so that not all processes start with the first iblock
-
-   if(jb<0) break; // no more tasks in queue
-
-   // block symmetry: we only take every second block. (Almost a checker board but one white diagonal is missing!)
-   // X0X0X0X
-   // XX0X0X0
-   // 0XX0X0X
-   // X0XX0X0
-   // 0X0XX0X
-   // X0X0XX0
-   // 0X0X0XX
-
-   if((ib<jb and (ib+jb)%2) or (ib>jb and (ib+jb+1)%2)) continue; //take exactly half of the off diagonal blocks and all diagonal blocks
-   count++;
-   if(previous_jb != jb){
+    while (true) { // fetch new tasks until all are completed
+        int task_2D[2];
+        mpi::orb_bank.get_task(task_2D, mpi::orb_rank % jblocks);
+        int jb = task_2D[1];
+        int ib =
+            (task_2D[0] + jb) %
+            iblocks; // NB: the blocks are shifted diagonally, so that not all processes start with the first iblock
+
+        if (jb < 0) break; // no more tasks in queue
+
+        // block symmetry: we only take every second block. (Almost a checker board but one white diagonal is missing!)
+        // X0X0X0X
+        // XX0X0X0
+        // 0XX0X0X
+        // X0XX0X0
+        // 0X0XX0X
+        // X0X0XX0
+        // 0X0X0XX
+
+        if ((ib < jb and (ib + jb) % 2) or (ib > jb and (ib + jb + 1) % 2))
+            continue; // take exactly half of the off diagonal blocks and all diagonal blocks
+        count++;
+        if (previous_jb != jb) {
             // we have got a new jb and need to fetch new jb orbitals
-       jorb_vec.clear();
-       t_bankr.resume();
-       int nodesize=0;
-       for (int j = 0; j < jtasks[jb].size(); j++) {
-         int jorb = jtasks[jb][j];
-         Orbital phi_j;
-         mpi::orb_bank.get_orb_n(jorb, phi_j, 0);
-         jorb_vec.push_back(phi_j);
-         nodesize+=phi_j.getSizeNodes(NUMBER::Total);
-       }
-       if(mpi::orb_rank==0)std::cout<<ib<<" "<<jb<<" fetched total " <<nodesize/1024  <<" MB for "<<jtasks[jb].size()<<" orbitals "<<" time read "<<(int)((float)t_bankr.elapsed() * 1000) <<std::endl;
-       t_bankr.stop();
-   }else{
-     if(mpi::orb_rank==0)std::cout<<ib<<" "<<jb<<" reusing j orbitals " <<std::endl;
-   }
-
-   for (int i = 0; i < itasks[ib].size(); i++) {
-       int iorb = itasks[ib][i];
-       Orbital ket_i;
-       t_bankr.resume();
-       mpi::orb_bank.get_orb_n(iorb, ket_i, 0);
-       t_bankr.stop();
-       for (int j = 0; j < jtasks[jb].size(); j++) {
+            jorb_vec.clear();
+            t_bankr.resume();
+            int nodesize = 0;
+            for (int j = 0; j < jtasks[jb].size(); j++) {
                 int jorb = jtasks[jb][j];
-                if( ib==jb and i < j) continue; //only j<=i is computed in diagonal blocks
+                Orbital phi_j;
+                mpi::orb_bank.get_orb_n(jorb, phi_j, 0);
+                jorb_vec.push_back(phi_j);
+                nodesize += phi_j.getSizeNodes(NUMBER::Total);
+            }
+            if (mpi::orb_rank == 0)
+                std::cout << ib << " " << jb << " fetched total " << nodesize / 1024 << " MB for " << jtasks[jb].size()
+                          << " orbitals "
+                          << " time read " << (int)((float)t_bankr.elapsed() * 1000) << std::endl;
+            t_bankr.stop();
+        } else {
+            if (mpi::orb_rank == 0) std::cout << ib << " " << jb << " reusing j orbitals " << std::endl;
+        }
+
+        for (int i = 0; i < itasks[ib].size(); i++) {
+            int iorb = itasks[ib][i];
+            Orbital ket_i;
+            t_bankr.resume();
+            mpi::orb_bank.get_orb_n(iorb, ket_i, 0);
+            t_bankr.stop();
+            for (int j = 0; j < jtasks[jb].size(); j++) {
+                int jorb = jtasks[jb][j];
+                if (ib == jb and i < j) continue; // only j<=i is computed in diagonal blocks
                 t_dot.resume();
-                //std::cout<<iorb<<" dot "<<jorb<<std::endl;
+                // std::cout<<iorb<<" dot "<<jorb<<std::endl;
                 S(jorb, iorb) = orbital::dot(jorb_vec[j], ket_i);
                 S(iorb, jorb) = std::conj(S(jorb, iorb));
                 t_dot.stop();
-       //if(mpi::orb_rank==0)std::cout<<ib<<" "<<jb<<" dot " <<i<<" "<<j<<" "<<iorb<<" "<<jorb<<" "<<S(jorb, iorb)<<std::endl;
+                // if(mpi::orb_rank==0)std::cout<<ib<<" "<<jb<<" dot " <<i<<" "<<j<<" "<<iorb<<" "<<jorb<<" "<<S(jorb,
+                // iorb)<<std::endl;
             }
         }
-   previous_jb = jb;
-   previous_ib = ib;
+        previous_jb = jb;
+        previous_ib = ib;
     }

     // Assumes each tasks has only defined its block
     mpi::allreduce_matrix(S, mpi::comm_orb);
-    if(mpi::orb_rank==0)std::cout<<mpi::orb_rank<<" Time overlap " << (int)((float)timer.elapsed() * 1000) << " ms "<<" Time bank read " << (int)((float)t_bankr.elapsed() * 1000) <<" Time bank write " << (int)((float)t_bankw.elapsed() * 1000) <<" Time dot " << (int)((float)t_dot.elapsed() * 1000) <<" block size "<<block_size<<" ntasks executed: "<<count<<std::endl;
+    if (mpi::orb_rank == 0)
+        std::cout << mpi::orb_rank << " Time overlap " << (int)((float)timer.elapsed() * 1000) << " ms "
+                  << " Time bank read " << (int)((float)t_bankr.elapsed() * 1000) << " Time bank write "
+                  << (int)((float)t_bankw.elapsed() * 1000) << " Time dot " << (int)((float)t_dot.elapsed() * 1000)
+                  << " block size " << block_size << " ntasks executed: " << count << std::endl;
     return S;
 }
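
Throughout these routines the block size is capped by the memory heuristic mentioned in the PR description: about 2 GB is assumed per thread, and at most half of that is allowed for temporarily stored orbitals, so maxBlockSize = n_threads * 2000 MB / 2 / average orbital size in MB. A minimal standalone sketch, not part of the patch, with made-up numbers for the thread count, orbital size, and orbital count:

// Standalone sketch (not part of the patch): the block-size cap used in the overlap
// routines above. All numbers below are hypothetical, for illustration only.
#include <algorithm>
#include <cmath>
#include <iostream>

int main() {
    const int n_threads = 16; // hypothetical OpenMP threads per MPI process
    const int avorbsize = 50; // hypothetical average orbital size in MB
    const int orb_size = 8;   // hypothetical number of MPI workers
    const int N = 100;        // hypothetical number of orbitals

    // assume 2 GB (2000 MB) per thread; use at most half of it for temporary orbitals
    int maxBlockSize = n_threads * 2000 / 2 / avorbsize;
    // choose block_size so that the ~iblocks^2 tasks come out to roughly 3 per MPI process
    int block_size = std::min(maxBlockSize, static_cast<int>(N / std::sqrt(3.0 * orb_size) + 0.5));
    block_size = std::max(block_size, 1);
    int iblocks = (N + block_size - 1) / block_size;

    std::cout << "maxBlockSize = " << maxBlockSize << ", block_size = " << block_size
              << ", iblocks = " << iblocks << std::endl;
    return 0;
}
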

src/qmoperators/two_electron/CoulombPotential.cpp

--- src/qmoperators/two_electron/CoulombPotential.cpp
+++ src/qmoperators/two_electron/CoulombPotential.cpp
@@ -12,7 +12,6 @@
 using mrcpp::Printer;
 using mrcpp::Timer;

-
 using PoissonOperator = mrcpp::PoissonOperator;
 using PoissonOperator_p = std::shared_ptr<mrcpp::PoissonOperator>;
 using OrbitalVector_p = std::shared_ptr<mrchem::OrbitalVector>;

src/qmoperators/two_electron/ExchangePotentialD1.cpp

--- src/qmoperators/two_electron/ExchangePotentialD1.cpp
+++ src/qmoperators/two_electron/ExchangePotentialD1.cpp
@@ -265,7 +265,6 @@
     mrcpp::print::tree(2, "Hartree-Fock exchange", n, m, t);
 }

-
 /** @brief precomputes the exchange potential
  *
  *  @param[in] phi_p input orbital
@@ -298,7 +297,7 @@
     println(2, " Exchange time diagonal " << (int)((float)timer.elapsed() * 1000) << " ms ");

     // Save all orbitals in Bank, so that they can be accessed asynchronously
-    Timer timerS,t_bankr,t_add,t_task,t_wait,t_int;
+    Timer timerS, t_bankr, t_add, t_task, t_wait, t_int;
     t_bankr.stop();
     t_add.stop();
     t_task.stop();
@@ -316,38 +315,34 @@
     // We use symmetry: each pair (i,j) must be used once only. Only j<i
     // Divide into square blocks, with the diagonal blocks taken at the end (because they are faster to compute)
     int block_size = 6; // NB: block_size*block_size intermediate exchange results are stored temporarily
-    int iblocks =  (N + block_size - 1) / block_size;
-    int ntasks = ((iblocks+1) * iblocks) / 2;
+    int iblocks = (N + block_size - 1) / block_size;
+    int ntasks = ((iblocks + 1) * iblocks) / 2;
     std::vector<std::vector<int>> itasks(ntasks); // the i values (orbitals) of each block
     std::vector<std::vector<int>> jtasks(ntasks); // the j values (orbitals) of each block

     int task = 0;
-    for (int i = 0; i < N; i+=block_size) {
-        for (int j = 0; j < i; j+=block_size) {
-       // save all i,j related to one block
-            for (int jj = 0; jj < block_size and j+jj < N; jj++) { // note that j+block_size is alway <N
+    for (int i = 0; i < N; i += block_size) {
+        for (int j = 0; j < i; j += block_size) {
+            // save all i,j related to one block
+            for (int jj = 0; jj < block_size and j + jj < N; jj++) { // note that j+block_size is alway <N
                 int jjj = j + jj;
-                if (j+block_size <= N) jjj = j + (jj + mpi::orb_rank)%block_size;
+                if (j + block_size <= N) jjj = j + (jj + mpi::orb_rank) % block_size;
                 jtasks[task].push_back(jjj);
-             }
-            for (int ii = 0; ii < block_size and i+ii < N; ii++) {
+            }
+            for (int ii = 0; ii < block_size and i + ii < N; ii++) {
                 int iii = i + ii;
-                if (i+block_size <= N) iii = i + (ii + mpi::orb_rank)%block_size;
+                if (i + block_size <= N) iii = i + (ii + mpi::orb_rank) % block_size;
                 itasks[task].push_back(iii);
             }
             task++;
-       }
+        }
     }
     // add diagonal blocks:
-    for (int i = 0; i < N; i+=block_size) {
+    for (int i = 0; i < N; i += block_size) {
         // note that not we store all ii and jj in the block, but must use only half of them
         int j = i;
-        for (int jj = j; jj < j+block_size and jj < N; jj++) {
-            jtasks[task].push_back(jj);
-        }
-        for (int ii = i; ii < i+block_size and ii < N; ii++) {
-            itasks[task].push_back(ii);
-        }
+        for (int jj = j; jj < j + block_size and jj < N; jj++) { jtasks[task].push_back(jj); }
+        for (int ii = i; ii < i + block_size and ii < N; ii++) { itasks[task].push_back(ii); }
         task++;
     }
     ntasks = task;
@@ -357,41 +352,42 @@
     int foundcount = 0;
     int count = 0;
     ComplexVector coef_vec(N);
-    for (int i = 0; i<N; i++) coef_vec(i) = 1.0;
+    for (int i = 0; i < N; i++) coef_vec(i) = 1.0;
     // one task is for one block with block_size x block_size exchange integrals (except diagonal block which has less)
     // Each integral provides two exchange contributions: iij and jij
     // The contributions are summed up within block, and ready results from other blocks are added sometimes.
-    while(true) { // fetch new tasks until all are completed
+    while (true) { // fetch new tasks until all are completed
         int it;
-   t_task.resume();
+        t_task.resume();
         mpi::orb_bank.get_task(&it);
-   t_task.stop();
-        if(it<0)break;
+        t_task.stop();
+        if (it < 0) break;
         count++;
         QMFunctionVector ifunc_vec;
         OrbitalVector iorb_vec;
-        int nodesize=0;
-   // we fetch all required i (but only one j at a time)
+        int nodesize = 0;
+        // we fetch all required i (but only one j at a time)
         for (int i = 0; i < itasks[it].size(); i++) {
             int iorb = itasks[it][i];
             Orbital phi_i;
-       t_bankr.resume();
+            t_bankr.resume();
             mpi::orb_bank.get_orb_n(iorb, phi_i, 0);
-       t_bankr.stop();
+            t_bankr.stop();
             iorb_vec.push_back(phi_i);
-            nodesize+=phi_i.getSizeNodes(NUMBER::Total);
+            nodesize += phi_i.getSizeNodes(NUMBER::Total);
         }
-        //        if(mpi::orb_rank==0)std::cout<<mpi::orb_rank<<" fetched total " <<nodesize/1024  <<" MB for "<<itasks[it].size()<<" orbitals "<<std::endl;
-   std::vector<QMFunctionVector> jijfunc_vec(N);
+        //        if(mpi::orb_rank==0)std::cout<<mpi::orb_rank<<" fetched total " <<nodesize/1024  <<" MB for
+        //        "<<itasks[it].size()<<" orbitals "<<std::endl;
+        std::vector<QMFunctionVector> jijfunc_vec(N);
         for (int j = 0; j < jtasks[it].size(); j++) {
             int jorb = jtasks[it][j];
             Orbital phi_j;
-       t_bankr.resume();
+            t_bankr.resume();
             mpi::orb_bank.get_orb_n(jorb, phi_j, 0);
-       t_bankr.stop();
-       QMFunctionVector iijfunc_vec;
+            t_bankr.stop();
+            QMFunctionVector iijfunc_vec;
             for (int i = 0; i < iorb_vec.size(); i++) {
-                if(itasks[it][i] <= jorb) continue; //only j<i is computed
+                if (itasks[it][i] <= jorb) continue; // only j<i is computed
                 int iorb = itasks[it][i];
                 Orbital &phi_i = iorb_vec[i];
                 Orbital phi_jij;
@@ -402,16 +398,16 @@
                 t_int.resume();
                 calc_i_Int_jk_P(precf, phi_i, phi_j, phi_i, phi_iij, &phi_jij);
                 t_int.stop();
-                if(phi_iij.norm() > prec) iijfunc_vec.push_back(phi_iij);
-                if(phi_jij.norm() > prec) jijfunc_vec[iorb].push_back(phi_jij);
+                if (phi_iij.norm() > prec) iijfunc_vec.push_back(phi_iij);
+                if (phi_jij.norm() > prec) jijfunc_vec[iorb].push_back(phi_jij);

-                if((it/iblocks) % 4 == 3) { // every x column only
+                if ((it / iblocks) % 4 == 3) { // every x column only
                     // include also results which may be ready
                     t_task.resume();
                     // include all Ex[jorb] contributions already stored
                     std::vector<int> jvec = mpi::orb_bank.get_readytasks(jorb, 0);
                     t_task.stop();
-                    if(jvec.size() > 3) {
+                    if (jvec.size() > 3) {
                         for (int id : jvec) {
                             Orbital phi_iij_extra;
                             t_bankr.resume();
@@ -419,119 +415,132 @@
                             t_bankr.stop();
                             if (ok) {
                                 t_task.resume();
-                                mpi::orb_bank.del_readytask(id, jorb); // tell the task manager that this task should not be used anymore
+                                mpi::orb_bank.del_readytask(
+                                    id, jorb); // tell the task manager that this task should not be used anymore
                                 t_task.stop();
-               iijfunc_vec.push_back(phi_iij_extra);
-               foundcount++;
+                                iijfunc_vec.push_back(phi_iij_extra);
+                                foundcount++;
                             }
-                            if(iijfunc_vec.size() > 1.5 * block_size ) break; // we do not want too long vectors
+                            if (iijfunc_vec.size() > 1.5 * block_size) break; // we do not want too long vectors
                         }
                     }
                 }
             }
-       // sum all iij contributions: (fixed j sum over i)
-       auto tmp_iij = phi_j.paramCopy();
+            // sum all iij contributions: (fixed j sum over i)
+            auto tmp_iij = phi_j.paramCopy();
             t_add.resume();
             qmfunction::linear_combination(tmp_iij, coef_vec, iijfunc_vec, prec);
             tmp_iij.crop(prec);
             t_add.stop();
             if (tmp_iij.norm() > prec) {
-           timerS.resume();
-                int iorb = itasks[it][0]; // any iorb would do
-                int idiij = N+jorb+N*iorb; // N first indices are reserved for original orbitals;
+                timerS.resume();
+                int iorb = itasks[it][0];        // any iorb would do
+                int idiij = N + jorb + N * iorb; // N first indices are reserved for original orbitals;
                 mpi::orb_bank.put_orb_n(idiij, tmp_iij, 1);
-       timerS.stop();
-       t_task.resume();
-       mpi::orb_bank.put_readytask(idiij, jorb);
-       t_task.stop();
+                timerS.stop();
+                t_task.resume();
+                mpi::orb_bank.put_readytask(idiij, jorb);
+                t_task.stop();
             }
             tmp_iij.free(NUMBER::Total);
-            for (int jj = 0; jj<iijfunc_vec.size(); jj++) iijfunc_vec[jj].free(NUMBER::Total);
+            for (int jj = 0; jj < iijfunc_vec.size(); jj++) iijfunc_vec[jj].free(NUMBER::Total);
         }

-   // sum all contributions jij (fixed i sum over j)
-   for (int i = 0; i < iorb_vec.size(); i++) {
+        // sum all contributions jij (fixed i sum over j)
+        for (int i = 0; i < iorb_vec.size(); i++) {
             // include all Ex[iorb] contributions already stored
-       t_task.resume();
+            t_task.resume();
             int iorb = itasks[it][i];
-       std::vector<int> jvec = mpi::orb_bank.get_readytasks(iorb, 0);
-       t_task.stop();
-       if(jvec.size() > 3) {
-           for (int id : jvec) {
-           Orbital phi_jij_extra;
-           t_bankr.resume();
-           int ok = mpi::orb_bank.get_orb_n(id, phi_jij_extra, 1);
-           t_bankr.stop();
-           if (ok) {
-               t_task.resume();
-           mpi::orb_bank.del_readytask(id, iorb); // tell the task manager that this task should not be used anymore
-           t_task.stop();
-           jijfunc_vec[iorb].push_back(phi_jij_extra);
-           foundcount++;
-           }
-           if(jijfunc_vec[iorb].size() > 1.5 * block_size ) break; // we do not want too long vectors
-       }
-       }
+            std::vector<int> jvec = mpi::orb_bank.get_readytasks(iorb, 0);
+            t_task.stop();
+            if (jvec.size() > 3) {
+                for (int id : jvec) {
+                    Orbital phi_jij_extra;
+                    t_bankr.resume();
+                    int ok = mpi::orb_bank.get_orb_n(id, phi_jij_extra, 1);
+                    t_bankr.stop();
+                    if (ok) {
+                        t_task.resume();
+                        mpi::orb_bank.del_readytask(
+                            id, iorb); // tell the task manager that this task should not be used anymore
+                        t_task.stop();
+                        jijfunc_vec[iorb].push_back(phi_jij_extra);
+                        foundcount++;
+                    }
+                    if (jijfunc_vec[iorb].size() > 1.5 * block_size) break; // we do not want too long vectors
+                }
+            }
             Orbital &phi_i = iorb_vec[i];
-       auto tmp_jij = phi_i.paramCopy();
-       t_add.resume();
-       qmfunction::linear_combination(tmp_jij, coef_vec, jijfunc_vec[iorb], prec);
-       tmp_jij.crop(prec);
-       t_add.stop();
-       if (tmp_jij.norm() > prec) {
-           timerS.resume();
-                int jorb = jtasks[it][0];//any jorb from task would do
-                //The diagonal blocks can have (iorb,jtasks[it][0])=(itasks[it][0],jorb), therefore N*N to distinguish them
-                int idjij = N+iorb+N*jorb+N*N; // N first indices are reserved for original orbitals;
-       mpi::orb_bank.put_orb_n(idjij, tmp_jij, 1);
-       timerS.stop();
-       t_task.resume();
-       mpi::orb_bank.put_readytask(idjij, iorb);
-       t_task.stop();
-       }
-       tmp_jij.free(NUMBER::Total);
-       for (int jj = 0; jj<jijfunc_vec[iorb].size(); jj++) jijfunc_vec[iorb][jj].free(NUMBER::Total);
-   }
+            auto tmp_jij = phi_i.paramCopy();
+            t_add.resume();
+            qmfunction::linear_combination(tmp_jij, coef_vec, jijfunc_vec[iorb], prec);
+            tmp_jij.crop(prec);
+            t_add.stop();
+            if (tmp_jij.norm() > prec) {
+                timerS.resume();
+                int jorb = jtasks[it][0]; // any jorb from task would do
+                // The diagonal blocks can have (iorb,jtasks[it][0])=(itasks[it][0],jorb), therefore N*N to distinguish
+                // them
+                int idjij = N + iorb + N * jorb + N * N; // N first indices are reserved for original orbitals;
+                mpi::orb_bank.put_orb_n(idjij, tmp_jij, 1);
+                timerS.stop();
+                t_task.resume();
+                mpi::orb_bank.put_readytask(idjij, iorb);
+                t_task.stop();
+            }
+            tmp_jij.free(NUMBER::Total);
+            for (int jj = 0; jj < jijfunc_vec[iorb].size(); jj++) jijfunc_vec[iorb][jj].free(NUMBER::Total);
+        }
     }
-    println(2, " Time exchanges compute " << (int)((float)timer.elapsed() * 1000) << " ms. "<<" Treated "<<count<<" tasks"<<" blocksize "<<block_size);
+    println(2,
+            " Time exchanges compute " << (int)((float)timer.elapsed() * 1000) << " ms. "
+                                       << " Treated " << count << " tasks"
+                                       << " blocksize " << block_size);
     mpi::barrier(mpi::comm_orb);
     println(2, " Time exchanges compute all " << (int)((float)timer.elapsed() * 1000) << " ms ");
     // sum all contributions that may not yet be summed, and put them into Ex[j]
-    for (int j = 0; j < N ; j++) {  //
+    for (int j = 0; j < N; j++) {              //
         if (not mpi::my_orb(Phi[j])) continue; // fetch only own j
-   t_task.resume();
-   // include all Ex[jorb] contributions already stored
-   std::vector<int> jvec = mpi::orb_bank.get_readytasks(j, 0);
+        t_task.resume();
+        // include all Ex[jorb] contributions already stored
+        std::vector<int> jvec = mpi::orb_bank.get_readytasks(j, 0);
         QMFunctionVector iijfunc_vec;
-   t_task.stop();
-   for (int id : jvec) {
-       Orbital phi_iij;
-       t_bankr.resume();
-       int ok = mpi::orb_bank.get_orb_n(id, phi_iij, 1);
-       t_bankr.stop();
-       if (ok) {
-           t_task.resume();
-       mpi::orb_bank.del_readytask(id, j); // tell the task manager that this task should not be used anymore
-       t_task.stop();
-           iijfunc_vec.push_back(phi_iij);
-       foundcount++;
-       } else {
-                std::cout<<mpi::orb_rank<<"did not find exchange contributions for  " << j<<" expected "<<jvec.size()<<" id "<<id<<std::endl;
+        t_task.stop();
+        for (int id : jvec) {
+            Orbital phi_iij;
+            t_bankr.resume();
+            int ok = mpi::orb_bank.get_orb_n(id, phi_iij, 1);
+            t_bankr.stop();
+            if (ok) {
+                t_task.resume();
+                mpi::orb_bank.del_readytask(id, j); // tell the task manager that this task should not be used anymore
+                t_task.stop();
+                iijfunc_vec.push_back(phi_iij);
+                foundcount++;
+            } else {
+                std::cout << mpi::orb_rank << "did not find exchange contributions for  " << j << " expected "
+                          << jvec.size() << " id " << id << std::endl;
                 MSG_ABORT("did not find contribution for my exchange");
             }
-   }
-   // sum all found contributions:
-   t_add.resume();
+        }
+        // sum all found contributions:
+        t_add.resume();
         auto tmp_j = Ex[j].paramCopy();
-   qmfunction::linear_combination(tmp_j, coef_vec, iijfunc_vec, prec);
+        qmfunction::linear_combination(tmp_j, coef_vec, iijfunc_vec, prec);
         Ex[j].add(1.0, tmp_j);
-   Ex[j].crop(prec);
-   t_add.stop();
-   for (int jj = 0; jj<iijfunc_vec.size(); jj++) iijfunc_vec[jj].free(NUMBER::Total);
-   }
-    println(2, " fetched in total " << foundcount << " Exchange contributions from bank.  Total time "<<(int)((float)timer.elapsed() * 1000) << " ms ");
-
-    println(2, " Time integrate " << (int)((float)t_int.elapsed() * 1000) << " ms. Time write " << (int)((float)timerS.elapsed() * 1000) << " ms. Time read "<< (int)((float)t_bankr.elapsed() * 1000) << " ms. Time add "<<(int)((float)t_add.elapsed() * 1000));
+        Ex[j].crop(prec);
+        t_add.stop();
+        for (int jj = 0; jj < iijfunc_vec.size(); jj++) iijfunc_vec[jj].free(NUMBER::Total);
+    }
+    println(2,
+            " fetched in total " << foundcount << " Exchange contributions from bank.  Total time "
+                                 << (int)((float)timer.elapsed() * 1000) << " ms ");
+
+    println(2,
+            " Time integrate " << (int)((float)t_int.elapsed() * 1000) << " ms. Time write "
+                               << (int)((float)timerS.elapsed() * 1000) << " ms. Time read "
+                               << (int)((float)t_bankr.elapsed() * 1000) << " ms. Time add "
+                               << (int)((float)t_add.elapsed() * 1000));

     mpi::orb_bank.clear_all(mpi::orb_rank, mpi::comm_orb);
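
Note (not part of the PR): in the exchange patch above, the bank IDs `idiij = N + jorb + N*iorb` and `idjij = N + iorb + N*jorb + N*N` encode which block a partial contribution belongs to; IDs 0..N-1 are reserved for the original orbitals, and the extra N*N offset keeps the two families of IDs from colliding. A minimal standalone sketch of that arithmetic — the helper names and the collision check are illustrative only:

#include <cassert>
#include <iostream>
#include <set>

// IDs 0..N-1 are reserved for the original orbitals themselves.
int id_iij(int iorb, int jorb, int N) { return N + jorb + N * iorb; }
// Extra N*N offset: without it, the jij id for (iorb, jorb) would coincide
// with the iij id of the transposed pair (jorb, iorb).
int id_jij(int iorb, int jorb, int N) { return N + iorb + N * jorb + N * N; }

int main() {
    const int N = 6; // toy number of orbitals
    std::set<int> ids;
    for (int k = 0; k < N; k++) ids.insert(k); // original orbitals
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            assert(ids.insert(id_iij(i, j, N)).second); // iij ids are unique
            assert(ids.insert(id_jij(i, j, N)).second); // jij ids are unique
        }
    }
    std::cout << "all " << ids.size() << " bank ids are distinct" << std::endl;
    return 0;
}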

src/scf_solver/GroundStateSolver.cpp

--- src/scf_solver/GroundStateSolver.cpp
+++ src/scf_solver/GroundStateSolver.cpp
@@ -308,19 +308,22 @@
         // Rotate orbitals
         if (needLocalization(nIter, converged)) {
             // brutal "smoothing" of the orbitals
-     //            for (auto j :Phi_n) if(mpi::my_orb(j) and mpi::orb_rank<2)std::cout<<" size before crop "<<j.getSizeNodes(NUMBER::Total)<<std::endl;
-     // for (auto j :Phi_n) if(nIter<20 and mpi::my_orb(j))j.crop(orb_prec*100);
-     // for (auto j :Phi_n) if(mpi::my_orb(j) and mpi::orb_rank<2)std::cout<<" size after crop "<<j.getSizeNodes(NUMBER::Total)<<std::endl;
+            //            for (auto j :Phi_n) if(mpi::my_orb(j) and mpi::orb_rank<2)std::cout<<" size before crop
+            //            "<<j.getSizeNodes(NUMBER::Total)<<std::endl;
+            // for (auto j :Phi_n) if(nIter<20 and mpi::my_orb(j))j.crop(orb_prec*100);
+            // for (auto j :Phi_n) if(mpi::my_orb(j) and mpi::orb_rank<2)std::cout<<" size after crop
+            // "<<j.getSizeNodes(NUMBER::Total)<<std::endl;
             ComplexMatrix U_mat = orbital::localize(orb_prec, Phi_n, F_mat);
             F.rotate(U_mat);
-            //            for (auto j :Phi_n) if(mpi::my_orb(j) and mpi::orb_rank<2)std::cout<<" size after localization "<<j.getSizeNodes(NUMBER::Total)<<std::endl;
+            //            for (auto j :Phi_n) if(mpi::my_orb(j) and mpi::orb_rank<2)std::cout<<" size after localization
+            //            "<<j.getSizeNodes(NUMBER::Total)<<std::endl;

-            //kain.rotate(U_mat, false);
+            // kain.rotate(U_mat, false);
             kain.clear();
         } else if (needDiagonalization(nIter, converged)) {
             ComplexMatrix U_mat = orbital::diagonalize(orb_prec, Phi_n, F_mat);
             F.rotate(U_mat);
-            //kain.rotate(U_mat, false);
+            // kain.rotate(U_mat, false);
             kain.clear();
         }

src/scf_solver/KAIN.cpp

--- src/scf_solver/KAIN.cpp
+++ src/scf_solver/KAIN.cpp
@@ -175,7 +175,7 @@

                 auto partStep = phi_m.paramCopy();
                 qmfunction::linear_combination(partStep, partCoefs, partOrbs, prec);
-                partStep.crop(prec*partStep.norm());
+                partStep.crop(prec * partStep.norm());
                 auto c_j = this->c[m](j);
                 totCoefs.push_back(c_j);
                 totOrbs.push_back(partStep);
@@ -187,7 +187,7 @@

             dPhi[n] = Phi[n].paramCopy();
             qmfunction::linear_combination(dPhi[n], coefsVec, totOrbs, prec);
-            dPhi[n].crop(prec*dPhi[n].norm());
+            dPhi[n].crop(prec * dPhi[n].norm());
         }
     }

src/utils/NonlinearMaximizer.cpp

--- src/utils/NonlinearMaximizer.cpp
+++ src/utils/NonlinearMaximizer.cpp
@@ -44,7 +44,7 @@
  */
 // We consider only the diagonal of the Hessian, until we are close to the minimum
 int NonlinearMaximizer::maximize() {
-    Timer t_tot,t_hess,t_step;
+    Timer t_tot, t_hess, t_step;
     t_hess.stop();
     t_step.stop();
     int i, j, k, l, iter, dcount;
@@ -81,7 +81,7 @@

     // Start of iterations
     for (iter = 1; iter < maxIter + 1 && !converged; iter++) {
-        int lastICG=0;
+        int lastICG = 0;
         if (print > 5 and mpi::orb_rank == 0) cout << " iteration  " << iter << endl;
         dcount = 0;
         for (i = 0; i < N2h; i++) { eiVal(i) = this->get_hessian(i, i); }
@@ -192,7 +192,7 @@
                         }
                         if (mpi::orb_rank == 0) cout << iCG << " error this iteration " << std::sqrt(ee) << endl;
                     }
-                    lastICG=iCG;
+                    lastICG = iCG;
                 }
                 step_norm2 = step.transpose() * step;
                 first_order = this->gradient.transpose() * step;
@@ -306,7 +306,13 @@
         }
         if (print == 2 && !wrongstep) { cout << setw(10) << dcount; }
         if (print == 2) cout << endl;
-        if(mpi::orb_rank==0)std::cout<<"iteration "<<iter<<" Newton "<<newton_step_exact<<" "<<lastICG<<" Time sofar " << (int)((float)t_tot.elapsed() * 1000) << " ms "<<" Time hessian " <<(int)((float)t_hess.elapsed() * 1000) << " Time dostep " <<(int)((float)t_step.elapsed() * 1000) << " gradient norm " << gradient_norm<< "trust radius set  to " << h << " test: " << relative_change << " mu: " << mu << " maxeival: " << maxEiVal <<std::endl;
+        if (mpi::orb_rank == 0)
+            std::cout << "iteration " << iter << " Newton " << newton_step_exact << " " << lastICG << " Time sofar "
+                      << (int)((float)t_tot.elapsed() * 1000) << " ms "
+                      << " Time hessian " << (int)((float)t_hess.elapsed() * 1000) << " Time dostep "
+                      << (int)((float)t_step.elapsed() * 1000) << " gradient norm " << gradient_norm
+                      << "trust radius set  to " << h << " test: " << relative_change << " mu: " << mu
+                      << " maxeival: " << maxEiVal << std::endl;
     } // iterations

     if (print > 15 and mpi::orb_rank == 0) {
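
Note (illustrative, not the actual mrchem implementation): the comment in NonlinearMaximizer.cpp above says only the Hessian diagonal is used until the iteration is close to the optimum, with a trust radius controlling the step. A minimal sketch of such a diagonal step, where the function name and the curvature floor are assumptions:

#include <Eigen/Dense>
#include <algorithm>
#include <cmath>
#include <iostream>

// One step using only the Hessian diagonal; the floor guards against
// tiny or negative curvature entries.
Eigen::VectorXd diagonal_newton_step(const Eigen::VectorXd &gradient,
                                     const Eigen::VectorXd &hessian_diag,
                                     double floor = 1.0e-3) {
    Eigen::VectorXd step(gradient.size());
    for (Eigen::Index i = 0; i < gradient.size(); i++) {
        double h = std::max(std::abs(hessian_diag(i)), floor);
        step(i) = gradient(i) / h; // ascent direction, since we maximize
    }
    return step;
}

int main() {
    Eigen::VectorXd g(3), d(3);
    g << 0.4, -0.1, 0.05;
    d << 2.0, 0.5, -0.01; // last entry exercises the curvature floor
    std::cout << diagonal_newton_step(g, d).transpose() << std::endl;
    return 0;
}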

src/utils/RRMaximizer.cpp

--- src/utils/RRMaximizer.cpp
+++ src/utils/RRMaximizer.cpp
@@ -23,8 +23,8 @@
  * <https://mrchem.readthedocs.io/>
  */

-#include <unsupported/Eigen/MatrixFunctions> // faster exponential of matrices
 #include "MRCPP/Printer"
+#include <unsupported/Eigen/MatrixFunctions> // faster exponential of matrices

 #include "utils/RRMaximizer.h"
 #include "utils/math_utils.h"
@@ -83,11 +83,11 @@
     r_y.clear();
     r_z.clear();

-    if(mpi::orb_rank==0)std::cout<<" start lowdin "<<std::endl;
+    if (mpi::orb_rank == 0) std::cout << " start lowdin " << std::endl;
     // rotate R matrices into orthonormal basis
     ComplexMatrix S_m12 = orbital::calc_lowdin_matrix(Phi);

-    if(mpi::orb_rank==0)std::cout<<" start U "<<std::endl;
+    if (mpi::orb_rank == 0) std::cout << " start U " << std::endl;
     this->total_U = S_m12.real() * this->total_U;

     DoubleMatrix R(this->N, this->N);
@@ -106,8 +106,8 @@
             }
         }
     }
-    if(mpi::orb_rank==0)std::cout<<mpi::orb_rank<<" rr finished "<<std::endl;;
-
+    if (mpi::orb_rank == 0) std::cout << mpi::orb_rank << " rr finished " << std::endl;
+    ;
 }

 /** compute the value of
@@ -309,7 +309,7 @@

     // calculate U=exp(-A) by diagonalization and U=Vexp(id)Vt with VdVt=iA
     // could also sum the term in the expansion
-    //this->total_U *= math_utils::skew_matrix_exp(A);
+    // this->total_U *= math_utils::skew_matrix_exp(A);
     this->total_U *= A.exp().transpose(); // transpose because we want exp(-A)

     // rotate the original r matrix with total U
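
Note (illustrative, not part of the PR): the line `this->total_U *= A.exp().transpose()` in the RRMaximizer patch relies on A being real skew-symmetric, so that exp(A)^T = exp(A^T) = exp(-A). A quick standalone check of that identity, using the same unsupported Eigen MatrixFunctions header as the patch:

#include <Eigen/Dense>
#include <unsupported/Eigen/MatrixFunctions> // matrix exponential
#include <iostream>

int main() {
    Eigen::MatrixXd M = Eigen::MatrixXd::Random(4, 4);
    Eigen::MatrixXd A = M - M.transpose(); // skew-symmetric: A^T = -A
    Eigen::MatrixXd B = -A;
    Eigen::MatrixXd expA = A.exp(); // evaluate exp(A)
    Eigen::MatrixXd expB = B.exp(); // evaluate exp(-A)
    // the two should agree to machine precision
    double diff = (expA.transpose() - expB).cwiseAbs().maxCoeff();
    std::cout << "max |exp(A)^T - exp(-A)| = " << diff << std::endl;
    return 0;
}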

Generated by :no_entry_sign: Danger

gitpeterwind commented 3 years ago

Most parts of this PR have been transferred to other PRs.