Closed ax3l closed 8 years ago
@psychocoderHPC for a quick fix, this is @BeyondEspresso 's diff so far: https://github.com/BeyondEspresso/picongpu/compare/bb1f0f98118605cd9fc6f586b0782c8ff6aa51df...BeyondEspresso:8909c081f0bcc9d0dda429fb1ce9adb146e8f524
diff --git a/src/libPMacc/include/algorithms/math/defines/floatingPoint.hpp b/src/libPMacc/include/algorithms/math/defines/floatingPoint.hpp
index 1a7813a..565420a 100644
--- a/src/libPMacc/include/algorithms/math/defines/floatingPoint.hpp
+++ b/src/libPMacc/include/algorithms/math/defines/floatingPoint.hpp
@@ -36,6 +36,8 @@ struct Floor;
template<typename Type>
struct Float2int_rd;
+template<typename Type>
+struct Fmod;
template<typename T1>
HDINLINE typename Floor< T1>::result floor(T1 value)
@@ -49,6 +51,12 @@ HDINLINE typename Float2int_rd< T1>::result float2int_rd(T1 value)
return Float2int_rd< T1 > ()(value);
}
+template<typename T1>
+HDINLINE typename Fmod< T1>::result fmod(T1 x,T1 y)
+{
+ return Fmod< T1 > ()(x,y);
+}
+
} //namespace math
} //namespace algorithms
}//namespace PMacc
diff --git a/src/libPMacc/include/algorithms/math/doubleMath/floatingPoint.tpp b/src/libPMacc/include/algorithms/math/doubleMath/floatingPoint.tpp
index 666c19e..864f4be 100644
--- a/src/libPMacc/include/algorithms/math/doubleMath/floatingPoint.tpp
+++ b/src/libPMacc/include/algorithms/math/doubleMath/floatingPoint.tpp
@@ -59,6 +59,16 @@ struct Float2int_rd<double>
}
};
+template<>
+struct Fmod<double>
+{
+ typedef double result;
+
+ HDINLINE result operator( )(result x, result y)
+ {
+ return ::fmod( x , y );
+ }
+};
} //namespace math
} //namespace algorithms
diff --git a/src/picongpu/include/simulationControl/MovingWindow.hpp b/src/picongpu/include/simulationControl/MovingWindow.hpp
index 4902f28..ec65a73 100644
--- a/src/picongpu/include/simulationControl/MovingWindow.hpp
+++ b/src/picongpu/include/simulationControl/MovingWindow.hpp
@@ -1,5 +1,5 @@
/**
- * Copyright 2013-2014 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt
+ * Copyright 2013-2016 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, Alexander Debus
*
* This file is part of PIConGPU.
*
@@ -63,20 +63,22 @@ class MovingWindow
/* later used to calculate smoother offsets */
double stepsInFutureAfterComma = stepsInFuture_tmp - (double) stepsInFuture;
- /* round to nearest step so we get smaller sliding dfference
+ /* round to nearest step so we get smaller sliding difference
* this is valid if we activate sliding window because y direction has
* the same size for all gpus
*/
- const uint32_t stepsPerGPU = (uint32_t) math::floor(
- (double) (subGrid.getLocalDomain().size.y() * cell_height) / light_way_per_step + 0.5);
- const uint32_t firstSlideStep = stepsPerGPU * devices_y - stepsInFuture;
- const uint32_t firstMoveStep = stepsPerGPU * (devices_y - 1) - stepsInFuture;
+ const double stepsPerGPU = (double) (subGrid.getLocalDomain().size.y() * cell_height) / light_way_per_step;
+ const uint32_t firstSlideStep = (uint32_t) math::floor( stepsPerGPU * (double) devices_y
+ - (double) stepsInFuture + 0.5 );
+ const uint32_t firstMoveStep = (uint32_t) math::floor( stepsPerGPU * (double) (devices_y - 1)
+ - (double) stepsInFuture + 0.5 );
if (slidingWindowActive==true && firstMoveStep <= currentStep)
{
- const uint32_t stepsInLastGPU = (currentStep + stepsInFuture) % stepsPerGPU;
+ const double stepsInLastGPU = math::fmod( (double) currentStep + (double) stepsInFuture,
+ stepsPerGPU );
/* moving window start */
- if (firstSlideStep <= currentStep && stepsInLastGPU == 0)
+ if (firstSlideStep <= currentStep && stepsInLastGPU < 1.0 )
{
incrementSlideCounter(currentStep);
if (doSlide)
@@ -86,8 +88,8 @@ class MovingWindow
/* round to nearest cell to have smoother offset jumps */
if (offsetFirstGPU)
{
- *offsetFirstGPU = math::floor(((double) stepsInLastGPU + stepsInFutureAfterComma) *
- light_way_per_step / cell_height + 0.5);
+ *offsetFirstGPU = math::floor( (stepsInLastGPU + stepsInFutureAfterComma) *
+ light_way_per_step / cell_height + 0.5 );
}
}
}
closed with #1337 (l33t!!111elf)
@BeyondEspresso discovered that the moving window implementation is broken in terms of correctness of speed. Currently, it moves a few percent too slow due to rounding errors.
To fix this reasonably, the implementation in
simulationControl/MovingWindow.hpp
needs a full refactoring, is highly obfuscated, very old and needs doxygen documentation (we knew that at least). (In contrast, the wiki documents all its non-code aspects very well.)Code Analysis
It's very old, pre-git code and we are not surprised that there will be dragons :)
PIConGPU Code Affected
MovingWindow
members:getCurrentSlideInfo
getSlideCounter
,slideInCurrentStep
,getWindow
(the last one is the only one that will actually update the offset)getWindow
is the only one that will actually update the offset members inMovingWindow
, but is not triggered when actually doing the slide insimulationControl/MySimulation.hpp
leaving the offsets partly outdated for some time.PMacc code
MySimulation
calls thenGridController::slide(...)
which then implements the offset calculation again on it's own in PMaccmappings/simulation/GridController::updateLocalDomainOffset
that finally also updates theSubGrid
. Also, it triggerscommunication/CommunicatorMPI::slide
... . Finally, as a special feature, theGridController
andCommunicatorMPI
each implement asetStateAfterSlides
that can be used to restart a simulation and "emulate" the number of slides it did before (forSubGrid
andCommunicatorMPI
) and return true if a slide has happened for this MPI-rank.Again,
slide/setStateAfterSlides
calls will perform a slide as soon as called and are collective but return only true if one was one of the GPUs that was moved from back to front.Last word on the
CommunicatorMPI
: even if documented called in-code precisely, it updates only the position in the 3D grid of MPI ranks (no manipulation of cell domain information or offsets). At time of slide the exchange partners are updated. An interesting and fishy member isyoffset
which is together withcoords
defining a virtual layer for enumerating the MPI-rank positions in the domain decomposition since theMPI_Cart_create
operation is expensive and only done once at the beginning of the sim. One wants a continuously, monotonically increasing numbered space of GPU positions after slides. Its memberupdateCoordinates
might still need a good review but is unaffected as long as slides continue to happen iny
.Proposed Refactoring
[WIP!]
"Slide" describes the moving of a whole plane of GPUs from the back of the simulation box to the front.
Algorithm (PIConGPU Code)
MovingWindow
should first separate the raw algorithm on moving window offset calculation and the manipulation of members.Raw members for algorithm:
setStartAfterStep(uint32_t)
: start in steps; step after that the moving window starts to move (time offset for the moving starts, precision: one time step)setSpeed(float64_t)
: currentlyc
int64_t getCurrentMovedCells( float32_t cellSizeInDir, uint32_t currentStep)
: current number of cells moved which is a scalar, not a 3D position, which makes the direction irrelevantThe
MovingWindow
's memberwindow
should be a separate singleton asSubGrid
, manipulated with the results from the previous class.Slide Detection (PIConGPU Code)
To detect if a slide occurs after the current step (which is the connection to PMacc that controls MPI & reinit after sliding) one can simply calculate:
Offset and Window Size Updates
SubGrid
is already there for local domain size and offset remembering and can be manipulated by all members.Window
is regularly updated every time step andSubGrid
only on slides.PIConGPU now decides on a
slideDirection
.PIConGPU (MySimulation) code logic at the beginning of a step:
isSlideAfterStep(t-1)
isSliding =
GPU position in Communicator...(slideDirection) is back of box in that slideDirectionisSlideAfterStep
: slide GC/CommunicatorSubGrid
(assume cell in y equal for all GPUs):SubGrid.globalDomain.offset(slideDirection) += SubGrid.localDomain.size
SubGrid.localDomain.offset(slideDirection) = globalWindow.offset(slideDirection) + globalWindow.size(slideDirection) - SubGrid.localDomain.size
moveCells = MovingWindow::getCurrentMovedCells
Window
:Window
member)globalWindow.offset = MovingWindow::getCurrentMovedCells - SubGrid.globalDomain.offset
localWindow.offset = SubGrid.localDomain.offset - globalWindow.offset
< 0 ?
(means back of simulation box):localWindow.size -= moveCells
[local&global]Window.offset += moveCells
localWindow.size <= 0
: (orisSliding == true
):localWindow.size += SubGrid.localDomain.size
localWindow.offset = 0