Closed kostrzewa closed 2 years ago
I'm able to reproduce this in a test run with tmLQCD commit f1d6e374d3b47ee857a23cdc96be6341c9c3d78b and QUDA (develop) commit 1a6ec1821ea621cd4c003cefb6fc88a31d7544ee, using the following input file:
NrXProcs = 1
NrYProcs = 1
NrZProcs = 1
ompnumthreads = 3
L=16
T=32
Measurements = 9999
# StartCondition = hot
StartCondition = continue
InitialStoreCounter = readin
2KappaMu = 0.0005602580000000
2KappaMuBar = 0.0394421632
2KappaEpsBar = 0.0426076209
CSW = 1.74
kappa = 0.1400645
NSave = 500000
ThetaT = 1.0
UseEvenOdd = yes
ReversibilityCheck = no
ReversibilityCheckIntervall = 100
DebugLevel = 2
#StrictResidualCheck = yes
UseRelativePrecision = yes
BeginExternalInverter QUDA
Pipeline = 24
gcrNkrylov = 24
MGCoarseMuFactor = 1.0, 1.0, 40.0
MGNumberOfLevels = 3
MGNumberOfVectors = 24, 32
MGSetupSolver = cg
MGSetup2KappaMu = 0.0005602580000000
MGVerbosity = silent, silent, silent
MGSetupSolverTolerance = 5e-7, 5e-7
MGSetupMaxSolverIterations = 1500, 1500
MGCoarseSolverType = gcr, gcr, cagcr
MgCoarseSolverTolerance = 0.1, 0.1, 0.1
MGCoarseMaxSolverIterations = 25, 25, 25
MGSmootherType = cagcr, cagcr, cagcr
MGSmootherTolerance = 0.2, 0.2, 0.2
MGSmootherPreIterations = 0, 0, 0
MGSmootherPostIterations = 4, 4, 4
MGBlockSizesX = 4,2
MGBlockSizesY = 4,2
MGBlockSizesZ = 4,2
MGBlockSizesT = 4,2
MGOverUnderRelaxationFactor = 0.90, 0.90, 0.90
MGResetSetupMDUThreshold = 1.0
MGRefreshSetupMDUThreshold = 0.083
MGRefreshSetupMaxSolverIterations = 25, 25
EnableDeviceMemoryPool = no
EnablePinnedMemoryPool = no
EndExternalInverter
BeginMeasurement CORRELATORS
Frequency = 1
EndMeasurement
BeginOperator CLOVER
kappa = 0.1400645
2KappaMu = 0.0005602580000000
CSW = 1.74
UseEvenOdd = yes
SolverPrecision = 1e-18
MaxSolverIterations = 50000
UseExternalInverter = QUDA
Solver = cg
usesloppyprecision = half
EndOperator
BeginMonomial GAUGE
Type = Iwasaki
beta = 1.726
Timescale = 0
UseExternalLibrary = quda
EndMonomial
BeginMonomial CLOVERDET
Timescale = 1
kappa = 0.1400645
2KappaMu = 0.0005602580000000
CSW = 1.74
rho = 0.12
MaxSolverIterations = 1000
AcceptancePrecision = 1.e-21
ForcePrecision = 1.e-16
Name = cloverdetlight
solver = cg
useexternalinverter = quda
usesloppyprecision = half
EndMonomial
BeginMonomial CLOVERDETRATIO
Timescale = 2
kappa = 0.1400645
2KappaMu = 0.0005602580000000
rho = 0.01
rho2 = 0.12
CSW = 1.74
MaxSolverIterations = 10000
AcceptancePrecision = 1.e-21
ForcePrecision = 1.e-18
Name = cloverdetratio1light
solver = mg
useexternalinverter = quda
usesloppyprecision = single
EndMonomial
BeginMonomial CLOVERDETRATIO
Timescale = 3
kappa = 0.1400645
2KappaMu = 0.0005602580000000
rho = 0.005
rho2 = 0.01
CSW = 1.74
MaxSolverIterations = 50000
AcceptancePrecision = 1.e-21
ForcePrecision = 1.e-18
Name = cloverdetratio2light
solver = mg
useexternalinverter = quda
usesloppyprecision = single
EndMonomial
BeginMonomial CLOVERDETRATIO
Timescale = 4
kappa = 0.1400645
2KappaMu = 0.0005602580000000
rho = 0.0
rho2 = 0.005
CSW = 1.74
MaxSolverIterations = 50000
AcceptancePrecision = 1.e-21
ForcePrecision = 1.e-18
Name = cloverdetratio3light
solver = mg
useexternalinverter = quda
usesloppyprecision = single
EndMonomial
BeginMonomial NDCLOVERRAT
Timescale = 2
kappa = 0.1400645
CSW = 1.74
AcceptancePrecision = 1e-21
ForcePrecision = 1e-16
StildeMin = 0.0000376
StildeMax = 4.7
MaxSolverIterations = 500
Name = ndcloverrat_0_3
DegreeOfRational = 10
Cmin = 0
Cmax = 3
ComputeEVFreq = 0
2Kappamubar = 0.0394421632
2Kappaepsbar = 0.0426076209
AddTrLog = yes
UseExternalInverter = quda
UseSloppyPrecision = single
RefinementPrecision = half
solver = cgmmsnd
EndMonomial
BeginMonomial NDCLOVERRAT
Timescale = 3
kappa = 0.1400645
CSW = 1.74
MaxSolverIterations = 1000
AcceptancePrecision = 1e-21
ForcePrecision = 1e-16
# lambda_min = 8e-6 (min evals go as low as 1.5e-5), maximal evals are found as high as 0.85 and fluctuate strongly
StildeMin = 0.0000376
StildeMax = 4.7
Name = ndcloverrat_4_6
DegreeOfRational = 10
Cmin = 4
Cmax = 6
ComputeEVFreq = 0
2Kappamubar = 0.0394421632
2Kappaepsbar = 0.0426076209
AddTrLog = no
useexternalinverter = quda
usesloppyprecision = single
refinementprecision = half
solver = cgmmsnd
EndMonomial
BeginMonomial NDCLOVERRAT
Timescale = 4
kappa = 0.1400645
CSW = 1.74
AcceptancePrecision = 1e-21
ForcePrecision = 1e-16
MaxSolverIterations = 5000
StildeMin = 0.0000376
StildeMax = 4.7
Name = ndcloverrat_7_9
DegreeOfRational = 10
Cmin = 7
Cmax = 9
ComputeEVFreq = 0
2Kappamubar = 0.0394421632
2Kappaepsbar = 0.0426076209
AddTrLog = no
useexternalinverter = quda
usesloppyprecision = single
refinementprecision = half
solver = cgmmsnd
EndMonomial
BeginMonomial NDCLOVERRATCOR
Timescale = 1
kappa = 0.1400645
CSW = 1.74
AcceptancePrecision = 1e-20
ForcePrecision = 1e-16
MaxSolverIterations = 5000
StildeMin = 0.0000376
StildeMax = 4.7
Name = ndcloverratcor
DegreeOfRational = 10
ComputeEVFreq = 0
2Kappamubar = 0.0394421632
2Kappaepsbar = 0.0426076209
useexternalinverter = quda
usesloppyprecision = double
solver = cgmmsnd
EndMonomial
BeginIntegrator
Type0 = 2MN #FG
Type1 = 2MN #FG
Type2 = 2MN #FG
Type3 = 2MN #FG
IntegrationSteps0 = 1
IntegrationSteps1 = 1
IntegrationSteps2 = 1
IntegrationSteps3 = 1
IntegrationSteps4 = 12
Tau = 1.0
#Lambda0 = 0.166667
#Lambda1 = 0.166667
#Lambda2 = 0.166667
#Lambda3 = 0.166667
Lambda0 = 0.193
Lambda1 = 0.195
Lambda2 = 0.197
Lambda3 = 0.200
NumberOfTimescales = 5
MonitorForces = no
EndIntegrator
With the same type of integrator that you were using, but one fewer time scale, I also observe that the setup is never refreshed. This is clearly a logic problem and I think I know where it originates: the `gauge_id` of the `tm_QudaMGSetupState_t` should only be updated after a setup refresh and never after a setup update.
This should be fixed by #533
It seems that b70e6f40b441bde1c0b3eff108f5ce58312e1221 has introduced a regression into the setup refresh logic. Instead of refreshing when `MGRefreshSetupMDUThreshold` is reached, the setup is merely updated. This of course destroys convergence as the trajectory progresses. Thanks @Marcogarofalo for the report.