Closed tlienart closed 7 years ago
PS: with MAXNEVENTS=10 the algorithm should be very quick (testing ground). In practice it will be set to something like 10000.
### PART FOR THE USER (note the arrays)
# Entries holding a single value are kept fixed; entries holding an array are
# swept over (every combination of the array values is generated).
bd = Dict(
    "LATENT_D" => 30,
    "SIGMA_R" => [0.5,1.0,3.0],
    "SIGMA_U" => [10.0,15.0],
    "SIGMA_V" => [20.0,25.0],
    "LAMBDAREF" => [.01,.05],
    "MAXNEVENTS" => 10,
    "MAXT" => Inf
)
###################
### PART FOR US ### (this could be a separate script further down the line)
###################
using Iterators

# Split the parameter names: exactly one value (sk) vs. several values (mk).
sk = [name for (name, vals) in bd if length(vals) == 1]
mk = [name for (name, vals) in bd if length(vals) > 1]

# Assignments shared by every generated command ("NAME = value; " each).
basestring = join([string(name, " = ", bd[name][1], "; ") for name in sk])

# One command body per combination of the swept values. The trailing include
# call is appended with plain double quotes, i.e. they are NOT escaped for the
# shell once the body is wrapped in `julia -e "..."` below.
strings = String[]
for combo in product([bd[name] for name in mk]...)
    assignments = [string(name, " = ", combo[idx], "; ") for (idx, name) in enumerate(mk)]
    push!(strings, string(basestring, join(assignments), "include(\"generalchild.jl\")"))
end

# Write one `julia -e "<body>"` line per command body to the file "test".
open("test", "w") do io
    for body in strings
        print(io, "julia -e \"", body, "\"\n")
    end
end
>> head test
julia -e "LATENT_D = 30; MAXNEVENTS = 10; MAXT = Inf; SIGMA_R = 0.5; LAMBDAREF = 0.01; SIGMA_U = 10.0; SIGMA_V = 20.0; include("generalchild.jl")"
julia -e "LATENT_D = 30; MAXNEVENTS = 10; MAXT = Inf; SIGMA_R = 1.0; LAMBDAREF = 0.01; SIGMA_U = 10.0; SIGMA_V = 20.0; include("generalchild.jl")"
julia -e "LATENT_D = 30; MAXNEVENTS = 10; MAXT = Inf; SIGMA_R = 3.0; LAMBDAREF = 0.01; SIGMA_U = 10.0; SIGMA_V = 20.0; include("generalchild.jl")"
julia -e "LATENT_D = 30; MAXNEVENTS = 10; MAXT = Inf; SIGMA_R = 0.5; LAMBDAREF = 0.05; SIGMA_U = 10.0; SIGMA_V = 20.0; include("generalchild.jl")"
julia -e "LATENT_D = 30; MAXNEVENTS = 10; MAXT = Inf; SIGMA_R = 1.0; LAMBDAREF = 0.05; SIGMA_U = 10.0; SIGMA_V = 20.0; include("generalchild.jl")"
julia -e "LATENT_D = 30; MAXNEVENTS = 10; MAXT = Inf; SIGMA_R = 3.0; LAMBDAREF = 0.05; SIGMA_U = 10.0; SIGMA_V = 20.0; include("generalchild.jl")"
julia -e "LATENT_D = 30; MAXNEVENTS = 10; MAXT = Inf; SIGMA_R = 0.5; LAMBDAREF = 0.01; SIGMA_U = 15.0; SIGMA_V = 20.0; include("generalchild.jl")"
julia -e "LATENT_D = 30; MAXNEVENTS = 10; MAXT = Inf; SIGMA_R = 1.0; LAMBDAREF = 0.01; SIGMA_U = 15.0; SIGMA_V = 20.0; include("generalchild.jl")"
julia -e "LATENT_D = 30; MAXNEVENTS = 10; MAXT = Inf; SIGMA_R = 3.0; LAMBDAREF = 0.01; SIGMA_U = 15.0; SIGMA_V = 20.0; include("generalchild.jl")"
julia -e "LATENT_D = 30; MAXNEVENTS = 10; MAXT = Inf; SIGMA_R = 0.5; LAMBDAREF = 0.05; SIGMA_U = 15.0; SIGMA_V = 20.0; include("generalchild.jl")"
So I would just define a dict named bd; this would generate a file in which each line looks like the ones above, which you should be able to put in the queue verbatim (or maybe after adding some stuff before and after).
Hmm, the quotes inside the include call may have to be double-escaped.
### PART FOR THE USER (note the arrays)
# Entries holding a single value are kept fixed; entries holding an array are
# swept over (one command is generated per combination of the array values).
bd = Dict(
    "LATENT_D" => 30,
    "SIGMA_R" => [0.5, 1.0, 3.0],
    "SIGMA_U" => [10.0, 15.0],
    "SIGMA_V" => [20.0, 25.0],
    "LAMBDAREF" => [.01, .05],
    "MAXNEVENTS" => 10,
    "MAXT" => Inf
)
###################
### PART FOR US ### (this could be a separate script further down the line)
###################
# `product` ships with Base (module `Base.Iterators`, Julia >= 0.6), so the
# external, deprecated Iterators.jl package is no longer required.
using Base.Iterators: product

"""
    assignment(name, value)

Return the source text `"name = value; "` used inside a `julia -e` command.
"""
assignment(name, value) = string(name, " = ", value, "; ")

"""
    childcommands(params; script="generalchild.jl")

Build one command body per combination of the swept parameters in `params`.

A parameter whose value has length 1 is emitted once, as a fixed assignment; a
parameter whose value has length > 1 is swept. Each returned string consists of
the fixed assignments, then the swept assignments, then an `include` of
`script` whose quotes are backslash-escaped so the body can be wrapped in
`julia -e "..."` on a shell command line.

# Throws
- `ArgumentError` if a parameter has no values (it would otherwise be silently
  left out of every command).
"""
function childcommands(params; script = "generalchild.jl")
    names = collect(keys(params))
    for name in names
        length(params[name]) == 0 &&
            throw(ArgumentError("parameter $name has no values"))
    end
    fixed = [name for name in names if length(params[name]) == 1]
    swept = [name for name in names if length(params[name]) > 1]

    prefix = join([assignment(name, first(params[name])) for name in fixed])
    # The Julia literal below yields the text include(\"...\"): the backslashes
    # must survive into the generated line for the shell to see escaped quotes.
    includecall = string("include(\\\"", script, "\\\")")

    commands = String[]
    for combo in product([params[name] for name in swept]...)
        sweep = join([assignment(name, value) for (name, value) in zip(swept, combo)])
        push!(commands, string(prefix, sweep, includecall))
    end
    return commands
end

# Doing the work inside a function (rather than in top-level loops that
# reassign globals) keeps the script valid when run non-interactively on
# Julia >= 1.0, where such an assignment creates a new loop-local variable.
strings = childcommands(bd)

# Write one `julia -e "<body>"` line per command body to the file "test".
open("test", "w") do f
    for s in strings
        write(f, "julia -e \"" * s * "\"\n")
    end
end
julia -e "LATENT_D = 30; MAXNEVENTS = 10; MAXT = Inf; SIGMA_R = 0.5; LAMBDAREF = 0.01; SIGMA_U = 10.0; SIGMA_V = 20.0; include(\"generalchild.jl\")"
yeah that works
The executing machine Mi needs to have
generalchild.jl
child.jl
data/ratings.csv
file. The data can be obtained via the file
ratings.dat
from https://grouplens.org/datasets/movielens/1m/. It has a crappy format, so before putting it on the machine it should be pre-processed as:
On the machine Mi, call
julia -e "child.jl"
and retrieve the generated data child_(hash).jld
+ STDERR and STDOUT (if possible).
The format of a child script is (https://github.com/tlienart/pmf/blob/master/child.jl)
The first 8 lines can trivially be generated from the Python mother script. Ideally we would want ranges for one or two of those, and that would form one experiment. So the mother script would have
which would generate four child scripts (cartesian product); all other parameters should be fixed, and that should correspond to four machines.
The code for the general script
generalchild
is on https://github.com/tlienart/pmf/blob/master/generalchild.jl
@martintoreilly if you could adapt your mother script so that it can send to an executing machine the data and whatever else is needed so that this call
julia -e "child.jl"
works, it would be fantastic, thanks!