JuliaLogging / TensorBoardLogger.jl

Easy peasy logging to TensorBoard with Julia
MIT License
102 stars 28 forks source link

`convert(MVHistory, logger)` fails if hyperparameters have been recorded #137

Open torfjelde opened 1 year ago

torfjelde commented 1 year ago

MWE:

julia> using TensorBoardLogger, Logging, ValueHistories

julia> lg = TBLogger(joinpath(mktempdir(), "runs"), min_level=Logging.Info; step_increment=0)
TBLogger:
    - Log level     : Info
    - Current step  : 0
    - Output        : /tmp/jl_HWNylZ/runs
    - open files    : 1

julia> TensorBoardLogger.write_hparams!(
           lg,
           Dict("hi" => 1.0),
           ["x/val"]
       )

julia> with_logger(lg) do
           @info "x" val=3.0
       end

julia> convert(MVHistory, lg)
ERROR: Summary value of Nothing while deserializing
Stacktrace:
 [1] error(s::String)
   @ Base ./error.jl:35
 [2] summary_type
   @ ~/.julia/packages/TensorBoardLogger/pDb23/src/Deserialization/deserialization.jl:141 [inlined]
 [3] iterate(iter::TensorBoardLogger.SummaryDeserializingIterator, state::Int64)
   @ TensorBoardLogger ~/.julia/packages/TensorBoardLogger/pDb23/src/Deserialization/deserialization.jl:190
 [4] iterate
   @ ~/.julia/packages/TensorBoardLogger/pDb23/src/Deserialization/deserialization.jl:184 [inlined]
 [5] map_summaries(fun::TensorBoardLogger.var"#131#132"{MVHistory{History}}, logdir::TBLogger{String, IOStream}; purge::Bool, tags::Nothing, steps::Nothing, smart::Bool)
   @ TensorBoardLogger ~/.julia/packages/TensorBoardLogger/pDb23/src/Deserialization/deserialization.jl:253
 [6] map_summaries
   @ ~/.julia/packages/TensorBoardLogger/pDb23/src/Deserialization/deserialization.jl:237 [inlined]
 [7] #convert#130
   @ ~/.julia/packages/TensorBoardLogger/pDb23/src/Optional/ValueHistories.jl:5 [inlined]
 [8] convert(::Type{MVHistory}, tbl::TBLogger{String, IOStream})
   @ TensorBoardLogger ~/.julia/packages/TensorBoardLogger/pDb23/src/Optional/ValueHistories.jl:3
 [9] top-level scope
   @ REPL[34]:1

In contrast, the following works:

julia> lg = TBLogger(joinpath(mktempdir(), "runs"), min_level=Logging.Info; step_increment=0)
TBLogger:
    - Log level     : Info
    - Current step  : 0
    - Output        : /tmp/jl_u2Pmjk/runs
    - open files    : 1

julia> with_logger(lg) do
           @info "x" val=3.0
       end

julia> convert(MVHistory, lg)
MVHistory{History}
  :x/val => 1 elements {Int64,Float32}
System info ``` julia julia> versioninfo() Julia Version 1.9.3 Commit bed2cd540a1 (2023-08-24 14:43 UTC) Build Info: Official https://julialang.org/ release Platform Info: OS: Linux (x86_64-linux-gnu) CPU: 12 × Intel(R) Core(TM) i7-10710U CPU @ 1.10GHz WORD_SIZE: 64 LIBM: libopenlibm LLVM: libLLVM-14.0.6 (ORCJIT, skylake) Threads: 1 on 12 virtual cores (jl_ymjBPx) pkg> st --manifest Status `/tmp/jl_ymjBPx/Manifest.toml` [621f4979] AbstractFFTs v1.5.0 [79e6a3ab] Adapt v3.6.2 [e1450e63] BufferedStreams v1.2.1 [3da002f7] ColorTypes v0.11.4 ⌅ [c3611d14] ColorVectorSpace v0.9.10 [5ae59095] Colors v0.12.10 [34da2185] Compat v4.9.0 [9a962f9c] DataAPI v1.15.0 [864edb3b] DataStructures v0.18.15 [ffbed154] DocStringExtensions v0.9.3 [4e289a0a] EnumX v1.0.4 [5789e2e9] FileIO v1.16.1 [53c48c17] FixedPointNumbers v0.8.4 [a2bd30eb] Graphics v1.1.2 ⌅ [a09fc81d] ImageCore v0.9.4 [92d709cd] IrrationalConstants v0.2.2 [692b3bcd] JLLWrappers v1.5.0 [2ab3a3ac] LogExpFunctions v0.3.26 [dbb5928d] MappedArrays v0.4.2 [e1d29d7a] Missings v1.1.0 [e94cdb99] MosaicViews v0.3.4 [77ba4419] NaNMath v1.0.2 [6fe1bfb0] OffsetArrays v1.12.10 [bac558e1] OrderedCollections v1.6.2 [5432bcbf] PaddedViews v0.5.12 [aea7be01] PrecompileTools v1.2.0 [21216c6a] Preferences v1.4.0 [3349acd9] ProtoBuf v1.0.14 [3cdcf5f2] RecipesBase v1.3.4 [189a3867] Reexport v1.2.2 [ae029012] Requires v1.3.0 [a2af1166] SortingAlgorithms v1.1.1 [276daf66] SpecialFunctions v2.3.1 [cae243ae] StackViews v0.1.1 [82ae8749] StatsAPI v1.7.0 [2913bbd2] StatsBase v0.34.0 [899adc3e] TensorBoardLogger v0.1.22 [62fd8b95] TensorCore v0.1.1 [3bb67fe8] TranscodingStreams v0.9.13 [98cad3c8] ValueHistories v0.5.4 [efe28fd5] OpenSpecFun_jll v0.5.5+0 [0dad84c5] ArgTools v1.1.1 [56f22d72] Artifacts [2a0f44e3] Base64 [8bf52ea8] CRC32c [ade2ca70] Dates [f43a241f] Downloads v1.6.0 [7b1f6079] FileWatching [b77e0a4c] InteractiveUtils [b27032c2] LibCURL v0.6.3 [76f85450] LibGit2 [8f399da3] Libdl [37e2e46d] LinearAlgebra [56ddb016] Logging [d6f4376e] 
Markdown [ca575930] NetworkOptions v1.2.0 [44cfe95a] Pkg v1.9.2 [de0858da] Printf [3fa0cd96] REPL [9a3f8284] Random [ea8e919c] SHA v0.7.0 [9e88b42a] Serialization [6462fe0b] Sockets [2f01184e] SparseArrays [10745b16] Statistics v1.9.0 [fa267f1f] TOML v1.0.3 [a4e569a6] Tar v1.10.0 [8dfed614] Test [cf7118a7] UUIDs [4ec0a83e] Unicode [e66e0078] CompilerSupportLibraries_jll v1.0.5+0 [deac9b47] LibCURL_jll v7.84.0+0 [29816b5a] LibSSH2_jll v1.10.2+0 [c8ffd9c3] MbedTLS_jll v2.28.2+0 [14a3606d] MozillaCACerts_jll v2022.10.11 [4536629a] OpenBLAS_jll v0.3.21+4 [05823500] OpenLibm_jll v0.8.1+0 [bea87d4a] SuiteSparse_jll v5.10.1+6 [83775a58] Zlib_jll v1.2.13+0 [8e850b90] libblastrampoline_jll v5.8.0+0 [8e850ede] nghttp2_jll v1.48.0+0 [3f19e933] p7zip_jll v17.4.0+0 ```
torfjelde commented 1 year ago

This seems like the offending Summary:

TensorBoardLogger.tensorboard.Summary(TensorBoardLogger.tensorboard.var"Summary.Value"[TensorBoardLogger.tensorboard.var"Summary.Value"("", "_hparams_/experiment", TensorBoardLogger.tensorboard.SummaryMetadata(TensorBoardLogger.tensorboard.var"SummaryMetadata.PluginData"("hparams", UInt8[0x12, 0x30, 0x19, 0x5b, 0x9a, 0x61, 0x3f, 0x2a, 0x42, 0xd9  …  0x09, 0x0a, 0x07, 0x12, 0x05, 0x78, 0x2f, 0x76, 0x61, 0x6c]), "", "", TensorBoardLogger.tensorboard.DataClass.DATA_CLASS_UNKNOWN), nothing)])

I got this from inserting print statements here:

https://github.com/JuliaLogging/TensorBoardLogger.jl/blob/3d9c1a554a08179785459ad7b83bce0177b90275/src/Deserialization/deserialization.jl#L244-L258

torfjelde commented 1 year ago

So that will be coming from https://github.com/JuliaLogging/TensorBoardLogger.jl/blob/3d9c1a554a08179785459ad7b83bce0177b90275/src/hparams.jl#L152-L154

torfjelde commented 1 year ago

One fix that works nicely on my end (though it's unclear to me whether this is the desired behaviour) is to simply skip any summary whose value is `nothing`, i.e. `summary.value === nothing`. For example,

# Advance a `SummaryDeserializingIterator` by one entry: returns `((tag, val), next_state)`,
# or `nothing` once iterating `iter.summary` is exhausted.
#
# This is the version the report is about. `summary_type` is called on every entry,
# and per the stack trace above that call is where
# "Summary value of Nothing while deserializing" is raised.
function Base.iterate(iter::SummaryDeserializingIterator, state=1)
    evs = iter.summary
    res = iterate(evs, state)
    # The underlying iteration has no more entries.
    res isa Nothing && return nothing

    # NOTE(review): `i_state` is bound here but never used below; the state that is
    # returned is derived from `state` instead.
    (tag, summary), i_state = res

    # Select the deserializer from the kind of payload the entry carries.
    typ = summary_type(summary)
    if typ === :histo
        val = deserialize_histogram_summary(summary)
        # `lookahead_deserialize` rebinds `tag`, `val` and `state`; presumably it consumes
        # follow-up entries belonging to the same value -- TODO confirm against its definition.
        tag, val, state = lookahead_deserialize(tag, val, evs, state, :histo)
    elseif typ === :image
        val = deserialize_image_summary(summary)
        tag, val, state = lookahead_deserialize(tag, val, evs, state, :image)
    elseif typ === :audio
        val = deserialize_audio_summary(summary)
    elseif typ === :tensor
        val = deserialize_tensor_summary(summary)
    elseif typ === :simple_value
        val = summary.value
        tag, val, state = lookahead_deserialize(tag, val, evs, state, :simple_value)
    else
        # NOTE(review): `val` is not assigned on this path, so the `return` below fails
        # with an `UndefVarError` right after this message is logged.
        @error "Event with unknown field" summary=summary
    end

    return (tag, val), state + 1
end

can be replaced by

# Advance a `SummaryDeserializingIterator` to its next deserializable entry.
#
# Returns `((tag, val), next_state)`, or `nothing` once iterating `iter.summary`
# is exhausted.
#
# Entries are skipped (rather than yielded) in two cases:
#   * `summary.value === nothing` -- e.g. the `_hparams_/experiment` entry shown
#     earlier in this report, which carries only plugin metadata;
#   * `summary_type` returns a kind none of the branches below handles; an error
#     is logged and iteration continues.
#
# Skipping is done with a loop instead of a recursive `iterate` call so that a
# long run of skipped entries cannot grow the call stack.
function Base.iterate(iter::SummaryDeserializingIterator, state=1)
    evs = iter.summary

    while true
        res = iterate(evs, state)
        res === nothing && return nothing

        (tag, summary), i_state = res

        # Value-less entry: resume from the state the inner iterator handed back.
        if summary.value === nothing
            state = i_state
            continue
        end

        typ = summary_type(summary)
        if typ === :histo
            val = deserialize_histogram_summary(summary)
            # `lookahead_deserialize` rebinds `tag`, `val` and `state`.
            tag, val, state = lookahead_deserialize(tag, val, evs, state, :histo)
        elseif typ === :image
            val = deserialize_image_summary(summary)
            tag, val, state = lookahead_deserialize(tag, val, evs, state, :image)
        elseif typ === :audio
            val = deserialize_audio_summary(summary)
        elseif typ === :tensor
            val = deserialize_tensor_summary(summary)
        elseif typ === :simple_value
            val = summary.value
            tag, val, state = lookahead_deserialize(tag, val, evs, state, :simple_value)
        else
            # Previously this fell through to the `return` with `val` unassigned,
            # turning the logged error into an `UndefVarError`. Skip the entry instead.
            @error "Event with unknown field" summary=summary
            state = i_state
            continue
        end

        return (tag, val), state + 1
    end
end