JuliaStats / Lasso.jl

Lasso/Elastic Net linear and generalized linear models
Other
143 stars 31 forks source link

"matrix not square" error #36

Closed timholy closed 4 years ago

timholy commented 4 years ago

I'm attempting to use this in a multidimensional context in which the number of observations is smaller than the number of variables. I'm getting some errors I don't understand. Here's a demo:

julia> using Lasso, GLM

julia> m = fit(LassoModel, rand(5, 16), rand(5))
LassoModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}}(Error showing value of type LassoModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}}:
ERROR: DimensionMismatch("matrix is not square: dimensions are (5, 17)")
Stacktrace:
 [1] inv!(::LinearAlgebra.Cholesky{Float64,Array{Float64,2}}) at /home/tim/src/julia-1/usr/share/julia/stdlib/v1.3/LinearAlgebra/src/LinearAlgebra.jl:221
 [2] inv at /home/tim/src/julia-1/usr/share/julia/stdlib/v1.3/LinearAlgebra/src/cholesky.jl:597 [inlined]
 [3] invchol at /home/tim/.julia/packages/GLM/ceUyW/src/linpred.jl:200 [inlined]
 [4] vcov(::LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}) at /home/tim/.julia/packages/GLM/ceUyW/src/linpred.jl:216
 [5] stderror(::LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}) at /home/tim/.julia/packages/GLM/ceUyW/src/linpred.jl:224
 [6] #coeftable#1(::Float64, ::typeof(coeftable), ::LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}) at /home/tim/.julia/packages/GLM/ceUyW/src/lm.jl:184
 [7] coeftable at /home/tim/.julia/packages/GLM/ceUyW/src/lm.jl:183 [inlined]
 [8] show(::IOContext{REPL.Terminals.TTYTerminal}, ::LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}) at /home/tim/.julia/packages/GLM/ceUyW/src/linpred.jl:227
 [9] _show_default(::IOContext{REPL.Terminals.TTYTerminal}, ::Any) at ./show.jl:347
 [10] show_default at ./show.jl:330 [inlined]
 [11] show at ./show.jl:327 [inlined]
 [12] show(::IOContext{REPL.Terminals.TTYTerminal}, ::MIME{Symbol("text/plain")}, ::LassoModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}}) at ./multimedia.jl:47
 [13] display(::REPL.REPLDisplay, ::MIME{Symbol("text/plain")}, ::Any) at /home/tim/src/julia-1/usr/share/julia/stdlib/v1.3/REPL/src/REPL.jl:132
 [14] display(::REPL.REPLDisplay, ::Any) at /home/tim/src/julia-1/usr/share/julia/stdlib/v1.3/REPL/src/REPL.jl:136
 [15] display(::Any) at ./multimedia.jl:323
 [16] #invokelatest#1 at ./essentials.jl:709 [inlined]
 [17] invokelatest at ./essentials.jl:708 [inlined]
 [18] print_response(::IO, ::Any, ::Bool, ::Bool, ::Any) at /home/tim/src/julia-1/usr/share/julia/stdlib/v1.3/REPL/src/REPL.jl:156
 [19] print_response(::REPL.AbstractREPL, ::Any, ::Bool, ::Bool) at /home/tim/src/julia-1/usr/share/julia/stdlib/v1.3/REPL/src/REPL.jl:141
 [20] (::REPL.var"#do_respond#38"{Bool,REPL.var"#48#57"{REPL.LineEditREPL,REPL.REPLHistoryProvider},REPL.LineEditREPL,REPL.LineEdit.Prompt})(::Any, ::Any, ::Any) at /home/tim/src/julia-1/usr/share/julia/stdlib/v1.3/REPL/src/REPL.jl:719
 [21] #invokelatest#1 at ./essentials.jl:709 [inlined]
 [22] invokelatest at ./essentials.jl:708 [inlined]
 [23] run_interface(::REPL.Terminals.TextTerminal, ::REPL.LineEdit.ModalInterface, ::REPL.LineEdit.MIState) at /home/tim/src/julia-1/usr/share/julia/stdlib/v1.3/REPL/src/LineEdit.jl:2306
 [24] run_frontend(::REPL.LineEditREPL, ::REPL.REPLBackendRef) at /home/tim/src/julia-1/usr/share/julia/stdlib/v1.3/REPL/src/REPL.jl:1045
 [25] run_repl(::REPL.AbstractREPL, ::Any) at /home/tim/src/julia-1/usr/share/julia/stdlib/v1.3/REPL/src/REPL.jl:201
 [26] (::Base.var"#768#770"{Bool,Bool,Bool,Bool})(::Module) at ./client.jl:382
 [27] #invokelatest#1 at ./essentials.jl:709 [inlined]
 [28] invokelatest at ./essentials.jl:708 [inlined]
 [29] run_main_repl(::Bool, ::Bool, ::Bool, ::Bool, ::Bool) at ./client.jl:366
 [30] exec_options(::Base.JLOptions) at ./client.jl:304
 [31] _start() at ./client.jl:460

julia> stderror(m)
ERROR: DimensionMismatch("matrix is not square: dimensions are (5, 17)")
Stacktrace:
 [1] inv!(::LinearAlgebra.Cholesky{Float64,Array{Float64,2}}) at /home/tim/src/julia-1/usr/share/julia/stdlib/v1.3/LinearAlgebra/src/LinearAlgebra.jl:221
 [2] inv at /home/tim/src/julia-1/usr/share/julia/stdlib/v1.3/LinearAlgebra/src/cholesky.jl:597 [inlined]
 [3] invchol at /home/tim/.julia/packages/GLM/ceUyW/src/linpred.jl:200 [inlined]
 [4] vcov(::LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}) at /home/tim/.julia/packages/GLM/ceUyW/src/linpred.jl:216
 [5] stderror(::LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}) at /home/tim/.julia/packages/GLM/ceUyW/src/linpred.jl:224
 [6] #stderror#31 at /home/tim/.julia/packages/StatsModels/9cHwk/src/statsmodel.jl:28 [inlined]
 [7] stderror(::LassoModel{LinearModel{GLM.LmResp{Array{Float64,1}},GLM.DensePredQR{Float64}}}) at /home/tim/.julia/packages/StatsModels/9cHwk/src/statsmodel.jl:28
 [8] top-level scope at REPL[3]:1

Note that the first error is triggered upon display, the computation of m succeeds.

Contrast this with

julia> m = fit(LassoPath, rand(5, 16), rand(5))
LassoPath (79) solutions for 17 predictors in 461 iterations):
───────────────────────────────────
               λ    pct_dev  ncoefs
───────────────────────────────────
 [1]  0.0795677   0.0             0
 [2]  0.0759513   0.0548964       2
 [3]  0.0724992   0.112467        2
 [4]  0.069204    0.164934        2
 [5]  0.0660585   0.21274         2
 [6]  0.0630561   0.256267        2
 [7]  0.0601901   0.295989        2
 [8]  0.0574543   0.332152        2
 [9]  0.054843    0.365227        2
[10]  0.0523503   0.399728        3
[11]  0.0499709   0.451952        3
[12]  0.0476996   0.499779        4
[13]  0.0455316   0.544217        4
[14]  0.0434621   0.5847          4
[15]  0.0414867   0.621608        4
[16]  0.039601    0.655211        4
[17]  0.0378011   0.685854        4
[18]  0.036083    0.713747        4
[19]  0.034443    0.739184        4
[20]  0.0328775   0.762356        4
[21]  0.0313832   0.783462        4
[22]  0.0299567   0.802699        4
[23]  0.0285952   0.820229        4
[24]  0.0272955   0.836205        4
[25]  0.0260548   0.850761        4
[26]  0.0248706   0.86401         4
[27]  0.0237402   0.876093        4
[28]  0.0226612   0.887102        4
[29]  0.0216312   0.897126        4
[30]  0.020648    0.906278        4
[31]  0.0197095   0.914592        4
[32]  0.0188137   0.922191        4
[33]  0.0179586   0.9291          4
[34]  0.0171423   0.935397        4
[35]  0.0163632   0.941135        4
[36]  0.0156195   0.946364        4
[37]  0.0149095   0.951128        4
[38]  0.0142319   0.955474        4
[39]  0.013585    0.959426        4
[40]  0.0129676   0.963036        4
[41]  0.0123782   0.966317        4
[42]  0.0118156   0.969307        4
[43]  0.0112785   0.972036        4
[44]  0.0107659   0.974516        4
[45]  0.0102766   0.976783        4
[46]  0.00980948  0.978844        4
[47]  0.00936363  0.980727        4
[48]  0.00893803  0.982438        4
[49]  0.00853179  0.983995        4
[50]  0.008144    0.985419        4
[51]  0.00777385  0.986715        4
[52]  0.00742051  0.987895        4
[53]  0.00708324  0.98897         4
[54]  0.0067613   0.98995         4
[55]  0.00645398  0.990844        4
[56]  0.00616064  0.991655        4
[57]  0.00588063  0.992398        4
[58]  0.00561335  0.993073        4
[59]  0.00535821  0.99369         4
[60]  0.00511467  0.994249        4
[61]  0.0048822   0.99476         4
[62]  0.0046603   0.995225        4
[63]  0.00444848  0.99565         4
[64]  0.00424629  0.996036        4
[65]  0.00405329  0.996389        4
[66]  0.00386906  0.99671         4
[67]  0.00369321  0.997003        4
[68]  0.00352534  0.997269        4
[69]  0.00336511  0.997511        4
[70]  0.00321216  0.997732        4
[71]  0.00306617  0.997933        4
[72]  0.0029268   0.998118        4
[73]  0.00279378  0.998285        4
[74]  0.00266679  0.998437        4
[75]  0.00254558  0.998576        4
[76]  0.00242988  0.998703        4
[77]  0.00231944  0.998817        4
[78]  0.00221402  0.998923        4
[79]  0.00211339  0.999018        4
───────────────────────────────────

julia> m.coefs[:,end]
16-element SparseArrays.SparseVector{Float64,Int64} with 4 stored entries:
  [2 ]  =  -0.112423
  [3 ]  =  -0.103032
  [6 ]  =  0.218529
  [9 ]  =  0.263868

but for which it's not possible to compute the error:

julia> stderror(m)
ERROR: vcov is not defined for LassoPath{LinearModel{GLM.LmResp{Array{Float64,1}},CovarianceCoordinateDescent{Float64,true,Array{Float64,2},Lasso.RandomCoefficientIterator,Nothing}},Float64}.
Stacktrace:
 [1] error(::String) at ./error.jl:33
 [2] vcov(::LassoPath{LinearModel{GLM.LmResp{Array{Float64,1}},CovarianceCoordinateDescent{Float64,true,Array{Float64,2},Lasso.RandomCoefficientIterator,Nothing}},Float64}) at /home/tim/.julia/packages/StatsBase/DyWPR/src/statmodels.jl:133
 [3] stderror(::LassoPath{LinearModel{GLM.LmResp{Array{Float64,1}},CovarianceCoordinateDescent{Float64,true,Array{Float64,2},Lasso.RandomCoefficientIterator,Nothing}},Float64}) at /home/tim/.julia/packages/StatsBase/DyWPR/src/statmodels.jl:126
 [4] top-level scope at REPL[6]:1
AsafManela commented 4 years ago

This may be an issue with GLM (or LinearAlgebra). I narrowed it down to this minimal example. Failed when X has more columns than rows:

julia> using GLM: DensePredQR

julia> using LinearAlgebra

julia> X = [ones(5) rand(5,16)]
5×17 Array{Float64,2}:
 1.0  0.580274  0.132501   0.37971   0.833601  0.475992  …  0.920004  0.497813  0.478095   0.511493 
 1.0  0.315058  0.544557   0.180399  0.411512  0.059083     0.254424  0.102081  0.0286219  0.647392 
 1.0  0.391001  0.504542   0.442633  0.969872  0.407162     0.435692  0.642055  0.424189   0.0115529
 1.0  0.564244  0.794587   0.878073  0.626503  0.980334     0.940471  0.326352  0.567887   0.138451 
 1.0  0.645479  0.0293748  0.633139  0.116494  0.801077     0.432296  0.132267  0.0319513  0.552074 

julia> pp = DensePredQR(X, [1.0; zeros(16)])
DensePredQR{Float64}([1.0 0.580274009583994 … 0.47809477805270717 0.5114929068474399; 1.0 0.3150583269104097 … 0.028621904555609978 0.6473922612151892; … ; 1.0 0.5642437355787289 … 0.5678874314808784 0.13845091449127933; 1.0 0.6454792441633972 … 0.031951265718494826 0.5520738462543564], [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], LinearAlgebra.QRCompactWY{Float64,Array{Float64,2}}([-2.23606797749979 -1.116270170750235 … -0.6845695901066935 -0.832247875015137; 0.3090169943749474 0.2789561805801902 … 0.10463228340229419 0.038530583627515824; … ; 0.3090169943749474 -0.08190521016680165 … -0.11848808249638054 -0.26013205225776165; 0.3090169943749474 -0.2483172515160915 … -0.38510326696972386 0.3625685822485454], [1.4472135954999579 -0.737806952509757 … 0.6614848324933887 0.0; 6.9236266118005e-310 1.749948096097638 … 0.6485165557577766 0.0; … ; 6.9236266118068e-310 6.92362661182263e-310 … 1.538444946831569 0.0; 6.92362661181e-310 6.9236266118258e-310 … 6.9236266118574e-310 0.0]))

julia> cholesky!(pp)
Cholesky{Float64,Array{Float64,2}}
U factor:
Error showing value of type Cholesky{Float64,Array{Float64,2}}:
ERROR: DimensionMismatch("matrix is not square: dimensions are (5, 17)")
Stacktrace:
 [1] show(::IOContext{REPL.Terminals.TTYTerminal}, ::MIME{Symbol("text/plain")}, ::Cholesky{Float64,Array{Float64,2}}) at /home/amanela/julialang/julia-1.2.0/share/julia/stdlib/v1.2/LinearAlgebra/src/cholesky.jl:214
 [2] display(::REPL.REPLDisplay, ::MIME{Symbol("text/plain")}, ::Any) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/REPL/src/REPL.jl:132
 [3] display(::REPL.REPLDisplay, ::Any) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/REPL/src/REPL.jl:136
 [4] display(::Any) at ./multimedia.jl:323
 [5] #invokelatest#1 at ./essentials.jl:790 [inlined]
 [6] invokelatest at ./essentials.jl:789 [inlined]
 [7] print_response(::IO, ::Any, ::Bool, ::Bool, ::Any) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/REPL/src/REPL.jl:156
 [8] print_response(::REPL.AbstractREPL, ::Any, ::Bool, ::Bool) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/REPL/src/REPL.jl:141
 [9] (::getfield(REPL, Symbol("#do_respond#38")){Bool,getfield(REPL, Symbol("##48#57")){REPL.LineEditREPL,REPL.REPLHistoryProvider},REPL.LineEditREPL,REPL.LineEdit.Prompt})(::Any, ::Any, ::Any) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/REPL/src/REPL.jl:718
 [10] #invokelatest#1 at ./essentials.jl:790 [inlined]
 [11] invokelatest at ./essentials.jl:789 [inlined]
 [12] run_interface(::REPL.Terminals.TextTerminal, ::REPL.LineEdit.ModalInterface, ::REPL.LineEdit.MIState) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/REPL/src/LineEdit.jl:2306
 [13] run_frontend(::REPL.LineEditREPL, ::REPL.REPLBackendRef) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/REPL/src/REPL.jl:1038
 [14] run_repl(::REPL.AbstractREPL, ::Any) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/REPL/src/REPL.jl:201
 [15] (::getfield(Base, Symbol("##737#739")){Bool,Bool,Bool,Bool})(::Module) at ./client.jl:390
 [16] #invokelatest#1 at ./essentials.jl:790 [inlined]
 [17] invokelatest at ./essentials.jl:789 [inlined]
 [18] run_main_repl(::Bool, ::Bool, ::Bool, ::Bool, ::Bool) at ./client.jl:374
 [19] exec_options(::Base.JLOptions) at ./client.jl:312
 [20] _start() at ./client.jl:464

Works when X has less columns than rows:

julia> X = [ones(5) rand(5,3)]
5×4 Array{Float64,2}:
 1.0  0.306428   0.31801    0.184585 
 1.0  0.232915   0.764172   0.0942903
 1.0  0.76324    0.417055   0.75017  
 1.0  0.0693803  0.0189854  0.606391 
 1.0  0.115757   0.928158   0.407045 

julia> pp = DensePredQR(X, [1.0; zeros(3)])
DensePredQR{Float64}([1.0 0.3064275898269031 0.31800993546137724 0.18458464212986359; 1.0 0.232914966109671 0.7641719493937462 0.09429026058084755; … ; 1.0 0.06938027136383895 0.018985389499792893 0.6063908045626791; 1.0 0.11575738677580527 0.9281580476591449 0.4070448096847634], [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], LinearAlgebra.QRCompactWY{Float64,Array{Float64,2}}([-2.23606797749979 -0.665328504116652 -1.094054353537571 -0.9134248915931886; 0.3090169943749474 0.5533834420112992 -0.045900417539708194 0.23951738896324892; … ; 0.3090169943749474 0.3719791225729259 -0.566711539751736 -0.44845316472968644; 0.3090169943749474 0.29726861774802116 0.38777842756814573 0.34418746977329984], [1.4472135954999579 -0.4632669079734904 -1.0287834052206186 -1.4790393596226374; NaN 1.1217495432487656 1.2826653568087332 0.04231760876926971; 1.5e-323 6.9236282580383e-310 1.35912584673567 1.0529285021131154; 6.92352103362253e-310 2.0e-323 0.0 1.788165006809973]))

julia> cholesky!(pp)
Cholesky{Float64,Array{Float64,2}}
U factor:
4×4 UpperTriangular{Float64,Array{Float64,2}}:
 -2.23607  -0.665329  -1.09405    -0.913425
   ⋅        0.553383  -0.0459004   0.239517
   ⋅         ⋅        -0.722357    0.215147
   ⋅         ⋅          ⋅         -0.448453
timholy commented 4 years ago

Thanks for fixing this!