JuliaData / JuliaDB.jl

Parallel analytical database in pure Julia
http://juliadb.org/
Other
766 stars 61 forks source link

select on distributed table #221

Open jstrube opened 6 years ago

jstrube commented 6 years ago

I've run into an issue with a distributed table (see also https://discourse.julialang.org/t/first-steps-in-juliadb/14010): calling `select` on it throws an ambiguity `MethodError` for `Dagger.distribute`.

julia> tb = load("Digits")
Distributed Table with 66889667 rows in 10 chunks:
Columns:
#   colname     type
───────────────────────
1   idx         Int64
2   mcpidx      Int64
3   pixelID     Int64
4   time        Float64
5   x           Float64
6   y           Float64
7   z           Float64
8   px          Float64
9   py          Float64
10  pz          Float64
11  thetaC      Float64
12  localTheta  Float64
13  localPhi    Float64

julia> select(tb, :x)
MethodError: Dagger.distribute(::Dagger.DArray{Int64,1,JuliaDB.##58#60}, ::Dagger.DomainBlocks{1}) is ambiguous. Candidates:
  distribute(source, rowgroups::AbstractArray) in IterableTables at /home/ilc/jstrube/.julia/v0.6/IterableTables/src/integrations/juliadb.jl:19
  distribute(x::AbstractArray, dist) in Dagger at /home/ilc/jstrube/.julia/v0.6/Dagger/src/array/darray.jl:390
Possible fix, define
  distribute(::AbstractArray, ::AbstractArray)

Stacktrace:
 [1] macro expansion at /home/ilc/jstrube/.julia/v0.6/NamedTuples/src/NamedTuples.jl:289 [inlined]
 [2] _map(::JuliaDB.##39#43, ::NamedTuples._NT_idx_mcpidx_pixelID_time_x_y_z_px_py_pz_thetaC_localTheta_localPhi{Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60}}) at /home/ilc/jstrube/.julia/v0.6/NamedTuples/src/NamedTuples.jl:276
 [3] #table#37(::Void, ::Array{Any,1}, ::Function, ::Val{:distributed}, ::NamedTuples._NT_idx_mcpidx_pixelID_time_x_y_z_px_py_pz_thetaC_localTheta_localPhi{Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60}}) at /home/ilc/jstrube/.julia/v0.6/JuliaDB/src/table.jl:90
 [4] (::IndexedTables.#kw##table)(::Array{Any,1}, ::IndexedTables.#table, ::Val{:distributed}, ::NamedTuples._NT_idx_mcpidx_pixelID_time_x_y_z_px_py_pz_thetaC_localTheta_localPhi{Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60}}) at ./<missing>:0
 [5] #table#104(::Void, ::Array{Any,1}, ::Function, ::NamedTuples._NT_idx_mcpidx_pixelID_time_x_y_z_px_py_pz_thetaC_localTheta_localPhi{Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60}}) at /home/ilc/jstrube/.julia/v0.6/IndexedTables/src/table.jl:247
 [6] (::IndexedTables.#kw##table)(::Array{Any,1}, ::IndexedTables.#table, ::NamedTuples._NT_idx_mcpidx_pixelID_time_x_y_z_px_py_pz_thetaC_localTheta_localPhi{Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Int64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60},Dagger.DArray{Float64,1,JuliaDB.##58#60}}) at ./<missing>:0
 [7] #table#107(::Array{Symbol,1}, ::Array{Any,1}, ::Function, ::Dagger.DArray{Int64,1,JuliaDB.##58#60}, ::Vararg{AbstractArray,N} where N) at /home/ilc/jstrube/.julia/v0.6/IndexedTables/src/table.jl:259
 [8] (::IndexedTables.#kw##table)(::Array{Any,1}, ::IndexedTables.#table, ::Dagger.DArray{Int64,1,JuliaDB.##58#60}, ::Dagger.DArray{Int64,1,JuliaDB.##58#60}, ::Dagger.DArray{Int64,1,JuliaDB.##58#60}, ::Vararg{AbstractArray,N} where N) at ./<missing>:0
 [9] getindex(::IndexedTables.ColDict{JuliaDB.DNextTable{NamedTuples._NT_idx_mcpidx_pixelID_time_x_y_z_px_py_pz_thetaC_localTheta_localPhi{Int64,Int64,Int64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64},Tuple{Int64}}}) at /home/ilc/jstrube/.julia/v0.6/IndexedTables/src/table.jl:360
 [10] select(::JuliaDB.DNextTable{NamedTuples._NT_idx_mcpidx_pixelID_time_x_y_z_px_py_pz_thetaC_localTheta_localPhi{Int64,Int64,Int64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64},Tuple{Int64}}, ::Symbol) at /home/ilc/jstrube/.julia/v0.6/IndexedTables/src/selection.jl:131

I'm just starting with JuliaDB; any suggestions for workarounds are welcome.

andreasnoack commented 6 years ago

Which versions of Julia and packages are you using?

jstrube commented 6 years ago
               _
   _       _ _(_)_     |  A fresh approach to technical computing
  (_)     | (_) (_)    |  Documentation: https://docs.julialang.org
   _ _   _| |_  __ _   |  Type "?help" for help.
  | | | | | | |/ _` |  |
  | | |_| | | | (_| |  |  Version 0.6.4 (2018-07-09 19:09 UTC)
 _/ |\__'_|_|_|\__'_|  |  Official http://julialang.org/ release
|__/                   |  x86_64-pc-linux-gnu

julia> Pkg.status()
9 required packages:
 - Distributions                 0.15.0
 - HDF5                          0.9.4
 - IJulia                        1.9.3
 - JuliaDB                       0.8.4
 - JuliaDBMeta                   0.3.0
 - OnlineStats                   0.18.0
 - Plots                         0.17.4
 - Queryverse                    0.0.2
 - StatPlots                     0.7.3
121 additional packages:
 - Arrow                         0.1.2
 - AxisAlgorithms                0.3.0
 - BinDeps                       0.8.10
 - BinaryProvider                0.3.3
 - Blosc                         0.5.1
 - BufferedStreams               0.4.1
 - CMakeWrapper                  0.1.0
 - CSVFiles                      0.8.0
 - Cairo                         0.5.4
 - Calculus                      0.4.1
 - CategoricalArrays             0.3.13
 - Clustering                    0.9.1
 - CodecZlib                     0.4.4
 - ColorTypes                    0.6.7
 - Colors                        0.8.2
 - CommonSubexpressions          0.1.0
 - Compat                        1.0.1
 - Conda                         1.0.1
 - Contour                       0.4.0
 - Dagger                        0.6.2
 - DataFrames                    0.11.7
 - DataStreams                   0.3.6
 - DataStructures                0.8.4
 - DataValues                    0.3.3
 - DataVoyager                   0.2.0
 - DiffEqDiffTools               0.4.1
 - DiffResults                   0.0.3
 - DiffRules                     0.0.7
 - Distances                     0.6.0
 - DocStringExtensions           0.4.6
 - Documenter                    0.19.6
 - Electron                      0.1.2
 - ExcelFiles                    0.5.0
 - ExcelReaders                  0.9.0
 - FeatherFiles                  0.4.0
 - FeatherLib                    0.0.1
 - FileIO                        0.9.1
 - FilePaths                     0.7.0
 - FilePathsBase                 0.3.1
 - FixedPointNumbers             0.4.6
 - FlatBuffers                   0.3.2
 - ForwardDiff                   0.7.5
 - GR                            0.32.3
 - Glob                          1.1.1
 - Graphics                      0.3.0
 - HTTP                          0.6.14
 - IndexedTables                 0.7.4
 - IniFile                       0.4.0
 - Interpolations                0.8.0
 - IterTools                     0.2.1
 - IterableTables                0.7.3
 - IteratorInterfaceExtensions   0.0.2
 - JSON                          0.17.2
 - Juno                          0.4.1
 - KernelDensity                 0.4.1
 - LearnBase                     0.1.6
 - Libz                          0.4.0
 - LineSearches                  6.0.2
 - Logging                       0.3.1
 - LossFunctions                 0.2.0
 - MacroTools                    0.4.4
 - MbedTLS                       0.5.12
 - Measures                      0.2.0
 - Media                         0.3.0
 - MemPool                       0.0.11
 - Missings                      0.2.10
 - NLSolversBase                 6.1.1
 - NaNMath                       0.3.2
 - NamedTuples                   4.0.2
 - NearestNeighbors              0.3.0
 - NodeJS                        0.4.0
 - Nullables                     0.0.7
 - Observables                   0.1.2
 - OffsetArrays                  0.6.0
 - OnlineStatsBase               0.8.0
 - Optim                         0.15.3
 - PDMats                        0.8.0
 - Parameters                    0.9.2
 - Parquet                       0.1.0
 - ParquetFiles                  0.0.1
 - PenaltyFunctions              0.0.2
 - PlotThemes                    0.2.0
 - PlotUtils                     0.4.4
 - PooledArrays                  0.2.2
 - PositiveFactorizations        0.1.0
 - ProtoBuf                      0.6.1
 - PyCall                        1.18.2
 - QuadGK                        0.3.0
 - Query                         0.9.3
 - QueryOperators                0.2.3
 - Ratios                        0.3.0
 - ReadStat                      0.3.0
 - RecipesBase                   0.3.1
 - Reexport                      0.1.0
 - Requires                      0.4.4
 - Rmath                         0.4.0
 - Rsvg                          0.2.1
 - SHA                           0.5.7
 - ShowItLikeYouBuildIt          0.2.0
 - Showoff                       0.2.1
 - Snappy                        0.2.2
 - SortingAlgorithms             0.2.1
 - SpecialFunctions              0.6.0
 - StatFiles                     0.5.0
 - StaticArrays                  0.7.2
 - StatsBase                     0.23.1
 - StatsFuns                     0.6.1
 - SweepOperator                 0.1.0
 - TableShowUtils                0.0.2
 - TableTraits                   0.2.0
 - TableTraitsUtils              0.1.3
 - TextParse                     0.5.0
 - Thrift                        0.5.1
 - TranscodingStreams            0.5.4
 - URIParser                     0.3.1
 - VegaLite                      0.3.4
 - VersionParsing                1.1.2
 - WeakRefStrings                0.4.7
 - Widgets                       0.2.5
 - WoodburyMatrices              0.3.0
 - ZMQ                           0.6.4

julia> versioninfo()
Julia Version 0.6.4
Commit 9d11f62bcb (2018-07-09 19:09 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Xeon(R) CPU E5-2697 v3 @ 2.60GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=16)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.9.1 (ORCJIT, haswell)