apache / arrow-julia

Official Julia implementation of Apache Arrow
https://arrow.apache.org/julia/
Other
284 stars 59 forks source link

Improve printing? #168

Closed ericphanson closed 3 years ago

ericphanson commented 3 years ago

Using the example from https://github.com/beacon-biosignals/Onda.jl/blob/b8400b011cafbdef2a9909367f8d9c56ef1aacf4/examples/tour.jl#L72, if we read in the resulting Arrow table and let it print to the REPL, we get:

Arrow.Table: (recording = UUID[UUID("70cf6832-a238-4878-bcee-9e95fab45f37"), UUID("70cf6832-a238-4878-bcee-9e95fab45f37"), UUID("70cf6832-a238-4878-bcee-9e95fab45f37"), UUID("c5a2e1f4-ba02-42fa-aeb7-c463c11ea3b9"), UUID("c5a2e1f4-ba02-42fa-aeb7-c463c11ea3b9"), UUID("c5a2e1f4-ba02-42fa-aeb7-c463c11ea3b9"), UUID("a2dd7ca8-962b-4ece-b6cc-c14de4e2f43f"), UUID("a2dd7ca8-962b-4ece-b6cc-c14de4e2f43f"), UUID("a2dd7ca8-962b-4ece-b6cc-c14de4e2f43f"), UUID("e1d6c148-7b93-45ba-bd75-813a4158ccf3")  …  UUID("94c784b6-863a-4e8c-9814-ca2742695b82"), UUID("0f0e695a-a842-4e5c-8c21-28be62707e4b"), UUID("0f0e695a-a842-4e5c-8c21-28be62707e4b"), UUID("0f0e695a-a842-4e5c-8c21-28be62707e4b"), UUID("6c7fa6e6-bfff-4130-a213-3d27d95dcfe5"), UUID("6c7fa6e6-bfff-4130-a213-3d27d95dcfe5"), UUID("6c7fa6e6-bfff-4130-a213-3d27d95dcfe5"), UUID("134c8f45-d2d9-4efe-827f-f3cd5a363d74"), UUID("134c8f45-d2d9-4efe-827f-f3cd5a363d74"), UUID("134c8f45-d2d9-4efe-827f-f3cd5a363d74")], file_path = ["/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/70cf6832-a238-4878-bcee-9e95fab45f37_eeg.lpcm", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/70cf6832-a238-4878-bcee-9e95fab45f37_ecg.lpcm.zst", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/70cf6832-a238-4878-bcee-9e95fab45f37_spo2.lpcm", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/c5a2e1f4-ba02-42fa-aeb7-c463c11ea3b9_eeg.lpcm.zst", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/c5a2e1f4-ba02-42fa-aeb7-c463c11ea3b9_ecg.lpcm.zst", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/c5a2e1f4-ba02-42fa-aeb7-c463c11ea3b9_spo2.lpcm.zst", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/a2dd7ca8-962b-4ece-b6cc-c14de4e2f43f_eeg.lpcm.zst", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/a2dd7ca8-962b-4ece-b6cc-c14de4e2f43f_ecg.lpcm", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/a2dd7ca8-962b-4ece-b6cc-c14de4e2f43f_spo2.lpcm.zst", 
"/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/e1d6c148-7b93-45ba-bd75-813a4158ccf3_eeg.lpcm"  …  "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/94c784b6-863a-4e8c-9814-ca2742695b82_spo2.lpcm.zst", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/0f0e695a-a842-4e5c-8c21-28be62707e4b_eeg.lpcm.zst", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/0f0e695a-a842-4e5c-8c21-28be62707e4b_ecg.lpcm", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/0f0e695a-a842-4e5c-8c21-28be62707e4b_spo2.lpcm", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/6c7fa6e6-bfff-4130-a213-3d27d95dcfe5_eeg.lpcm", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/6c7fa6e6-bfff-4130-a213-3d27d95dcfe5_ecg.lpcm.zst", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/6c7fa6e6-bfff-4130-a213-3d27d95dcfe5_spo2.lpcm", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/134c8f45-d2d9-4efe-827f-f3cd5a363d74_eeg.lpcm.zst", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/134c8f45-d2d9-4efe-827f-f3cd5a363d74_ecg.lpcm", "/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_Lv8S9j/134c8f45-d2d9-4efe-827f-f3cd5a363d74_spo2.lpcm.zst"], file_format = ["lpcm", "lpcm.zst", "lpcm", "lpcm.zst", "lpcm.zst", "lpcm.zst", "lpcm.zst", "lpcm", "lpcm.zst", "lpcm"  …  "lpcm.zst", "lpcm.zst", "lpcm", "lpcm", "lpcm", "lpcm.zst", "lpcm", "lpcm.zst", "lpcm", "lpcm.zst"], span = TimeSpan[TimeSpan(00:00:23.000000000, 00:07:23.000000000), TimeSpan(00:00:26.000000000, 00:04:26.000000000), TimeSpan(00:00:15.000000000, 00:07:15.000000000), TimeSpan(00:00:17.000000000, 00:02:17.000000000), TimeSpan(00:00:07.000000000, 00:08:07.000000000), TimeSpan(00:00:24.000000000, 00:06:24.000000000), TimeSpan(00:00:19.000000000, 00:08:19.000000000), TimeSpan(00:00:06.000000000, 00:07:06.000000000), TimeSpan(00:00:02.000000000, 00:06:02.000000000), TimeSpan(00:00:14.000000000, 00:10:14.000000000)  …  TimeSpan(00:00:00.000000000, 
00:08:00.000000000), TimeSpan(00:00:14.000000000, 00:07:14.000000000), TimeSpan(00:00:07.000000000, 00:07:07.000000000), TimeSpan(00:00:26.000000000, 00:02:26.000000000), TimeSpan(00:00:07.000000000, 00:03:07.000000000), TimeSpan(00:00:06.000000000, 00:05:06.000000000), TimeSpan(00:00:19.000000000, 00:04:19.000000000), TimeSpan(00:00:24.000000000, 00:03:24.000000000), TimeSpan(00:00:03.000000000, 00:05:03.000000000), TimeSpan(00:00:18.000000000, 00:04:18.000000000)], kind = ["eeg", "ecg", "spo2", "eeg", "ecg", "spo2", "eeg", "ecg", "spo2", "eeg"  …  "spo2", "eeg", "ecg", "spo2", "eeg", "ecg", "spo2", "eeg", "ecg", "spo2"], channels = [["fp1", "f3", "c3", "p3", "f7", "t3", "t5", "o1", "fz", "cz", "pz", "fp2", "f4", "c4", "p4", "f8", "t4", "t6", "o2"], ["avl", "avr"], ["spo2"], ["fp1", "f3", "c3", "p3", "f7", "t3", "t5", "o1", "fz", "cz", "pz", "fp2", "f4", "c4", "p4", "f8", "t4", "t6", "o2"], ["avl", "avr"], ["spo2"], ["fp1", "f3", "c3", "p3", "f7", "t3", "t5", "o1", "fz", "cz", "pz", "fp2", "f4", "c4", "p4", "f8", "t4", "t6", "o2"], ["avl", "avr"], ["spo2"], ["fp1", "f3", "c3", "p3", "f7", "t3", "t5", "o1", "fz", "cz", "pz", "fp2", "f4", "c4", "p4", "f8", "t4", "t6", "o2"]  …  ["spo2"], ["fp1", "f3", "c3", "p3", "f7", "t3", "t5", "o1", "fz", "cz", "pz", "fp2", "f4", "c4", "p4", "f8", "t4", "t6", "o2"], ["avl", "avr"], ["spo2"], ["fp1", "f3", "c3", "p3", "f7", "t3", "t5", "o1", "fz", "cz", "pz", "fp2", "f4", "c4", "p4", "f8", "t4", "t6", "o2"], ["avl", "avr"], ["spo2"], ["fp1", "f3", "c3", "p3", "f7", "t3", "t5", "o1", "fz", "cz", "pz", "fp2", "f4", "c4", "p4", "f8", "t4", "t6", "o2"], ["avl", "avr"], ["spo2"]], sample_unit = ["microvolt", "microvolt", "microvolt", "microvolt", "microvolt", "microvolt", "microvolt", "microvolt", "microvolt", "microvolt"  …  "microvolt", "microvolt", "microvolt", "microvolt", "microvolt", "microvolt", "microvolt", "microvolt", "microvolt", "microvolt"], sample_resolution_in_unit = [0.25, 1.0, 1.0, 1.0, 0.25, 0.25, 0.25, 1.0, 1.0, 
0.25  …  0.25, 0.25, 1.0, 1.0, 0.25, 1.0, 1.0, 0.25, 1.0, 0.25], sample_offset_in_unit = [0, 0, -1, 1, -1, 0, 1, -1, -1, 1  …  1, 0, 0, 1, -1, -1, 1, 1, 1, 1], sample_type = ["int32", "int16", "int32", "float32", "float32", "float32", "float32", "int32", "int16", "int16"  …  "float32", "int16", "float32", "int16", "float32", "int32", "int16", "float32", "int16", "int16"], sample_rate = [143.5, 128.0, 128.0, 143.5, 143.5, 128.0, 128.0, 256.0, 128.0, 128.0  …  143.5, 128.0, 143.5, 143.5, 256.0, 256.0, 128.0, 143.5, 143.5, 143.5])

which kind of takes over the screen and is hard to read / not useful. With a super simple show method like

"""
    Base.show(io::IO, table::Arrow.Table; max_cols = 20)

Print a compact summary of `table` to `io`: a header line with the table's type, row
count, and column count, followed by one indented `name (type)` line per column.

# Keywords
- `max_cols`: maximum number of columns to list. When the table has more columns than
  this, the listing stops there and a final `⋮` line is printed.
"""
function Base.show(io::IO, table::Arrow.Table; max_cols = 20)
    cols = Tables.columnnames(table)
    ncols = length(cols)
    # A table without columns has no first column to measure; report zero rows
    # instead of failing on `Tables.getcolumn(table, 1)`.
    nrows = ncols == 0 ? 0 : length(Tables.getcolumn(table, 1))
    print(io, "$(typeof(table)) with $(nrows) rows and $(ncols) columns:")
    for col in first(cols, max_cols)
        print(io, "\n  ", col, " (", Tables.columntype(table, col), ")")
    end
    if ncols > max_cols
        print(io, "\n  ⋮")
    end
    return nothing
end

we instead get

julia> table
Arrow.Table with 30 rows and 11 columns:
  recording (UUID)
  file_path (String)
  file_format (String)
  span (TimeSpan)
  kind (String)
  channels (Vector{String})
  sample_unit (String)
  sample_resolution_in_unit (Float64)
  sample_offset_in_unit (Int64)
  sample_type (String)
  sample_rate (Float64)

which is at least simple and readable, or

julia> show(stdout, table; max_cols=10)
Arrow.Table with 30 rows and 11 columns:
  recording (UUID)
  file_path (String)
  file_format (String)
  span (TimeSpan)
  kind (String)
  channels (Vector{String})
  sample_unit (String)
  sample_resolution_in_unit (Float64)
  sample_offset_in_unit (Int64)
  sample_type (String)
  ⋮

I see, though, that there is already a show method that prints `Arrow.Table`s like NamedTuples. I wonder if maybe seeing the values is easier for testing and things like that.

quinnj commented 3 years ago

I'd definitely welcome a PR here or in Tables.jl improving the default printing of AbstractColumns/AbstractRow!