pkgw / rubbl

Rust + Hubble = astrophysics in Rust
Other
16 stars 5 forks source link

Concurrent column access causes a segfault #183

Open d3v-null opened 2 years ago

d3v-null commented 2 years ago

sigh here we go again. 🙄

it's the end of the day for me, but I'll keep investigating this tomorrow.

here's the trace

#0  0x0000000000000000 in ?? ()
#1  0x0000555555ae00c1 in rubbl_casacore::ColumnDesc::operator= (this=0x7fffc8035b50, that=...) at casacore/tables/Tables/ColumnDesc.cc:82
#2  0x0000555555ad7fb2 in rubbl_casacore::BaseColumn::columnDesc (this=0x7fffc8035b40) at casacore/tables/Tables/BaseColumn.cc:1070
#3  0x0000555555b34958 in rubbl_casacore::TableColumn::columnDesc (this=0x7ffff6822030) at casacore/tables/Tables/TableColumn.cc:124
#4  0x00005555559276ad in table_get_cell (table=..., col_name=..., row_number=52, data=0x7fffc807e330, exc=...) at src/glue.cc:1457
#5  0x00005555556a68b5 in rubbl_casatables::Table::get_cell_as_vec<f64> (self=0x7ffff6824b30, col_name=..., row=52) at /home/dev/.cargo/registry/src/github.com-1ecc6299db9ec823/rubbl_casatables-0.6.0/src/lib.rs:1753
#6  0x0000555555752eb7 in marlu::io::ms::tests::test_write_antenna_row () at src/io/ms.rs:2928
#7  0x00005555556f77ba in marlu::io::ms::tests::test_write_antenna_row::{closure#0} () at src/io/ms.rs:2880
#8  0x0000555555896f0e in core::ops::function::FnOnce::call_once<marlu::io::ms::tests::test_write_antenna_row::{closure#0}, ()> () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/core/src/ops/function.rs:227
...

areas of interest:

glue.cc:1457

    int
    table_get_cell(const GlueTable &table, const StringBridge &col_name,
                   const unsigned long row_number, void *data, ExcInfo &exc)
    {
        try {
            casacore::TableColumn col(table, bridge_string(col_name));
            const casacore::ColumnDesc &desc = col.columnDesc();    // <- segfault happens here.
            casacore::IPosition shape;

casacore/tables/Tables/ColumnDesc.cc:82

ColumnDesc& ColumnDesc::operator= (const ColumnDesc& that)
{
    if (this != &that) {
    if (allocated_p) {
        delete colPtr_p;
    }
    colPtr_p = that.colPtr_p;
    if (colPtr_p != 0) {
        colPtr_p = colPtr_p->clone();
    }
    allocated_p = True;
    }
    return *this;
}

info args

this = 0x7fffc8035b50
that = @0x7ffff6821f90: {static theirMutex = {itsMutex = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' <repeats 39 times>, __align = 0}}, colPtr_p = 0x7fffc8022380, allocated_p = false}
pkgw commented 2 years ago

This looks like it will be fun! It also looks like the crash is in some test code outside of Rubbl, so let me know if/how I can help ...

d3v-null commented 2 years ago

all good, I appreciate the moral support :P

So, effectively, threads 11 and 12 are both instantiating the ArrayColumn object for column OFFSET of table /home/dev/Marlu/tests/data/1254670392_avg/1254670392.cotter.none.trunc.ms/ANTENNA, and both end up with the same underlying table address (baseTabPtr_p = 0x7fffc804d300), like this:

casacore::ArrayColumn<CPPTYPE> col(table, bridge_string(col_name))

and they crash here:

image

The address is probably the same because both threads are getting the table from the TableCache.

So what should we do? I guess we have a few options:

... ## Thread 26 (Thread 0x7ffff6e2f640 (LWP 616771) "io::ms::tests::"): ```txt #0 core::ptr::mut_ptr::{impl#0}::is_null (self=0x7fffdc049d10) at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/core/src/ptr/mut_ptr.rs:39 #1 0x0000555555cc0f97 in core::slice::iter::{impl#172}::next (self=0x7ffff6e10900) at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/core/src/slice/iter/macros.rs:142 #2 0x0000555555cad967 in core::str::validations::next_code_point> (bytes=0x7ffff6e10900) at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/core/src/str/validations.rs:42 #3 0x0000555555cbe00f in core::str::iter::{impl#0}::next (self=0x7ffff6e10900) at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/core/src/str/iter.rs:41 #4 0x0000555555ca6e6d in alloc::vec::spec_from_iter_nested::{impl#0}::from_iter (iterator=...) at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/alloc/src/vec/spec_from_iter_nested.rs:23 #5 0x0000555555cab01b in alloc::vec::spec_from_iter::{impl#0}::from_iter (iterator=...) at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/alloc/src/vec/spec_from_iter.rs:33 #6 0x0000555555caac97 in alloc::vec::{impl#18}::from_iter (iter=...) at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/alloc/src/vec/mod.rs:2486 #7 0x0000555555cbda0b in core::iter::traits::iterator::Iterator::collect> (self=...) at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/core/src/iter/traits/iterator.rs:1745 #8 0x0000555555cbaec0 in fitsio::tables::{impl#6}::from_str (s=...) at /home/dev/.cargo/registry/src/github.com-1ecc6299db9ec823/fitsio-0.19.0/src/tables.rs:571 #9 0x0000555555cb1a5b in core::str::{impl#0}::parse (self=...) 
at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/core/src/str/mod.rs:2221 #10 0x0000555555cbce8f in fitsio::fitsfile::FitsFile::fetch_hdu_info (self=0x7ffff6e14400) at /home/dev/.cargo/registry/src/github.com-1ecc6299db9ec823/fitsio-0.19.0/src/fitsfile.rs:398 type_buffer = alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc00c8f0, _marker: core::marker::PhantomData}, cap: 71, alloc: alloc::alloc::Global}, len: 71} name_buffer = alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc00c780, _marker: core::marker::PhantomData}, cap: 71, alloc: alloc::alloc::Global}, len: 71} i = 11 __next = 11 iter = core::ops::range::Range {start: 12, end: 19} column_descriptions = alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc04e240, _marker: core::marker::PhantomData}, cap: 19, alloc: alloc::alloc::Global}, len: 11} num_cols = 19 num_rows = 256 hdu_type = 2 status = 0 #11 0x0000555555bf7300 in fitsio::tables::{impl#14}::read_cell_value<&str> (fits_file=0x7ffff6e14400, name=..., idx=81) at /home/dev/.cargo/registry/src/github.com-1ecc6299db9ec823/fitsio-0.19.0/src/tables.rs:109 #12 0x0000555555beaf76 in fitsio::hdu::FitsHdu::read_cell_value (self=0x7ffff6e15270, fits_file=0x7ffff6e14400, name=..., idx=81) at /home/dev/.cargo/registry/src/github.com-1ecc6299db9ec823/fitsio-0.19.0/src/hdu.rs:910 #13 0x0000555555bd3040 in mwalib::rfinput::read_cell_value (metafits_fptr=0x7ffff6e14400, metafits_tile_table_hdu=0x7ffff6e15270, col_name=..., row=81) at /home/dev/.cargo/registry/src/github.com-1ecc6299db9ec823/mwalib-0.11.0/src/rfinput/mod.rs:362 #14 0x0000555555bd1ef5 in mwalib::rfinput::Rfinput::read_metafits_values (metafits_fptr=0x7ffff6e14400, metafits_tile_table_hdu=0x7ffff6e15270, row=81) at /home/dev/.cargo/registry/src/github.com-1ecc6299db9ec823/mwalib-0.11.0/src/rfinput/mod.rs:263 dipole_delays = alloc::vec::Vec {buf: 
alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc00c4a0, _marker: core::marker::PhantomData}, cap: 16, alloc: alloc::alloc::Global}, len: 16} digital_gains = alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc00c710, _marker: core::marker::PhantomData}, cap: 24, alloc: alloc::alloc::Global}, len: 24} flag = 0 height_m = 377.29098510742188 east_m = -78.697998046875 north_m = 258.43099975585938 length_string = alloc::string::String {vec: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc049910, _marker: core::marker::PhantomData}, cap: 10, alloc: alloc::alloc::Global}, len: 10}} pol = mwalib::rfinput::Pol::X tile_name = alloc::string::String {vec: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc00bfc0, _marker: core::marker::PhantomData}, cap: 7, alloc: alloc::alloc::Global}, len: 7}} tile_id = 14 antenna = 3 input = 81 #15 0x0000555555bd2a28 in mwalib::rfinput::Rfinput::populate_rf_inputs (num_inputs=256, metafits_fptr=0x7ffff6e14400, metafits_tile_table_hdu=..., coax_v_factor=1.204) at /home/dev/.cargo/registry/src/github.com-1ecc6299db9ec823/mwalib-0.11.0/src/rfinput/mod.rs:319 input = 81 __next = 81 iter = core::ops::range::Range {start: 82, end: 256} rf_inputs = alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc0008d0, _marker: core::marker::PhantomData}, cap: 256, alloc: alloc::alloc::Global}, len: 81} #16 0x0000555555c08177 in mwalib::metafits_context::MetafitsContext::new_internal (metafits=0x7ffff6e20e68) at /home/dev/.cargo/registry/src/github.com-1ecc6299db9ec823/mwalib-0.11.0/src/metafits_context/mod.rs:539 num_antennas = 128 num_rf_inputs = 256 good_time_unix_ms = 1570635178000 quack_time_duration_ms = 4000 obsid = 1254670392 metafits_tile_table_hdu = fitsio::hdu::FitsHdu {info: fitsio::hdu::HduInfo::TableInfo{column_descriptions: alloc::vec::Vec {buf: 
alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc04f080, _marker: core::marker::PhantomData}, cap: 19, alloc: alloc::alloc::Global}, len: 19}, num_rows: 256}, number: 1} metafits_hdu = fitsio::hdu::FitsHdu {info: fitsio::hdu::HduInfo::ImageInfo{shape: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x8, _marker: core::marker::PhantomData}, cap: 0, alloc: alloc::alloc::Global}, len: 0}, image_type: fitsio::images::ImageType::UnsignedByte}, number: 0} metafits_fptr = fitsio::fitsfile::FitsFile {filename: alloc::string::String {vec: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc04a3e0, _marker: core::marker::PhantomData}, cap: 45, alloc: alloc::alloc::Global}, len: 45}}, open_mode: fitsio::fitsfile::FileOpenMode::READONLY, fptr: core::ptr::non_null::NonNull {pointer: 0x7fffdc04c000}} metafits_filename = alloc::string::String {vec: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc04db00, _marker: core::marker::PhantomData}, cap: 45, alloc: alloc::alloc::Global}, len: 45}} #17 0x0000555555bfe6ad in mwalib::correlator_context::CorrelatorContext::new (metafits_filename=0x7ffff6e20e68, gpubox_filenames=...) 
at /home/dev/.cargo/registry/src/github.com-1ecc6299db9ec823/mwalib-0.11.0/src/correlator_context/mod.rs:139 #18 0x0000555555820546 in marlu::io::ms::tests::test_write_vis_from_mwalib () at src/io/ms.rs:4730 array_pos = core::option::Option::Some(marlu::pos::earth::LatLngHeight {longitude_rad: 2.0362897754687257, latitude_rad: -0.46606083776035967, height_metres: 377}) table_path = std::path::PathBuf {inner: std::ffi::os_str::OsString {inner: std::sys::unix::os_str::Buf {inner: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc04fa80, _marker: core::marker::PhantomData}, cap: 30, alloc: alloc::alloc::Global}, len: 23}}}} temp_dir = tempfile::dir::TempDir {path: core::option::Option::Some(std::path::PathBuf {inner: std::ffi::os_str::OsString {inner: std::sys::unix::os_str::Buf {inner: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffdc04bdf0, _marker: core::marker::PhantomData}, cap: 16, alloc: alloc::alloc::Global}, len: 15}}}})} #19 0x00005555556f7b0a in marlu::io::ms::tests::test_write_vis_from_mwalib::{closure#0} () at src/io/ms.rs:4719 #20 0x0000555555896afe in core::ops::function::FnOnce::call_once () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/core/src/ops/function.rs:227 #21 0x00005555558d17c3 in core::ops::function::FnOnce::call_once () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/core/src/ops/function.rs:227 #22 test::__rust_begin_short_backtrace () at library/test/src/lib.rs:578 #23 0x00005555558d0224 in alloc::boxed::{impl#44}::call_once<(), (dyn core::ops::function::FnOnce<(), Output=()> + core::marker::Send), alloc::alloc::Global> () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/alloc/src/boxed.rs:1636 #24 core::panic::unwind_safe::{impl#23}::call_once<(), alloc::boxed::Box<(dyn core::ops::function::FnOnce<(), Output=()> + core::marker::Send), alloc::alloc::Global>> () at 
/rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/core/src/panic/unwind_safe.rs:271 #25 std::panicking::try::do_call + core::marker::Send), alloc::alloc::Global>>, ()> () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/std/src/panicking.rs:403 #26 std::panicking::try<(), core::panic::unwind_safe::AssertUnwindSafe + core::marker::Send), alloc::alloc::Global>>> () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/std/src/panicking.rs:367 #27 std::panic::catch_unwind + core::marker::Send), alloc::alloc::Global>>, ()> () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/std/src/panic.rs:129 #28 test::run_test_in_process () at library/test/src/lib.rs:601 #29 test::run_test::run_test_inner::{closure#0} () at library/test/src/lib.rs:493 #30 0x000055555589dd7e in test::run_test::run_test_inner::{closure#1} () at library/test/src/lib.rs:520 #31 std::sys_common::backtrace::__rust_begin_short_backtrace () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/std/src/sys_common/backtrace.rs:125 #32 0x00005555558a25d8 in std::thread::{impl#0}::spawn_unchecked::{closure#0}::{closure#0} () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/std/src/thread/mod.rs:481 #33 core::panic::unwind_safe::{impl#23}::call_once<(), std::thread::{impl#0}::spawn_unchecked::{closure#0}::{closure#0}> () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/core/src/panic/unwind_safe.rs:271 #34 std::panicking::try::do_call, ()> () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/std/src/panicking.rs:403 #35 std::panicking::try<(), core::panic::unwind_safe::AssertUnwindSafe> () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/std/src/panicking.rs:367 #36 std::panic::catch_unwind, ()> () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/std/src/panic.rs:129 #37 std::thread::{impl#0}::spawn_unchecked::{closure#0} () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/std/src/thread/mod.rs:480 #38 
core::ops::function::FnOnce::call_once () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/core/src/ops/function.rs:227 #39 0x00005555561d1eb3 in alloc::boxed::{impl#44}::call_once<(), dyn core::ops::function::FnOnce<(), Output=()>, alloc::alloc::Global> () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/alloc/src/boxed.rs:1636 #40 alloc::boxed::{impl#44}::call_once<(), alloc::boxed::Box, alloc::alloc::Global>, alloc::alloc::Global> () at /rustc/09c42c45858d5f3aedfa670698275303a3d19afa/library/alloc/src/boxed.rs:1636 #41 std::sys::unix::thread::{impl#2}::new::thread_start () at library/std/src/sys/unix/thread.rs:106 #42 0x00007ffff7d5b259 in start_thread () from /usr/lib/libpthread.so.0 No symbol table info available. #43 0x00007ffff7b395e3 in clone () from /usr/lib/libc.so.6 No symbol table info available. ``` ## Thread 12 (Thread 0x7ffff662b640 (LWP 616757) "io::ms::tests::"): - `casacore/tables/Tables/ColumnDesc.cc:86` -> `colPtr_p = colPtr_p->clone();` ```txt #0 0x0000555555ae00f0 in rubbl_casacore::ColumnDesc::operator= (this=0x7fffc8022f20, that=...) at casacore/tables/Tables/ColumnDesc.cc:86 #1 0x0000555555ad7fb2 in rubbl_casacore::BaseColumn::columnDesc (this=0x7fffc8022f10) at casacore/tables/Tables/BaseColumn.cc:1070 #2 0x0000555555b34958 in rubbl_casacore::TableColumn::columnDesc (this=0x7ffff6620460) at casacore/tables/Tables/TableColumn.cc:124 #3 0x000055555592743a in table_get_cell_info (table=..., col_name=..., row_number=3, data_type=0x7ffff66205b0, n_dim=0x7ffff66205b4, dims=0x7ffff66205b8, exc=...) 
at src/glue.cc:1424 col = {_vptr.TableColumn = 0x5555565380f8 , baseTabPtr_p = 0x7fffc804d300, baseColPtr_p = 0x7fffc8022f10, colCachePtr_p = 0x7fffc807e880, canChangeShape_p = false, isColWritable_p = true} desc = @0x555555ca439d: {colPtr_p = 0x2454894824048948, allocated_p = 8} #4 0x00005555556a6688 in rubbl_casatables::Table::get_cell_as_vec (self=0x7ffff6623ae0, col_name=..., row=3) at /home/dev/.cargo/registry/src/github.com-1ecc6299db9ec823/rubbl_casatables-0.6.0/src/lib.rs:1726 dims = [0, 0, 0, 0, 0, 0, 0, 0] n_dim = 0 data_type = rubbl_casatables::glue::GlueDataType::TpOther ccol_name = rubbl_casatables::glue::StringBridge {data: 0x7fffcc07fc50, n_bytes: 6} #5 0x0000555555762242 in marlu::io::ms::tests::test_write_antenna_row_mwa () at src/io/ms.rs:2985 row_idx = 3 __next = 3 iter = core::ops::range::Range {start: 4, end: 128} col_desc = rubbl_casatables::ColumnDescription {name: alloc::string::String {vec: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffcc018440, _marker: core::marker::PhantomData}, cap: 6, alloc: alloc::alloc::Global}, len: 6}}, data_type: rubbl_casatables::glue::GlueDataType::TpDouble, is_scalar: false, is_fixed_shape: true, shape: core::option::Option>::Some(alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffcc035ba0, _marker: core::marker::PhantomData}, cap: 4, alloc: alloc::alloc::Global}, len: 1}), keywords: rubbl_casatables::TableRecord {handle: 0x7fffcc0360d0, exc_info: rubbl_casatables::glue::ExcInfo {message: [0 ]}}} col_name = 0x7fffcc05cc60 __next = 0x7fffcc05cc60 iter = core::slice::iter::Iter {ptr: core::ptr::non_null::NonNull {pointer: 0x7fffcc05cc78}, end: 0x7fffcc05cd98, _marker: core::marker::PhantomData<&alloc::string::String>} expected_table = rubbl_casatables::Table {handle: 0x7fffcc021450, exc_info: rubbl_casatables::glue::ExcInfo {message: [0 ]}} ant_table = rubbl_casatables::Table {handle: 0x7fffcc021580, exc_info: 
rubbl_casatables::glue::ExcInfo {message: [0 ]}} ant_table = rubbl_casatables::Table {handle: 0x7fffcc057240, exc_info: rubbl_casatables::glue::ExcInfo {message: [0 ]}} ant_table_path = std::path::PathBuf {inner: std::ffi::os_str::OsString {inner: std::sys::unix::os_str::Buf {inner: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffcc052620, _marker: core::marker::PhantomData}, cap: 46, alloc: alloc::alloc::Global}, len: 31}}}} ms_writer = marlu::io::ms::MeasurementSetWriter {path: std::path::PathBuf {inner: std::ffi::os_str::OsString {inner: std::sys::unix::os_str::Buf {inner: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffcc000cf0, _marker: core::marker::PhantomData}, cap: 23, alloc: alloc::alloc::Global}, len: 23}}}}, phase_centre: marlu::pos::radec::RADec {ra: 0, dec: -0.47123889803846897}, array_pos: marlu::pos::earth::LatLngHeight {longitude_rad: 2.0362898668561042, latitude_rad: -0.46606084483863941, height_metres: 377.827}} phase_centre = marlu::pos::radec::RADec {ra: 0, dec: -0.47123889803846897} table_path = std::path::PathBuf {inner: std::ffi::os_str::OsString {inner: std::sys::unix::os_str::Buf {inner: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffcc000ee0, _marker: core::marker::PhantomData}, cap: 30, alloc: alloc::alloc::Global}, len: 23}}}} temp_dir = tempfile::dir::TempDir {path: core::option::Option::Some(std::path::PathBuf {inner: std::ffi::os_str::OsString {inner: std::sys::unix::os_str::Buf {inner: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffcc000ec0, _marker: core::marker::PhantomData}, cap: 16, alloc: alloc::alloc::Global}, len: 15}}}})} #6 0x00005555556f77da in marlu::io::ms::tests::test_write_antenna_row_mwa::{closure#0} () at src/io/ms.rs:2933 ``` ```txt -exec frame 0 -exec p *this $7 = {static theirMutex = {itsMutex = {__data = {__lock = 0, __count = 0, 
__owner = 0, __nusers = 0, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' , __align = 0}}, colPtr_p = 0x7fffc8021cb0, allocated_p = true} -exec p that $4 = (const rubbl_casacore::ColumnDesc &) @0x7ffff66203b0: {static theirMutex = {itsMutex = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = '\000' , __align = 0}}, colPtr_p = 0x7fffc8021cb0, allocated_p = false} -exec p *that.colPtr_p $2 = {_vptr.BaseColumnDesc = 0x7ff833fb66a1, colName_p = {, std::allocator >> = "\005\000\006\000\a", static npos = 18446744073709551615}, comment_p = {, std::allocator >> = "#o\375\063\370\177\000\000\320\b\000\314\377\177\000\000ount to FEED REFERENCE point", static npos = 18446744073709551615}, dataManType_p = {, std::allocator >> = "StandardStMan", static npos = 18446744073709551615}, dataManGroup_p = {, std::allocator >> = "StandardStMan", static npos = 18446744073709551615}, dtype_p = rubbl_casacore::TpDouble, dtypeId_p = {, std::allocator >> = "double ", static npos = 18446744073709551615}, option_p = 5, nrdim_p = 1, shape_p = {size_p = 1, buffer_p = {3, 0, 0, 140736548969832}, data_p = 0x7fffc8021d70}, maxLength_p = 0, keySetPtr_p = 0x7fffc8021c60, isScalar_p = false, isArray_p = true, isTable_p = false} -exec frame 3 -exec x/s col_name.data 0x7fffcc07fc50: "OFFSET" -exec p table $10 = (const rubbl_casacore::Table &) @0x7fffcc021450: {_vptr.Table = 0x5555565380c8 , baseTabPtr_p = 0x7fffc804d300, isCounted_p = true, lastModCounter_p = 0, static scratchCallback_p = 0x0} -exec p *table.baseTabPtr_p $12 = {_vptr.BaseTable = 0x555556534600 , nrlink_p = 2, nrrow_p = 128, nrrowToAdd_p = 0, tdescPtr_p = 0x7fffc8049d00, name_p = {, std::allocator >> = "/home/dev/Marlu/tests/data/1254670392_avg/1254670392.cotter.none.trunc.ms/ANTENNA", static npos = 18446744073709551615}, option_p = 1, noWrite_p = false, delete_p = false, info_p = {type_p 
= {, std::allocator >> = "", static npos = 18446744073709551615}, subType_p = {, std::allocator >> = "", static npos = 18446744073709551615}, readme_p = {, std::allocator >> = "", static npos = 18446744073709551615}, writeIt_p = true}, madeDir_p = true, itsTraceId = -1} ``` ## Thread 11 (Thread 0x7ffff682c640 (LWP 616756) "io::ms::tests::"): - `casacore/tables/Tables/ColumnDesc.cc:82` -> `delete colPtr_p;` ```txt #0 0x0000555555ae00b9 in rubbl_casacore::ColumnDesc::operator= (this=0x7fffc8022f20, that=...) at casacore/tables/Tables/ColumnDesc.cc:82 #1 0x0000555555ad7fb2 in rubbl_casacore::BaseColumn::columnDesc (this=0x7fffc8022f10) at casacore/tables/Tables/BaseColumn.cc:1070 #2 0x0000555555aaa191 in rubbl_casacore::ArrayColumn::checkDataType (this=0x7ffff6822060) at ./casacore/tables/Tables/ArrayColumn.tcc:130 cd = @0x555555ae9e6e: {colPtr_p = 0xf8558b4820c08348, allocated_p = 100} dtype = 32767 #3 0x0000555555aa9b56 in rubbl_casacore::ArrayColumn::ArrayColumn (this=0x7ffff6822060, tab=..., columnName=...) at ./casacore/tables/Tables/ArrayColumn.tcc:69 #4 0x00005555559282c7 in table_get_cell (table=..., col_name=..., row_number=74, data=0x7fffc807e330, exc=...) 
at src/glue.cc:1500 col = { = {_vptr.TableColumn = 0x555556531178 +16>, baseTabPtr_p = 0x7fffc804d300, baseColPtr_p = 0x7fffc8022f10, colCachePtr_p = 0x7fffc807e880, canChangeShape_p = false, isColWritable_p = true}, canAccessSlice_p = false, canAccessColumn_p = false, canAccessColumnSlice_p = false, reaskAccessSlice_p = true, reaskAccessColumn_p = true, reaskAccessColumnSlice_p = true} array = { = {_vptr.ArrayBase = 0x7ffff68220d0, nels_p = 6, ndimen_p = 1397114447, contiguous_p = 69, length_p = {size_p = 1442255989, buffer_p = {0, 140737329111240, 24, 140737329111728}, data_p = 0x3}, inc_p = {size_p = 4135723696, buffer_p = {0, 24, 8, 140736549348144}, data_p = 0x18}, originalLength_p = {size_p = 3355960112, buffer_p = {24, 93824999900061, 140736549348144, 3}, data_p = 0x1}, steps_p = {size_p = 3, buffer_p = {0, 93824999918788, 140736549348144, 0}, data_p = 0x7fffc807e330}}, data_p = {pointerRep_p = std::shared_ptr> (use count 3, weak count -1) = {get() = 0x55555591b2fd +29>}}, begin_p = 0x7fffc807e330, end_p = 0x7fffc807e330} col = {_vptr.TableColumn = 0x5555565380f8 , baseTabPtr_p = 0x7fffc804d300, baseColPtr_p = 0x7fffc8022f10, colCachePtr_p = 0x7fffc807e880, canChangeShape_p = false, isColWritable_p = true} desc = @0x7fffc8022f20: {colPtr_p = 0x7fffc8021cb0, allocated_p = true} shape = {size_p = 1, buffer_p = {3, 140736549348144, 3, 24}, data_p = 0x7ffff6822098} #5 0x00005555556a68b5 in rubbl_casatables::Table::get_cell_as_vec (self=0x7ffff6824b30, col_name=..., row=74) at /home/dev/.cargo/registry/src/github.com-1ecc6299db9ec823/rubbl_casatables-0.6.0/src/lib.rs:1753 result = alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffc807e330, _marker: core::marker::PhantomData}, cap: 3, alloc: alloc::alloc::Global}, len: 0} n_items = 3 rv = 0 dims = [3, 0, 0, 0, 0, 0, 0, 0] n_dim = 1 data_type = rubbl_casatables::glue::GlueDataType::TpDouble ccol_name = rubbl_casatables::glue::StringBridge {data: 0x5555562024f7, n_bytes: 6} 
#6 0x0000555555752eb7 in marlu::io::ms::tests::test_write_antenna_row () at src/io/ms.rs:2928 row_idx = 74 __next = 74 iter = core::ops::range::Range {start: 75, end: 128} col_desc = rubbl_casatables::ColumnDescription {name: alloc::string::String {vec: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffc807e2c0, _marker: core::marker::PhantomData}, cap: 6, alloc: alloc::alloc::Global}, len: 6}}, data_type: rubbl_casatables::glue::GlueDataType::TpDouble, is_scalar: false, is_fixed_shape: true, shape: core::option::Option>::Some(alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffc801a5f0, _marker: core::marker::PhantomData}, cap: 4, alloc: alloc::alloc::Global}, len: 1}), keywords: rubbl_casatables::TableRecord {handle: 0x7fffc801a4b0, exc_info: rubbl_casatables::glue::ExcInfo {message: [0 ]}}} col_name = "OFFSET" __next = "OFFSET" iter = core::array::iter::IntoIter<&str, 8> {data: [core::mem::maybe_uninit::MaybeUninit<&str> {uninit: (), value: core::mem::manually_drop::ManuallyDrop<&str> {value: "OFFSET"}}, core::mem::maybe_uninit::MaybeUninit<&str> {uninit: (), value: core::mem::manually_drop::ManuallyDrop<&str> {value: "POSITION"}}, core::mem::maybe_uninit::MaybeUninit<&str> {uninit: (), value: core::mem::manually_drop::ManuallyDrop<&str> {value: "TYPE"}}, core::mem::maybe_uninit::MaybeUninit<&str> {uninit: (), value: core::mem::manually_drop::ManuallyDrop<&str> {value: "DISH_DIAMETER"}}, core::mem::maybe_uninit::MaybeUninit<&str> {uninit: (), value: core::mem::manually_drop::ManuallyDrop<&str> {value: "FLAG_ROW"}}, core::mem::maybe_uninit::MaybeUninit<&str> {uninit: (), value: core::mem::manually_drop::ManuallyDrop<&str> {value: "MOUNT"}}, core::mem::maybe_uninit::MaybeUninit<&str> {uninit: (), value: core::mem::manually_drop::ManuallyDrop<&str> {value: "NAME"}}, core::mem::maybe_uninit::MaybeUninit<&str> {uninit: (), value: core::mem::manually_drop::ManuallyDrop<&str> {value: 
"STATION"}}], alive: core::ops::range::Range {start: 1, end: 8}} expected_table = rubbl_casatables::Table {handle: 0x7fffc8016bc0, exc_info: rubbl_casatables::glue::ExcInfo {message: [0 ]}} ant_table = rubbl_casatables::Table {handle: 0x7fffc801cda0, exc_info: rubbl_casatables::glue::ExcInfo {message: [0 ]}} ant_table_path = std::path::PathBuf {inner: std::ffi::os_str::OsString {inner: std::sys::unix::os_str::Buf {inner: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffc80245c0, _marker: core::marker::PhantomData}, cap: 46, alloc: alloc::alloc::Global}, len: 31}}}} ms_writer = marlu::io::ms::MeasurementSetWriter {path: std::path::PathBuf {inner: std::ffi::os_str::OsString {inner: std::sys::unix::os_str::Buf {inner: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffc8000cf0, _marker: core::marker::PhantomData}, cap: 23, alloc: alloc::alloc::Global}, len: 23}}}}, phase_centre: marlu::pos::radec::RADec {ra: 0, dec: -0.47123889803846897}, array_pos: marlu::pos::earth::LatLngHeight {longitude_rad: 2.0362898668561042, latitude_rad: -0.46606084483863941, height_metres: 377.827}} phase_centre = marlu::pos::radec::RADec {ra: 0, dec: -0.47123889803846897} table_path = std::path::PathBuf {inner: std::ffi::os_str::OsString {inner: std::sys::unix::os_str::Buf {inner: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffc8000ee0, _marker: core::marker::PhantomData}, cap: 30, alloc: alloc::alloc::Global}, len: 23}}}} temp_dir = tempfile::dir::TempDir {path: core::option::Option::Some(std::path::PathBuf {inner: std::ffi::os_str::OsString {inner: std::sys::unix::os_str::Buf {inner: alloc::vec::Vec {buf: alloc::raw_vec::RawVec {ptr: core::ptr::unique::Unique {pointer: 0x7fffc8000ec0, _marker: core::marker::PhantomData}, cap: 16, alloc: alloc::alloc::Global}, len: 15}}}})} #7 0x00005555556f77ba in marlu::io::ms::tests::test_write_antenna_row::{closure#0} () 
at src/io/ms.rs:2880 ```
```txt
-exec frame 3
-exec info args
this = 0x7ffff6822060
tab = @0x7fffc8016bc0: {_vptr.Table = 0x5555565380c8 , baseTabPtr_p = 0x7fffc804d300, isCounted_p = true, lastModCounter_p = 0, static scratchCallback_p = 0x0}
columnName = @0x7ffff68220c0: {, std::allocator >> = "OFFSET", static npos = 18446744073709551615}
```
pkgw commented 2 years ago

Sigh. I guess we're going to need some kind of global mutex — I think it's unacceptable to have known usage patterns that can cause crashes.

With that in mind, I suspect that your third suggestion is the best way to go: if the table cache is causing problems (and I suspect that it is), I think that we'll want to enforce single-threaded access in a way that tracks the cache's indexing. I'd guess that a mutex restricted to Columns in particular might solve the current problem, but that other, similar problems would crop up with other use cases.

Whatever the solution is, we should make sure to describe it well in the API docs in the appropriate places.