timescale / timescaledb

An open-source time-series SQL database optimized for fast ingest and complex queries. Packaged as a PostgreSQL extension.
https://www.timescale.com/
Other
17.95k stars 883 forks source link

[Bug]: Background worker-related assertion hit in CI #4068

Open erimatnor opened 2 years ago

erimatnor commented 2 years ago

What type of bug is this?

Crash

What subsystems and features are affected?

Background worker

What happened?

A test failed on a code assertion when running in CI: https://github.com/timescale/timescaledb/runs/5109972827?check_suite_focus=true

TimescaleDB version affected

2.6.0-dev

PostgreSQL version used

12.9

What operating system did you use?

Ubuntu 20.04

What installation method did you use?

Source

What platform did you run on?

Not applicable

Relevant log output and stack trace

[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
50  ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
Core was generated by `postgres: ts_bgw_db_scheduler_test_main                                       '.
Program terminated with signal SIGABRT, Aborted.
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
(gdb) #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
        set = {__val = {0, 140732841379872, 0, 1, 9, 12766376462930762752, 
            1024, 1, 94721280771328, 0, 94721280771600, 1, 94721280771592, 
            94721247749794, 94721208745996, 94721247748085}}
        pid = <optimized out>
        tid = <optimized out>
        ret = <optimized out>
#1  0x00007fd13cf84859 in __GI_abort () at abort.c:79
        save_stage = 1
        act = {__sigaction_handler = {sa_handler = 0x56260283c078, 
            sa_sigaction = 0x56260283c078}, sa_mask = {__val = {
              94721250933536, 1126, 94721250933536, 2305843013156653600, 
              12766376462930762752, 2305843013156653744, 
              12766376462930762752, 64, 94721247857206, 140732841380560, 
              12766376462930762752, 94721280771552, 2, 94721280771552, 
              140536649659840, 140536649663648}}, sa_flags = 1023296838, 
          sa_restorer = 0x5626044a5a30}
        sigs = {__val = {32, 0 <repeats 15 times>}}
#2  0x00005626026881fa in ExceptionalCondition (
    conditionName=conditionName@entry=0x56260283c078 "!(handle->slot < max_worker_processes)", errorType=errorType@entry=0x5626026dd01d "FailedAssertion", 
    fileName=fileName@entry=0x56260283bb20 "bgworker.c", 
    lineNumber=lineNumber@entry=1126) at assert.c:54
No locals.
#3  0x00005626024c6503 in GetBackgroundWorkerPid (handle=<optimized out>, 
    pidp=<optimized out>) at bgworker.c:1126
        slot = <optimized out>
        pid = <optimized out>
#4  GetBackgroundWorkerPid (handle=<optimized out>, pidp=<optimized out>)
    at bgworker.c:1121
        slot = <optimized out>
        pid = <optimized out>
#5  0x00005626024c6603 in WaitForBackgroundWorkerShutdown (
    handle=0x562604505cc0) at bgworker.c:1232
        pid = 16809982
        status = <optimized out>
        rc = <optimized out>
#6  0x00007fd1331dd5cd in mock_wait (until=1501000000)
    at /home/runner/work/timescaledb/timescaledb/test/src/bgw/timer_mock.c:56
        __func__ = "mock_wait"
#7  0x00007fd1331970ca in ts_timer_wait (until=1501000000)
    at /home/runner/work/timescaledb/timescaledb/src/bgw/timer.c:106
No locals.
#8  0x00007fd1331964be in ts_bgw_scheduler_process (
    run_for_interval_ms=2000000, 
    bgw_register=0x7fd1331dd427 <ts_timer_mock_register_bgw_handle>)
    at /home/runner/work/timescaledb/timescaledb/src/bgw/scheduler.c:768
        next_wakeup = 1501000000
        start = 0
        quit_time = 2000000000
        __func__ = "ts_bgw_scheduler_process"
#9  0x00007fd1331de110 in ts_bgw_db_scheduler_test_main (fcinfo=0xa582)
    at /home/runner/work/timescaledb/timescaledb/test/src/bgw/scheduler_mock.c:144
        db_oid = 42370
        ttl = 2000000
        user_oid = 16386
        __func__ = "ts_bgw_db_scheduler_test_main"
#10 0x00005626024c5fd2 in StartBackgroundWorker () at bgworker.c:911
        local_sigjmp_buf = {{__jmpbuf = {94721280730592, 
              -3927922126142007946, 0, 94721252424256, 94721280693680, 
              94721280727600, -7292197359484103306, -3927917664171551370}, 
            __mask_was_saved = 1, __saved_mask = {__val = {
                18446744066192964103, 0, 94721280385584, 94721252424256, 0, 
                94721280727600, 94721249314022, 94721280385584, 0, 
                94721280385584, 94721249341417, 94721280385800, 
                94721249313832, 94721252424256, 94721280730592, 0}}}}
        worker = 0x56260444c8d0
        entrypt = <optimized out>
        __func__ = "StartBackgroundWorker"
#11 0x00005626024d3943 in do_start_bgworker (rw=<optimized out>)
    at postmaster.c:5876
        worker_pid = <optimized out>
        worker_pid = <optimized out>
        __func__ = "do_start_bgworker"
        __errno_location = <optimized out>
        __errno_location = <optimized out>
#12 maybe_start_bgworkers () at postmaster.c:6101
        rw = <optimized out>
        num_launched = 0
        now = 0
        iter = {cur = 0x5626044a65e0, next = 0x5626044a1170, 
          prev = 0x5626029b7c80 <BackgroundWorkerList>}
#13 0x00005626024d4302 in sigusr1_handler (
    postgres_signal_arg=<optimized out>) at postmaster.c:5238
        save_errno = 4
        __func__ = "sigusr1_handler"
#14 <signal handler called>
No locals.
#15 0x00007fd13d0770da in __GI___select (nfds=nfds@entry=6, 
    readfds=readfds@entry=0x7ffeeb04daf0, writefds=writefds@entry=0x0, 
    exceptfds=exceptfds@entry=0x0, timeout=timeout@entry=0x7ffeeb04da50)
    at ../sysdeps/unix/sysv/linux/select.c:41
        resultvar = 18446744073709551612
        sc_ret = <optimized out>
#16 0x00005626024d4b5e in ServerLoop () at postmaster.c:1689
        timeout = {tv_sec = 59, tv_usec = 943137}
        rmask = {fds_bits = {48, 0 <repeats 15 times>}}
        selres = <optimized out>
        now = <optimized out>
        readmask = {fds_bits = {48, 0 <repeats 15 times>}}
        nSockets = 6
        last_lockfile_recheck_time = 1644327796
        last_touch_time = 1644327796
        __func__ = "ServerLoop"
#17 0x00005626024d678c in PostmasterMain (argc=argc@entry=8, 
    argv=argv@entry=0x56260444b500) at postmaster.c:1398
        opt = <optimized out>
        status = <optimized out>
        userDoption = <optimized out>
        listen_addr_saved = <optimized out>
        i = <optimized out>
        output_config_variable = <optimized out>
        __func__ = "PostmasterMain"
#18 0x00005626021fbfdd in main (argc=8, argv=0x56260444b500) at main.c:228
        do_check_root = <optimized out>
(gdb) quit
           PID: 11026 (postgres)
           UID: 1001 (runner)
           GID: 121 (docker)
        Signal: 6 (ABRT)
     Timestamp: Tue 2022-02-08 13:43:43 UTC (1h 2min ago)
  Command Line: postgres: ts_bgw_db_scheduler_test_main
    Executable: /home/runner/postgresql/bin/postgres
 Control Group: /system.slice/runner-provisioner.service
          Unit: runner-provisioner.service
         Slice: system.slice
       Boot ID: 07c7abd0bcc446aaa552b2a5f962d0f1
    Machine ID: 508c9fc7249941328eb8bb00fe4e8883
      Hostname: fv-az213-941
       Storage: /var/lib/systemd/coredump/core.postgres.1001.07c7abd0bcc446aaa552b2a5f962d0f1.11026.1644327823000000000000.lz4
       Message: Process 11026 (postgres) of user 1001 dumped core.

                Stack trace of thread 11026:
                #0  0x00007fd13cfa518b __GI_raise (libc.so.6 + 0x4618b)
                #1  0x00007fd13cf84859 __GI_abort (libc.so.6 + 0x25859)
                #2  0x00005626026881fa n/a (/home/runner/postgresql/bin/postgres + 0x5461fa)
                #3  0x04560fd0b12b47c0 n/a (n/a + 0x0)

### How can we reproduce the bug?

```bash
Seems to be a rare one-off so probably hard to reproduce.
horzsolt commented 2 years ago

Please increase the priority if you find another occurence of this issue.

akuzm commented 2 years ago

Still happening: https://github.com/timescale/timescaledb/actions/runs/3266582518/jobs/5370526465