matrixorigin / matrixone

Hyperconverged cloud-edge native database
https://docs.matrixorigin.cn/en
Apache License 2.0
1.79k stars 277 forks source link

[Bug]:ut TestInProgressTransfer failed #20282

Open daviszhen opened 9 hours ago

daviszhen commented 9 hours ago

Is there an existing issue for the same bug?

Branch Name

main

Commit ID

pr commit

Other Environment Information

- Hardware parameters:
- OS type:
- Others:

Actual Behavior

============================================
[TEST]: TestInProgressTransfer
[START_AT]: 2024-11-21T11:22:32Z
[END_AT]: 2024-11-21T11:22:34Z
[RESULT]: fail
[ELAPSED]: 1.70
[OUTPUTS]:
=== RUN   TestInProgressTransfer
2024/11/21 11:22:32.[37](https://github.com/matrixorigin/matrixone/actions/runs/11951813343/job/33316228119?pr=20214#step:10:38)1444 +0000 INFO db/open.go:74 open-tae {"operation": "Start", "operand": "open"}
2024/11/21 11:22:32.372542 +0000 INFO db/open.go:101 open-tae {"operation": "Config", "toml": "BulkTomestoneTxnThreshold = 10000\nMaxMessageSize = 0\nTransferTableTTL = \"1m0s\"\nIncrementalDedup = false\nIsStandalone = false\nLogStoreT = \"batchstore\"\n\n[storage-cfg]\n  block-max-rows = 8192\n  object-max-blocks = 256\n\n[checkpoint-cfg]\n  flush-inerterval = \"1m0s\"\n  transfer-interval = \"5s\"\n  metadata-check-inerterval = \"0s\"\n  checkpoint-min-count = 100000000\n  scan-interval = \"1h0m0s\"\n  checkpoint-incremental-interval = \"1h0m0s\"\n  checkpoint-global-interval = 10000000\n  overall-flush-mem-control = 1073741824\n  ForceUpdateGlobalInterval = false\n  GlobalVersionInterval = \"1h0m0s\"\n  GCCheckpointInterval = \"1m0s\"\n  DisableGCCheckpoint = false\n  ReservedWALEntryCount = 0\n  BlockRows = 10\n  Size = 0\n\n[scheduler-cfg]\n  io-workers = 16\n  async-workers = 2\n\n[gc-cfg]\n  gc-ttl = \"1h0m0s\"\n  scan-gc-interval = \"30m0s\"\n  disable-gc = false\n  check-gc = false\n  cache-size = 0\n  gc-merge-count = 40\n  gc-estimate-rows = 0\n  gc-probility = 0.0\n  gc-delete-timeout = \"10m0s\"\n  gc-delete-batch-size = 1000\n\n[LogtailCfg]\n  page-size = 256\n\n[MergeCfg]\n  CNMergeMemControlHint = 8589934592\n  CNTakeOverAll = false\n  CNTakeOverExceed = 8[38](https://github.com/matrixorigin/matrixone/actions/runs/11951813343/job/33316228119?pr=20214#step:10:39)86080000\n  CNStandaloneTake = false\n  DisableZMBasedMerge = false\n\n[CatalogCfg]\n  GCInterval = \"3m0s\"\n  DisableGC = false\n"}
2024/11/21 11:22:32.381185 +0000 INFO checkpoint/replay.go:353 open-tae {"replay": "checkpoint-catalog", "cost": "530ns"}

2024/11/21 11:22:33.841380 +0000 INFO jobs/flushTableTail.go:438 [FLUSH-END] {"task": "[FT-88]16385-test", "aobj-deletes": 1, "aobj-merge-rows": 1, "tombstone-rows": 0, "duration": "5.821285ms", "extra-info": {"a-objs": "e5ec2b1e52b9_0,", "a-tombstones": "", "to-objs": "a9ea2f938a77_0,"}}
2024/11/21 11:22:34.045120 +0000 INFO rpc/handle.go:961 op2 {"txn": "ctx[CBF29CE4842223251809F7F27305851C][1732188153993971069-1->9223372036854775807-4294967295][Active]: <nil>", "pk": "[1]: ", "rowid": "01934e75-a7ec-73db-b9d3-a9ea2f938a77-0-0-0"}
2024/11/21 11:22:34.046495 +0000 WARN rpc/handle.go:441 HandleCommit-SLOW-LOG {"commit-latency": "1.480696ms", "cn-txn": "cbf29ce4842223251809f7f27305851c/Active/S:1732188153993971069-1", "tn-txn": "ctx[CBF29CE4842223251809F7F27305851C][1732188153993971069-1->1732188154045585822-0][Committed]: <nil>"}
==================
WARNING: DATA RACE
Write at 0x00000a439f80 by goroutine 2172:
  github.com/matrixorigin/matrixone/pkg/util/fault.stopFaultMap()
      /home/runner/work/matrixone/matrixone/pkg/util/fault/fault.go:165 +0xf2
  github.com/matrixorigin/matrixone/pkg/util/fault.Disable()
      /home/runner/work/matrixone/matrixone/pkg/util/fault/fault.go:179 +0x38
  runtime.deferreturn()
      /opt/hostedtoolcache/go/1.23.0/x64/src/runtime/panic.go:605 +0x5d
  testing.tRunner()
      /opt/hostedtoolcache/go/1.23.0/x64/src/testing/testing.go:1690 +0x226
  testing.(*T).Run.gowrap1()
      /opt/hostedtoolcache/go/1.23.0/x64/src/testing/testing.go:1743 +0x44

Previous read at 0x00000a439f80 by goroutine 2282:
  github.com/matrixorigin/matrixone/pkg/util/fault.TriggerFault()
      /home/runner/work/matrixone/matrixone/pkg/util/fault/fault.go:197 +0x117
  github.com/matrixorigin/matrixone/pkg/objectio.PartitionStateInjected()
      /home/runner/work/matrixone/matrixone/pkg/objectio/injects.go:157 +0x12a
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae/logtailreplay.(*PartitionState).HandleLogtailEntry()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtailreplay/partition_state.go:[128](https://github.com/matrixorigin/matrixone/actions/runs/11951813343/job/33316228119?pr=20214#step:10:129) +0xf1
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.consumeEntry()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtail.go:51 +0x2fa
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.consumeLogTail()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtail_consumer.go:1976 +0x1af
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.updatePartitionOfPush()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtail_consumer.go:1906 +0x91e
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.(*Engine).consumeUpdateLogTail()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtail_consumer.go:1844 +0x[132](https://github.com/matrixorigin/matrixone/actions/runs/11951813343/job/33316228119?pr=20214#step:10:133)
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.cmdToConsumeLog.action()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtail_consumer.go:1812 +0x1a
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.(*cmdToConsumeLog).action()
      <autogenerated>:1 +0xe4
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.(*PushClient).createRoutineToConsumeLogTails.func1()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtail_consumer.go:1750 +0x16a
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.(*PushClient).createRoutineToConsumeLogTails.gowrap1()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtail_consumer.go:1772 +0x82

Goroutine 2172 (running) created at:
  testing.(*T).Run()
      /opt/hostedtoolcache/go/1.23.0/x64/src/testing/testing.go:1743 +0x825
  testing.runTests.func1()
      /opt/hostedtoolcache/go/1.23.0/x64/src/testing/testing.go:2168 +0x85
  testing.tRunner()
      /opt/hostedtoolcache/go/1.23.0/x64/src/testing/testing.go:1690 +0x226
  testing.runTests()
      /opt/hostedtoolcache/go/1.23.0/x64/src/testing/testing.go:2166 +0x8be
  testing.(*M).Run()
      /opt/hostedtoolcache/go/1.23.0/x64/src/testing/testing.go:2034 +0xf17
  main.main()
      _testmain.go:141 +0x164

Goroutine 2282 (running) created at:
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.(*PushClient).createRoutineToConsumeLogTails()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtail_consumer.go:1772 +0x289
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.(*PushClient).startConsumers()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtail_consumer.go:623 +0xae4
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.(*PushClient).connect()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtail_consumer.go:797 +0xa0
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.(*connector).run()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtail_consumer.go:261 +0x98
  github.com/matrixorigin/matrixone/pkg/vm/engine/disttae.(*Engine).InitLogTailPushModel.gowrap1()
      /home/runner/work/matrixone/matrixone/pkg/vm/engine/disttae/logtail_consumer.go:1557 +0x4f
==================
2024/11/21 11:22:34.047252 +0000 ERROR disttae/logtail_consumer.go:558 [logtail-consumer] receive an error from log tail client, err: context canceled
2024/11/21 11:22:34.047320 +0000 ERROR disttae/logtail_consumer.go:613 [logtail-consumer] receive one logtail failed, err: context canceled
2024/11/21 11:22:34.047384 +0000 INFO disttae/logtail_consumer.go:996 [logtail-consumer] GC partition_state process exit.
2024/11/21 11:22:34.047461 +0000 INFO disttae/logtail_consumer.go:257 [logtail-consumer] logtail consumer stopped
2024/11/21 11:22:34.047472 +0000 INFO db/open.go:384 tae metrics task exit
2024/11/21 11:22:34.047528 +0000 INFO disttae/logtail_consumer.go:668 [logtail-consumer] logtail consumer stopped
2024/11/21 11:22:34.047579 +0000 INFO disttae/logtail_consumer.go:927 [logtail-consumer] unsubscribe process exit.
2024/11/21 11:22:34.058485 +0000 INFO gc/manager.go:152 gc-loop is going to exit
2024/11/21 11:22:34.063869 +0000 INFO db/scanner.go:56 DBScanner Stopped
2024/11/21 11:22:34.069607 +0000 INFO db/scheduler.go:86 TaskScheduler Stopped
2024/11/21 11:22:34.069752 +0000 INFO txnbase/txnmgr.go:622 [Stop] {"txnmgr": {}}
    testing.go:[139](https://github.com/matrixorigin/matrixone/actions/runs/11951813343/job/33316228119?pr=20214#step:10:140)9: race detected during execution of test
--- FAIL: TestInProgressTransfer (1.70s)
steps.ut.conclusion: failure
FAIL_UT_CASES=TestInProgressTransfer,

Expected Behavior

no failure

Steps to Reproduce

https://github.com/matrixorigin/matrixone/actions/runs/11951813343/job/33316228119?pr=20214

Additional information

No response