hashicorp / nomad

Nomad is an easy-to-use, flexible, and performant workload orchestrator that can deploy a mix of microservice, batch, containerized, and non-containerized applications. Nomad is easy to operate and scale and has native Consul and Vault integrations.
https://www.nomadproject.io/
Other
14.87k stars 1.95k forks source link

Multiple data races detected #12098

Open shoenig opened 2 years ago

shoenig commented 2 years ago

While experimenting with running CI in GitHub Actions (GHA), the Go race detector reported the following data races:

tests-nomad

WARNING: DATA RACE
Write at 0x00c00049a760 by goroutine 30:
  github.com/hashicorp/nomad/nomad/state.(*StateStore).upsertAllocsImpl()
      /home/runner/work/nomad/nomad/nomad/state/state_store.go:3378 +0x294
  github.com/hashicorp/nomad/nomad/state.(*StateStore).UpsertAllocs()
      /home/runner/work/nomad/nomad/nomad/state/state_store.go:3336 +0x129
  github.com/hashicorp/nomad/nomad/drainer.TestDrainingJobWatcher_DrainJobs()
      /home/runner/work/nomad/nomad/nomad/drainer/watch_jobs_test.go:192 +0xd6f
  testing.tRunner()
      /opt/hostedtoolcache/go/1.17.7/x64/src/testing/testing.go:1259 +0x22f
  testing.(*T).Run·dwrap·21()
      /opt/hostedtoolcache/go/1.17.7/x64/src/testing/testing.go:1306 +0x47

Previous read at 0x00c00049a760 by goroutine 62:
  github.com/hashicorp/nomad/nomad/drainer.(*drainingJobWatcher).getJobAllocsImpl()
      /home/runner/work/nomad/nomad/nomad/drainer/watch_jobs.go:466 +0x4f1
  github.com/hashicorp/nomad/nomad/drainer.(*drainingJobWatcher).getJobAllocsImpl-fm()
      /home/runner/work/nomad/nomad/nomad/drainer/watch_jobs.go:441 +0x59
  github.com/hashicorp/nomad/nomad/state.(*StateStore).BlockingQuery()
      /home/runner/work/nomad/nomad/nomad/state/state_store.go:308 +0xee
  github.com/hashicorp/nomad/nomad/drainer.(*drainingJobWatcher).getJobAllocs()
      /home/runner/work/nomad/nomad/nomad/drainer/watch_jobs.go:429 +0xfb
  github.com/hashicorp/nomad/nomad/drainer.(*drainingJobWatcher).watch()
      /home/runner/work/nomad/nomad/nomad/drainer/watch_jobs.go:151 +0x21e
  github.com/hashicorp/nomad/nomad/drainer.NewDrainingJobWatcher·dwrap·18()
      /home/runner/work/nomad/nomad/nomad/drainer/watch_jobs.go:88 +0x39

tests-client

WARNING: DATA RACE
Write at 0x00c00060c1c0 by goroutine 111:
  github.com/hashicorp/nomad/client.(*heartbeatStop).setLastOk()
      /home/runner/work/nomad/nomad/client/heartbeatstop.go:129 +0xfa
  github.com/hashicorp/nomad/client.(*Client).registerNode()
      /home/runner/work/nomad/nomad/client/client.go:1851 +0x5fe
  github.com/hashicorp/nomad/client.(*Client).retryRegisterNode()
      /home/runner/work/nomad/nomad/client/client.go:1803 +0x69
  github.com/hashicorp/nomad/client.(*Client).registerAndHeartbeat()
      /home/runner/work/nomad/nomad/client/client.go:1586 +0x5b
  github.com/hashicorp/nomad/client.(*Client).registerAndHeartbeat-fm()
      /home/runner/work/nomad/nomad/client/client.go:1584 +0x39
  github.com/hashicorp/nomad/client.(*group).Go.func1()
      /home/runner/work/nomad/nomad/client/client.go:3104 +0x9d

Previous write at 0x00c00060c1c0 by goroutine 161:
  github.com/hashicorp/nomad/client.(*heartbeatStop).watch()
      /home/runner/work/nomad/nomad/client/heartbeatstop.go:73 +0x7c
  github.com/hashicorp/nomad/client.NewClient·dwrap·16()
      /home/runner/work/nomad/nomad/client/client.go:484 +0x39
WARNING: DATA RACE
Read at 0x00c001496108 by goroutine 19:
  runtime.racereadrange()
      <autogenerated>:1 +0x1b
  github.com/hashicorp/nomad/nomad/structs.(*Task).Copy()
      /home/runner/work/nomad/nomad/nomad/structs/structs.go:6890 +0x778
  github.com/hashicorp/nomad/nomad/structs.(*TaskGroup).Copy()
      /home/runner/work/nomad/nomad/nomad/structs/structs.go:6124 +0xa8f
  github.com/hashicorp/nomad/nomad/structs.(*Job).Copy()
      /home/runner/work/nomad/nomad/nomad/structs/structs.go:4257 +0x94f
  github.com/hashicorp/nomad/nomad/state.(*StateStore).setJobStatus()
      /home/runner/work/nomad/nomad/nomad/state/state_store.go:4625 +0x127
  github.com/hashicorp/nomad/nomad/state.(*StateStore).setJobStatuses()
      /home/runner/work/nomad/nomad/nomad/state/state_store.go:4589 +0x2a7
  github.com/hashicorp/nomad/nomad/state.(*StateStore).upsertAllocsImpl()
      /home/runner/work/nomad/nomad/nomad/state/state_store.go:3456 +0x130d
  github.com/hashicorp/nomad/nomad/state.(*StateStore).UpsertPlanResults()
      /home/runner/work/nomad/nomad/nomad/state/state_store.go:401 +0xb0d
  github.com/hashicorp/nomad/nomad.(*nomadFSM).applyPlanResults()
      /home/runner/work/nomad/nomad/nomad/fsm.go:1005 +0x2f3
  github.com/hashicorp/nomad/nomad.(*nomadFSM).Apply()
      /home/runner/work/nomad/nomad/nomad/fsm.go:238 +0x9dc
  github.com/hashicorp/raft.(*Raft).runFSM.func1()
      /home/runner/go/pkg/mod/github.com/hashicorp/raft@v1.3.5/fsm.go:98 +0x3bc
  github.com/hashicorp/raft.(*Raft).runFSM.func2()
      /home/runner/go/pkg/mod/github.com/hashicorp/raft@v1.3.5/fsm.go:121 +0x8e8
  github.com/hashicorp/raft.(*Raft).runFSM()
      /home/runner/go/pkg/mod/github.com/hashicorp/raft@v1.3.5/fsm.go:231 +0x471
  github.com/hashicorp/raft.(*Raft).runFSM-fm()
      /home/runner/go/pkg/mod/github.com/hashicorp/raft@v1.3.5/fsm.go:77 +0x39
  github.com/hashicorp/raft.(*raftState).goFunc.func1()
      /home/runner/go/pkg/mod/github.com/hashicorp/raft@v1.3.5/state.go:146 +0x82

Previous write at 0x00c001496108 by goroutine 83:
  github.com/hashicorp/nomad/nomad/structs.(*Resources).Canonicalize()
      /home/runner/work/nomad/nomad/nomad/structs/structs.go:2420 +0xa8
  github.com/hashicorp/nomad/nomad/structs.(*Task).Canonicalize()
      /home/runner/work/nomad/nomad/nomad/structs/structs.go:6943 +0x378
  github.com/hashicorp/nomad/nomad/structs.(*TaskGroup).Canonicalize()
      /home/runner/work/nomad/nomad/nomad/structs/structs.go:6193 +0xb4a
  github.com/hashicorp/nomad/nomad/structs.(*Job).Canonicalize()
      /home/runner/work/nomad/nomad/nomad/structs/structs.go:4225 +0x1c5
  github.com/hashicorp/nomad/nomad/structs.(*Allocation).Canonicalize()
      /home/runner/work/nomad/nomad/nomad/structs/structs.go:9500 +0x1e4
  github.com/hashicorp/nomad/nomad/state.(*StateStore).UpsertPlanResults()
      /home/runner/work/nomad/nomad/nomad/state/state_store.go:398 +0xad3
  github.com/hashicorp/nomad/nomad.(*planner).applyPlan()
      /home/runner/work/nomad/nomad/nomad/plan_apply.go:313 +0x17db
  github.com/hashicorp/nomad/nomad.(*planner).planApply()
      /home/runner/work/nomad/nomad/nomad/plan_apply.go:167 +0x77b
  github.com/hashicorp/nomad/nomad.(*Server).establishLeadership·dwrap·219()
      /home/runner/work/nomad/nomad/nomad/leader.go:250 +0x39
WARNING: DATA RACE
Read at 0x00c001a8bd10 by goroutine 449:
  runtime.mapaccess2_faststr()
      /opt/hostedtoolcache/go/1.17.7/x64/src/runtime/map_faststr.go:107 +0x0
  github.com/hashicorp/nomad/client/pluginmanager/csimanager.(*csiManager).MounterForPlugin()
      /home/runner/work/nomad/nomad/client/pluginmanager/csimanager/manager.go:78 +0x7b
  github.com/hashicorp/nomad/client.(*CSI).NodeDetachVolume()
      /home/runner/work/nomad/nomad/client/csi_endpoint.go:464 +0x588
  runtime.call32()
      /opt/hostedtoolcache/go/1.17.7/x64/src/runtime/asm_amd64.s:626 +0x48
  reflect.Value.Call()
      /opt/hostedtoolcache/go/1.17.7/x64/src/reflect/value.go:339 +0xd7
  net/rpc.(*service).call()
      /opt/hostedtoolcache/go/1.17.7/x64/src/net/rpc/server.go:377 +0x28c
  net/rpc.(*Server).ServeRequest()
      /opt/hostedtoolcache/go/1.17.7/x64/src/net/rpc/server.go:498 +0x1a4
  github.com/hashicorp/nomad/client.(*Client).ClientRPC()
      /home/runner/work/nomad/nomad/client/rpc.go:36 +0x1a4
  github.com/hashicorp/nomad/client.TestCSINode_DetachVolume.func2()
      /home/runner/work/nomad/nomad/client/csi_endpoint_test.go:967 +0x327
  testing.tRunner()
      /opt/hostedtoolcache/go/1.17.7/x64/src/testing/testing.go:1259 +0x22f
  testing.(*T).Run·dwrap·21()
      /opt/hostedtoolcache/go/1.17.7/x64/src/testing/testing.go:1306 +0x47

Previous write at 0x00c001a8bd10 by goroutine 377:
  runtime.mapassign_faststr()
      /opt/hostedtoolcache/go/1.17.7/x64/src/runtime/map_faststr.go:202 +0x0
  github.com/hashicorp/nomad/client/pluginmanager/csimanager.(*csiManager).instancesForType()
      /home/runner/work/nomad/nomad/client/pluginmanager/csimanager/manager.go:197 +0x216
  github.com/hashicorp/nomad/client/pluginmanager/csimanager.(*csiManager).resyncPluginsFromRegistry()
      /home/runner/work/nomad/nomad/client/pluginmanager/csimanager/manager.go:135 +0x254
  github.com/hashicorp/nomad/client/pluginmanager/csimanager.(*csiManager).runLoop()
      /home/runner/work/nomad/nomad/client/pluginmanager/csimanager/manager.go:104 +0x2cb
  github.com/hashicorp/nomad/client/pluginmanager/csimanager.(*csiManager).Run·dwrap·3()
      /home/runner/work/nomad/nomad/client/pluginmanager/csimanager/manager.go:93 +0x39
WARNING: DATA RACE
Read at 0x00c0000900df by goroutine 26:
  github.com/hashicorp/nomad/client/dynamicplugins.TestPluginEventBroadcaster_SendsMessagesToAllClients.func2()
      /home/runner/work/nomad/nomad/client/dynamicplugins/registry_test.go:34 +0x39
  github.com/stretchr/testify/assert.Eventually.func1()
      /home/runner/go/pkg/mod/github.com/stretchr/testify@v1.7.0/assert/assertions.go:1655 +0x39

Previous write at 0x00c0000900df by goroutine 16:
  github.com/hashicorp/nomad/client/dynamicplugins.TestPluginEventBroadcaster_SendsMessagesToAllClients.func1()
      /home/runner/work/nomad/nomad/client/dynamicplugins/registry_test.go:24 +0x44
  github.com/hashicorp/nomad/client/dynamicplugins.TestPluginEventBroadcaster_SendsMessagesToAllClients·dwrap·10()
      /home/runner/work/nomad/nomad/client/dynamicplugins/registry_test.go:28 +0x58
tgross commented 2 years ago

Two fixes for the CSI-related items: https://github.com/hashicorp/nomad/pull/12553 https://github.com/hashicorp/nomad/pull/12554

tgross commented 2 years ago

Another possible data race in https://app.circleci.com/pipelines/github/hashicorp/nomad/28486/workflows/88d021ae-09b1-47b3-a777-e53777ea08ad/jobs/314220.

shoenig commented 2 years ago

New race in TestDebug_CapturedFiles:

    operator_debug_test.go:437: [TEST] Client for test TestDebug_CapturedFiles ready, id: df6ec249-b6bd-bbe3-e0d3-9e6857b0bd85, region: global
    operator_debug_test.go:439: serverName: TestDebug_CapturedFiles.global, clientID, df6ec249-b6bd-bbe3-e0d3-9e6857b0bd85
2022-08-02T13:07:14.945Z [TRACE] client/client.go:1919: client: next heartbeat: period=13.097315477s
2022-08-02T13:07:14.945Z [DEBUG] agent/http.go:579: http: request complete: method=GET path=/v1/nodes duration="171.768µs"
2022-08-02T13:07:14.946Z [DEBUG] agent/http.go:579: http: request complete: method=GET path=/v1/nodes?prefix=df6ec249-b6bd-bbe3-e0d3-9e6857b0bd85 duration="156.114µs"
2022-08-02T13:07:14.946Z [DEBUG] agent/http.go:579: http: request complete: method=GET path=/v1/agent/members duration="129.636µs"
2022-08-02T13:07:14.947Z [DEBUG] agent/http.go:579: http: request complete: method=GET path=/v1/status/leader duration="146.175µs"
fatal error: concurrent map read and map write

goroutine 39028 [running]:
runtime.throw({0x2abda8e?, 0x296a0a0?})
    /usr/local/go/src/runtime/panic.go:992 +0x71 fp=0xc005f789f0 sp=0xc005f789c0 pc=0x43c2f1
runtime.mapaccess2(0xc0094c4bb0?, 0xc001e9ec30?, 0x4588680?)
    /usr/local/go/src/runtime/map.go:476 +0x205 fp=0xc005f78a30 sp=0xc005f789f0 pc=0x412945
reflect.mapaccess(0x254a060?, 0xc0094c4bb0?, 0xc0094c4bb0?)
    /usr/local/go/src/runtime/map.go:1331 +0x19 fp=0xc005f78a58 sp=0xc005f78a30 pc=0x468f19
reflect.Value.MapIndex({0x24c5160?, 0xc001e9ec80?, 0xc005a26640?}, {0x254a060, 0xc0094c4bb0, 0x94})
    /usr/local/go/src/reflect/value.go:1616 +0x151 fp=0xc005f78ac8 sp=0xc005f78a58 pc=0x4e33b1
github.com/mitchellh/reflectwalk.walkMap({0x24c5160?, 0xc001e9ec80?, 0xc005a26640?}, {0x296a0a0, 0xc005a26640})
    /home/circleci/.go_workspace/pkg/mod/github.com/mitchellh/reflectwalk@v1.0.2/reflectwalk.go:252 +0x315 fp=0xc005f78ba8 sp=0xc005f78ac8 pc=0x702275
github.com/mitchellh/reflectwalk.walk({0x24c5160?, 0xc001e9ec80?, 0xc005a26640?}, {0x296a0a0, 0xc005a26640})
    /home/circleci/.go_workspace/pkg/mod/github.com/mitchellh/reflectwalk@v1.0.2/reflectwalk.go:200 +0x5aa fp=0xc005f78c90 sp=0xc005f78ba8 pc=0x701b4a
github.com/mitchellh/reflectwalk.walkStruct({0x2925420?, 0xc001e9ec60?, 0xc005a26640?}, {0x296a0a0, 0xc005a26640})
    /home/circleci/.go_workspace/pkg/mod/github.com/mitchellh/reflectwalk@v1.0.2/reflectwalk.go:404 +0x3d5 fp=0xc005f78e10 sp=0xc005f78c90 pc=0x702c35
github.com/mitchellh/reflectwalk.walk({0x290e760?, 0xc007c1e040?, 0xc005a26640?}, {0x296a0a0, 0xc005a26640})
    /home/circleci/.go_workspace/pkg/mod/github.com/mitchellh/reflectwalk@v1.0.2/reflectwalk.go:206 +0x6af fp=0xc005f78ef8 sp=0xc005f78e10 pc=0x701c4f
github.com/mitchellh/reflectwalk.walkStruct({0x29f1740?, 0xc007c1e000?, 0xc005a26640?}, {0x296a0a0, 0xc005a26640})
    /home/circleci/.go_workspace/pkg/mod/github.com/mitchellh/reflectwalk@v1.0.2/reflectwalk.go:404 +0x3d5 fp=0xc005f79078 sp=0xc005f78ef8 pc=0x702c35
github.com/mitchellh/reflectwalk.walk({0x230c420?, 0xc002964530?, 0xc005a26640?}, {0x296a0a0, 0xc005a26640})
    /home/circleci/.go_workspace/pkg/mod/github.com/mitchellh/reflectwalk@v1.0.2/reflectwalk.go:206 +0x6af fp=0xc005f79160 sp=0xc005f79078 pc=0x701c4f
github.com/mitchellh/reflectwalk.walkStruct({0x29dc200?, 0xc002964400?, 0xc005a26640?}, {0x296a0a0, 0xc005a26640})
    /home/circleci/.go_workspace/pkg/mod/github.com/mitchellh/reflectwalk@v1.0.2/reflectwalk.go:404 +0x3d5 fp=0xc005f792e0 sp=0xc005f79160 pc=0x702c35
github.com/mitchellh/reflectwalk.walk({0x25862a0?, 0xc002964400?, 0xc005a26640?}, {0x296a0a0, 0xc005a26640})
    /home/circleci/.go_workspace/pkg/mod/github.com/mitchellh/reflectwalk@v1.0.2/reflectwalk.go:206 +0x6af fp=0xc005f793c8 sp=0xc005f792e0 pc=0x701c4f
github.com/mitchellh/reflectwalk.Walk({0x25862a0?, 0xc002964400?}, {0x296a0a0, 0xc005a26640})
    /home/circleci/.go_workspace/pkg/mod/github.com/mitchellh/reflectwalk@v1.0.2/reflectwalk.go:99 +0x146 fp=0xc005f79438 sp=0xc005f793c8 pc=0x701526
github.com/mitchellh/copystructure.Config.Copy({0x0?, 0x0?, 0x0?}, {0x25862a0?, 0xc002964400})
    /home/circleci/.go_workspace/pkg/mod/github.com/mitchellh/copystructure@v1.2.0/copystructure.go:116 +0x188 fp=0xc005f794b0 sp=0xc005f79438 pc=0x703188
github.com/mitchellh/copystructure.Copy(...)
    /home/circleci/.go_workspace/pkg/mod/github.com/mitchellh/copystructure@v1.2.0/copystructure.go:36
github.com/hashicorp/nomad/command/agent.(*HTTPServer).AgentSelfRequest(0xc003bb88c0, {0x0?, 0xc005f79778?}, 0x4950d7?)
    /home/circleci/go/src/github.com/hashicorp/nomad/command/agent/agent_endpoint.go:84 +0x33e fp=0xc005f79758 sp=0xc005f794b0 pc=0x1fd499e
github.com/hashicorp/nomad/command/agent.(*HTTPServer).AgentSelfRequest-fm({0x3025740?, 0xc000fa9360?}, 0xc00096f704?)
    <autogenerated>:1 +0x3c fp=0xc005f79788 sp=0xc005f79758 pc=0x2021cfc
github.com/hashicorp/nomad/command/agent.(*HTTPServer).wrap.func1({0x3025740, 0xc000fa9360}, 0xc0044c3a00)
    /home/circleci/go/src/github.com/hashicorp/nomad/command/agent/http.go:581 +0x154 fp=0xc005f798a0 sp=0xc005f79788 pc=0x1fff954
net/http.HandlerFunc.ServeHTTP(0xc007aed880?, {0x3025740?, 0xc000fa9360?}, 0xc005f79908?)
    /usr/local/go/src/net/http/server.go:2084 +0x2f fp=0xc005f798c8 sp=0xc005f798a0 pc=0x803d2f
net/http.(*ServeMux).ServeHTTP(0x0?, {0x3025740, 0xc000fa9360}, 0xc0044c3a00)
    /usr/local/go/src/net/http/server.go:2462 +0x149 fp=0xc005f79918 sp=0xc005f798c8 pc=0x805b89
github.com/gorilla/handlers.CompressHandlerLevel.func1({0x3023d60?, 0xc000d0d260}, 0xc0044c3a00)
    /home/circleci/.go_workspace/pkg/mod/github.com/gorilla/handlers@v1.5.1/compress.go:141 +0x663 fp=0xc005f79a70 sp=0xc005f79918 pc=0x1f4b7a3
net/http.HandlerFunc.ServeHTTP(0x0?, {0x3023d60?, 0xc000d0d260?}, 0x4554960?)
    /usr/local/go/src/net/http/server.go:2084 +0x2f fp=0xc005f79a98 sp=0xc005f79a70 pc=0x803d2f
net/http.serverHandler.ServeHTTP({0xc006836d50?}, {0x3023d60, 0xc000d0d260}, 0xc0044c3a00)
    /usr/local/go/src/net/http/server.go:2916 +0x43b fp=0xc005f79b58 sp=0xc005f79a98 pc=0x8077db
net/http.(*conn).serve(0xc005a26000, {0x3025bd8, 0xc007a4e8a0})
    /usr/local/go/src/net/http/server.go:1966 +0x5d7 fp=0xc005f79fb8 sp=0xc005f79b58 pc=0x8027d7
net/http.(*Server).Serve.func3()
    /usr/local/go/src/net/http/server.go:3071 +0x2e fp=0xc005f79fe0 sp=0xc005f79fb8 pc=0x80812e
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1571 +0x1 fp=0xc005f79fe8 sp=0xc005f79fe0 pc=0x46fe21
created by net/http.(*Server).Serve
    /usr/local/go/src/net/http/server.go:3071 +0x4db