GoogleCloudPlatform / golang-samples

Sample apps and code written for Google Cloud in the Go programming language.
Apache License 2.0
4.31k stars 1.74k forks source link

spanner/spanner_snippets/spanner: TestCustomerManagedEncryptionKeys failed #2111

Closed flaky-bot[bot] closed 3 years ago

flaky-bot[bot] commented 3 years ago

Note: #2033 was also for this test, but it was closed more than 10 days ago. So, I didn't mark it flaky.


commit: 9bc859f48ef701a4b81794b5bb86b509b17e2e4c buildURL: Build Status, Sponge status: failed

Test output
/usr/local/go/src/testing/testing.go:1701 +0xe5
/usr/local/go/src/time/sleep.go:180 +0x45
/usr/local/go/src/testing/testing.go:1240 +0x2da
/usr/local/go/src/testing/testing.go:1512 +0x78
/usr/local/go/src/testing/testing.go:1194 +0xef
/usr/local/go/src/testing/testing.go:1510 +0x2fe
/usr/local/go/src/testing/testing.go:1418 +0x1eb
_testmain.go:51 +0x138
/go/pkg/mod/go.opencensus.io@v0.23.0/stats/view/worker.go:276 +0xcd
/go/pkg/mod/go.opencensus.io@v0.23.0/stats/view/worker.go:34 +0x68
/go/pkg/mod/github.com/googleapis/gax-go/v2@v2.0.5/invoke.go:55 +0xa5
/go/pkg/mod/cloud.google.com/go@v0.84.0/longrunning/longrunning.go:156 +0xe5
/go/pkg/mod/cloud.google.com/go@v0.84.0/longrunning/longrunning.go:142 +0xf3
/go/pkg/mod/cloud.google.com/go/spanner@v1.20.0/admin/database/apiv1/database_admin_client.go:1186 +0xaa
/tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/spanner_restore_backup_with_encryption_key.go:63 +0x51e
/tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/integration_test.go:498 +0x91
/tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/integration_test.go:578 +0xb6
/tmpfs/src/github/golang-samples/internal/testutil/retry.go:34 +0xd1
/tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/integration_test.go:576 +0x13b
/tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/integration_test.go:500 +0xc9b
/usr/local/go/src/testing/testing.go:1194 +0xef
/usr/local/go/src/testing/testing.go:1239 +0x2b3
/go/pkg/mod/google.golang.org/grpc@v1.38.0/balancer_conn_wrappers.go:69 +0xac
/go/pkg/mod/google.golang.org/grpc@v1.38.0/balancer_conn_wrappers.go:60 +0x172
/go/pkg/mod/google.golang.org/grpc@v1.38.0/clientconn.go:1214 +0x465
/go/pkg/mod/google.golang.org/grpc@v1.38.0/clientconn.go:844 +0x12a
/usr/local/go/src/runtime/netpoll.go:222 +0x55
/usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x45
/usr/local/go/src/internal/poll/fd_poll_runtime.go:92
/usr/local/go/src/internal/poll/fd_unix.go:166 +0x1d5
/usr/local/go/src/net/fd_posix.go:55 +0x4f
/usr/local/go/src/net/net.go:183 +0x91
/usr/local/go/src/crypto/tls/conn.go:776 +0x63
/usr/local/go/src/bytes/buffer.go:204 +0xbe
/usr/local/go/src/crypto/tls/conn.go:798 +0xf3
/usr/local/go/src/crypto/tls/conn.go:605 +0x115
/usr/local/go/src/crypto/tls/conn.go:573
/usr/local/go/src/crypto/tls/conn.go:1276 +0x165
/usr/local/go/src/bufio/bufio.go:227 +0x222
/usr/local/go/src/io/io.go:328 +0x87
/usr/local/go/src/io/io.go:347
/go/pkg/mod/golang.org/x/net@v0.0.0-20210503060351-7fd8e65b6420/http2/frame.go:237 +0x89
/go/pkg/mod/golang.org/x/net@v0.0.0-20210503060351-7fd8e65b6420/http2/frame.go:492 +0xa5
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:1347 +0x1a5
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:346 +0xdd1
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/controlbuf.go:407 +0xff
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/controlbuf.go:527 +0x1dd
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:396 +0x7b
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:394 +0x12ae
/usr/local/go/src/runtime/netpoll.go:222 +0x55
/usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x45
/usr/local/go/src/internal/poll/fd_poll_runtime.go:92
/usr/local/go/src/internal/poll/fd_unix.go:166 +0x1d5
/usr/local/go/src/net/fd_posix.go:55 +0x4f
/usr/local/go/src/net/net.go:183 +0x91
/usr/local/go/src/crypto/tls/conn.go:776 +0x63
/usr/local/go/src/bytes/buffer.go:204 +0xbe
/usr/local/go/src/crypto/tls/conn.go:798 +0xf3
/usr/local/go/src/crypto/tls/conn.go:605 +0x115
/usr/local/go/src/crypto/tls/conn.go:573
/usr/local/go/src/crypto/tls/conn.go:1276 +0x165
/usr/local/go/src/bufio/bufio.go:227 +0x222
/usr/local/go/src/io/io.go:328 +0x87
/usr/local/go/src/io/io.go:347
/go/pkg/mod/golang.org/x/net@v0.0.0-20210503060351-7fd8e65b6420/http2/frame.go:237 +0x89
/go/pkg/mod/golang.org/x/net@v0.0.0-20210503060351-7fd8e65b6420/http2/frame.go:492 +0xa5
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:1347 +0x1a5
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:346 +0xdd1
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/controlbuf.go:407 +0xff
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/controlbuf.go:527 +0x1dd
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:396 +0x7b
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:394 +0x12ae
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/controlbuf.go:407 +0xff
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/controlbuf.go:527 +0x1dd
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:396 +0x7b
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:394 +0x12ae
/go/pkg/mod/google.golang.org/grpc@v1.38.0/balancer_conn_wrappers.go:69 +0xac
/go/pkg/mod/google.golang.org/grpc@v1.38.0/balancer_conn_wrappers.go:60 +0x172
/go/pkg/mod/google.golang.org/grpc@v1.38.0/clientconn.go:1214 +0x465
/go/pkg/mod/google.golang.org/grpc@v1.38.0/clientconn.go:844 +0x12a
/usr/local/go/src/runtime/netpoll.go:222 +0x55
/usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x45
/usr/local/go/src/internal/poll/fd_poll_runtime.go:92
/usr/local/go/src/internal/poll/fd_unix.go:166 +0x1d5
/usr/local/go/src/net/fd_posix.go:55 +0x4f
/usr/local/go/src/net/net.go:183 +0x91
/usr/local/go/src/crypto/tls/conn.go:776 +0x63
/usr/local/go/src/bytes/buffer.go:204 +0xbe
/usr/local/go/src/crypto/tls/conn.go:798 +0xf3
/usr/local/go/src/crypto/tls/conn.go:605 +0x115
/usr/local/go/src/crypto/tls/conn.go:573
/usr/local/go/src/crypto/tls/conn.go:1276 +0x165
/usr/local/go/src/bufio/bufio.go:227 +0x222
/usr/local/go/src/io/io.go:328 +0x87
/usr/local/go/src/io/io.go:347
/go/pkg/mod/golang.org/x/net@v0.0.0-20210503060351-7fd8e65b6420/http2/frame.go:237 +0x89
/go/pkg/mod/golang.org/x/net@v0.0.0-20210503060351-7fd8e65b6420/http2/frame.go:492 +0xa5
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:1347 +0x1a5
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:346 +0xdd1
/go/pkg/mod/google.golang.org/grpc@v1.38.0/clientconn.go:1214 +0x465
/go/pkg/mod/google.golang.org/grpc@v1.38.0/clientconn.go:844 +0x12a
/go/pkg/mod/google.golang.org/grpc@v1.38.0/balancer_conn_wrappers.go:69 +0xac
/go/pkg/mod/google.golang.org/grpc@v1.38.0/balancer_conn_wrappers.go:60 +0x172
/go/pkg/mod/google.golang.org/grpc@v1.38.0/clientconn.go:1214 +0x465
/go/pkg/mod/google.golang.org/grpc@v1.38.0/clientconn.go:844 +0x12a
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/controlbuf.go:407 +0xff
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/controlbuf.go:527 +0x1dd
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:396 +0x7b
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:394 +0x12ae
/usr/local/go/src/runtime/netpoll.go:222 +0x55
/usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x45
/usr/local/go/src/internal/poll/fd_poll_runtime.go:92
/usr/local/go/src/internal/poll/fd_unix.go:166 +0x1d5
/usr/local/go/src/net/fd_posix.go:55 +0x4f
/usr/local/go/src/net/net.go:183 +0x91
/usr/local/go/src/crypto/tls/conn.go:776 +0x63
/usr/local/go/src/bytes/buffer.go:204 +0xbe
/usr/local/go/src/crypto/tls/conn.go:798 +0xf3
/usr/local/go/src/crypto/tls/conn.go:605 +0x115
/usr/local/go/src/crypto/tls/conn.go:573
/usr/local/go/src/crypto/tls/conn.go:1276 +0x165
/usr/local/go/src/bufio/bufio.go:227 +0x222
/usr/local/go/src/io/io.go:328 +0x87
/usr/local/go/src/io/io.go:347
/go/pkg/mod/golang.org/x/net@v0.0.0-20210503060351-7fd8e65b6420/http2/frame.go:237 +0x89
/go/pkg/mod/golang.org/x/net@v0.0.0-20210503060351-7fd8e65b6420/http2/frame.go:492 +0xa5
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:1347 +0x1a5
/go/pkg/mod/google.golang.org/grpc@v1.38.0/internal/transport/http2_client.go:346 +0xdd1
/go/pkg/mod/google.golang.org/grpc@v1.38.0/balancer_conn_wrappers.go:69 +0xac
/go/pkg/mod/google.golang.org/grpc@v1.38.0/balancer_conn_wrappers.go:60 +0x172
skuruppu commented 3 years ago

@olavloite would you have a moment to take a look at this one?

olavloite commented 3 years ago

It seems that the entire test run timed out while executing a restore operation for Spanner:

panic: test timed out after 1h0m0s

goroutine 7945 [running]:
testing.(*M).startAlarm.func1()
    /usr/local/go/src/testing/testing.go:1701 +0xe5
created by time.goFunc
    /usr/local/go/src/time/sleep.go:180 +0x45

goroutine 1 [chan receive, 58 minutes]:
testing.(*T).Run(0xc00086b680, 0xc4d114, 0x21, 0xc6d0c8, 0x498101)
    /usr/local/go/src/testing/testing.go:1240 +0x2da
testing.runTests.func1(0xc0003f8000)
    /usr/local/go/src/testing/testing.go:1512 +0x78
testing.tRunner(0xc0003f8000, 0xc00035fde0)
    /usr/local/go/src/testing/testing.go:1194 +0xef
testing.runTests(0xc00000ee10, 0x115dd20, 0x5, 0x5, 0xc02bab59c2792d5f, 0x3463109bb05, 0x116d360, 0xc3eb60)
    /usr/local/go/src/testing/testing.go:1510 +0x2fe
testing.(*M).Run(0xc000196a00, 0x0)
    /usr/local/go/src/testing/testing.go:1418 +0x1eb
main.main()
    _testmain.go:51 +0x138

goroutine 6 [select]:
go.opencensus.io/stats/view.(*worker).start(0xc000196580)
    /go/pkg/mod/go.opencensus.io@v0.23.0/stats/view/worker.go:276 +0xcd
created by go.opencensus.io/stats/view.init.0
    /go/pkg/mod/go.opencensus.io@v0.23.0/stats/view/worker.go:34 +0x68

goroutine 7474 [select, 2 minutes]:
github.com/googleapis/gax-go/v2.Sleep(0xd23848, 0xc0005ff1a0, 0x99cbd10d1, 0xd21890, 0xc00075c5a0)
    /go/pkg/mod/github.com/googleapis/gax-go/v2@v2.0.5/invoke.go:55 +0xa5
cloud.google.com/go/longrunning.(*Operation).wait(0xc000258078, 0xd23848, 0xc0005ff1a0, 0xd21890, 0xc00075c5a0, 0xc000525950, 0xc6d368, 0x0, 0x0, 0x0, ...)
    /go/pkg/mod/cloud.google.com/go@v0.84.0/longrunning/longrunning.go:156 +0xe5
cloud.google.com/go/longrunning.(*Operation).WaitWithInterval(0xc000258078, 0xd23848, 0xc0005ff1a0, 0xd21890, 0xc00075c5a0, 0xdf8475800, 0x0, 0x0, 0x0, 0xb04700, ...)
    /go/pkg/mod/cloud.google.com/go@v0.84.0/longrunning/longrunning.go:142 +0xf3
cloud.google.com/go/spanner/admin/database/apiv1.(*RestoreDatabaseOperation).Wait(0xc00045e020, 0xd23848, 0xc0005ff1a0, 0x0, 0x0, 0x0, 0x0, 0xc00045e020, 0x0)
    /go/pkg/mod/cloud.google.com/go/spanner@v1.20.0/admin/database/apiv1/database_admin_client.go:1186 +0xaa
github.com/GoogleCloudPlatform/golang-samples/spanner/spanner_snippets/spanner.restoreBackupWithCustomerManagedEncryptionKey(0xd23848, 0xc0005ff1a0, 0xd13c60, 0xc000434f00, 0xc000156070, 0x63, 0xc000256220, 0x1e, 0xc0001520e0, 0x6f, ...)
    /tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/spanner_restore_backup_with_encryption_key.go:63 +0x51e
github.com/GoogleCloudPlatform/golang-samples/spanner/spanner_snippets/spanner.TestCustomerManagedEncryptionKeys.func3(0xd23848, 0xc0005ff1a0, 0xd13c60, 0xc000434f00, 0xc000156070, 0x63, 0xc000256220, 0x1e, 0x7fe81883bf58, 0x8)
    /tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/integration_test.go:498 +0x91
github.com/GoogleCloudPlatform/golang-samples/spanner/spanner_snippets/spanner.runBackupSampleWithRetry.func1(0xc00012c000)
    /tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/integration_test.go:578 +0xb6
github.com/GoogleCloudPlatform/golang-samples/internal/testutil.Retry(0xc00086b680, 0xa, 0xdf8475800, 0xc000525cf8, 0x11559a0)
    /tmpfs/src/github/golang-samples/internal/testutil/retry.go:34 +0xd1
github.com/GoogleCloudPlatform/golang-samples/spanner/spanner_snippets/spanner.runBackupSampleWithRetry(0xd23848, 0xc0005ff1a0, 0xc00086b680, 0xc0001dbea8, 0xc000156070, 0x63, 0xc000256220, 0x1e, 0xc5e797, 0x3f, ...)
    /tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/integration_test.go:576 +0x13b
github.com/GoogleCloudPlatform/golang-samples/spanner/spanner_snippets/spanner.TestCustomerManagedEncryptionKeys(0xc00086b680)
    /tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/integration_test.go:500 +0xc9b
testing.tRunner(0xc00086b680, 0xc6d0c8)
    /usr/local/go/src/testing/testing.go:1194 +0xef
created by testing.(*T).Run
    /usr/local/go/src/testing/testing.go:1239 +0x2b3

There are however multiple other test failures, all related to setting / changing permissions, but none are for Spanner:

=== RUN   TestDatasets
    integration_test.go:54: updateDataSetAccessControl("golang_snippettest_dataset_07379da726c745d080917942d385eee5"): googleapi: Error 403: IAM setPolicy failed for Dataset golang-samples-tests-6:golang_snippettest_dataset_07379da726c745d080917942d385eee5: One or more users named in the policy do not belong to a permitted customer., policyViolation
--- FAIL: TestDatasets (6.49s)
FAIL
FAIL    github.com/GoogleCloudPlatform/golang-samples/bigquery/snippets/dataset 6.511s
=== RUN   TestPolicyTagManager
    integration_test.go:87: setIAMPolicy(projects/golang-samples-tests-6/locations/us/taxonomies/1520077384788401495/policyTags/8517990187981738822): SetIamPolicy: rpc error: code = FailedPrecondition desc = One or more users named in the policy do not belong to a permitted customer.
    integration_test.go:95: unexpected output ("Testing the permissions on projects/golang-samples-tests-6/locations/us/taxonomies/1520077384788401495/policyTags/8517990187981738822, of the 1 permissions probed, caller has 0 permissions\n") did not contain (of the 1 permissions probed, caller has 1 permissions: datacatalog.categories.fineGrainedGet)
--- FAIL: TestPolicyTagManager (1.19s)
FAIL
FAIL    github.com/GoogleCloudPlatform/golang-samples/datacatalog/snippets/policytagmanager 1.194s

PubSub:

=== RUN   TestIAM
    retry.go:44: FAILED after 10 attempts:
        subscription_test.go:184: addUsers: SetPolicy: rpc error: code = FailedPrecondition desc = One or more users named in the policy do not belong to a permitted customer.
    retry.go:44: FAILED after 10 attempts:
        subscription_test.go:195: want "group:cloud-logs@google.com" as viewer, policy=&{etag:"\x00 \x01"}
        subscription_test.go:198: want "allUsers" as viewer, policy=&{etag:"\x00 \x01"}
--- FAIL: TestIAM (24.23s)
=== RUN   TestIAM
    retry.go:44: FAILED after 10 attempts:
        topics_test.go:196: addUsers: SetPolicy: rpc error: code = FailedPrecondition desc = One or more users named in the policy do not belong to a permitted customer.
    retry.go:44: FAILED after 10 attempts:
        topics_test.go:207: want "group:cloud-logs@google.com" as viewer, policy=&{etag:"\x00 \x01"}
        topics_test.go:210: want "allUsers" as viewer, policy=&{etag:"\x00 \x01"}
--- FAIL: TestIAM (23.60s)
=== RUN   TestACL
    acl_test.go:59: addBucketOwner: ACLHandle.Set: googleapi: Error 412: One or more users named in the policy do not belong to a permitted customer., conditionNotMet
    acl_test.go:74: removeBucketOwner: ACLHandle.Delete: googleapi: Error 404: The specified key does not exist., notFound
--- FAIL: TestACL (3.48s)
FAIL
FAIL    github.com/GoogleCloudPlatform/golang-samples/storage/acl   3.482s
=== RUN   TestSetBucketPublicIAM
    buckets_test.go:522: setBucketPublicIAM: Bucket("golang-samples-tests-6-storage-buckets-tests").IAM().SetPolicy: googleapi: Error 412: One or more users named in the policy do not belong to a permitted customer., conditionNotMet
--- FAIL: TestSetBucketPublicIAM (3.09s)
=== RUN   TestDelete
--- PASS: TestDelete (2.13s)
FAIL
FAIL    github.com/GoogleCloudPlatform/golang-samples/storage/buckets   35.653s
=== CONT  TestHTTP2Server
    h2c.e2e_test.go:59: http2.Get: unexpected response status: 403 Forbidden
2021/06/19 18:31:42 Attempt #1: Running: operation [delete service] for service [h2c]...
2021/06/19 18:31:42 Attempt #1: Executing: operation [delete service] for service [h2c]: /tmp/google-cloud-sdk/bin/gcloud: --quiet run services delete h2c-20210619-182730 --project golang-samples-tests-6 --platform managed --region us-central1
2021/06/19 18:31:48 Attempt #1: Running: operation [delete container image] for service [h2c]...
2021/06/19 18:31:48 Attempt #1: Executing: operation [delete container image] for service [h2c]: /tmp/google-cloud-sdk/bin/gcloud: --quiet container images delete gcr.io/golang-samples-tests-6/h2c:20210619-182730
--- FAIL: TestHTTP2Server (76.75s)
olavloite commented 3 years ago

@skuruppu This seems to be a 'typical' problem with one of the backup tests: The operation took too long and caused the entire test run to time out. We have already tried a couple of tweaks for these tests to get them to work consistently, for example #2050, but there are still some flaky failures. One last option could be to execute each backup test on a separate Spanner instance, and execute these tests in parallel with each other and with the other tests. That would obviously require additional resources on the backend, but should at least in theory reduce the probability that this happens. Would that be an option?

flaky-bot[bot] commented 3 years ago

Looks like this issue is flaky. :worried:

I'm going to leave this open and stop commenting.

A human should fix and close this.


When run at the same commit (9bc859f48ef701a4b81794b5bb86b509b17e2e4c), this test passed in one build (Build Status, Sponge) and failed in another build (Build Status, Sponge).

skuruppu commented 3 years ago

@olavloite that's a good question. @codyoss is it possible for us to create a couple of extra instances to run the backups tests in parallel? I believe the test suites in this repo are using a one static instance for all tests. Please correct me if I'm wrong though.

olavloite commented 3 years ago

@olavloite that's a good question. @codyoss is it possible for us to create a couple of extra instances to run the backups tests in parallel? I believe the test suites in this repo are using a one static instance for all tests. Please correct me if I'm wrong though.

We could also just create the instance as part of the test process and then delete it afterwards. We already have a sample for creating an instance that does that, so the test user should have the permission. It's more a question whether anyone would mind the extra resource usage on the test project.

tbpg commented 3 years ago

If creating the instance on demand (or even just a few more static instances) makes the test pass consistently, let's do it! We should make sure to garbage collect as needed, of course.

skuruppu commented 3 years ago

@olavloite let me or @hengfengli know if you want any more input on this.

hengfengli commented 3 years ago

@olavloite Will #2122 solve this issue?

olavloite commented 3 years ago

@olavloite Will #2122 solve this issue?

Sorry, yes, I forgot to mention this issue in the description of #2122. #2122 was created specifically to solve this issue, so I think we can close this for now.

flaky-bot[bot] commented 3 years ago

Oops! Looks like this issue is still flaky. It failed again. :grimacing:

I reopened the issue, but a human will need to close it again.


commit: 0beb3a0450b80fbd31bb02ec538092ae509a5ba5 buildURL: Build Status, Sponge status: failed

Test output
    integration_test.go:520: failed to restore database with customer managed encryption key: restoreBackupWithCustomerManagedEncryptionKey.RestoreDatabase: rpc error: code = NotFound desc = Backup not found: projects/golang-samples-tests/instances/go-sample-8602f510-455a-4e/backups/enc-backup-1625246572-00f37a7a
hengfengli commented 3 years ago

@olavloite It failed again 😞 . I think we hit the limit:

rpc error: code = ResourceExhausted desc = Quota exceeded for quota metric 'Instance create requests' and limit 'Instance create requests per minute' of service 'spanner.googleapis.com' for consumer
olavloite commented 3 years ago

I can't find a specific limit for this type of requests in the documentation, so I assume that this is something that can also be configured per project. I've added a retry loop for the CreateInstance requests to see if that helps. @skuruppu Do you know if this is something that is configurable for the test project?

skuruppu commented 3 years ago

I don't think that particular quota is configurable (there's another one for the number of admin API requests that is configurable I think). In any case, adding a retry loop is the right approach.

hengfengli commented 3 years ago

As the retry logic #2135 has been merged, I will close this issue.

flaky-bot[bot] commented 3 years ago

Oops! Looks like this issue is still flaky. It failed again. :grimacing:

I reopened the issue, but a human will need to close it again.


commit: 42a8597d5beb089c313eb618ac1363af5e404be0 buildURL: Build Status, Sponge status: failed

Test output
    integration_test.go:520: failed to restore database with customer managed encryption key: restoreBackupWithCustomerManagedEncryptionKey.RestoreDatabase: rpc error: code = NotFound desc = Backup not found: projects/golang-samples-tests/instances/go-sample-af613441-d5e1-4c/backups/enc-backup-1625679029-4a68ae14
olavloite commented 3 years ago

It seems that two different test instances were deleted while the backups were being restored (or just before the restore operations started), as there are two backup tests that failed with the same error, but I don't quite understand how/why that happened...

hengfengli commented 3 years ago

@olavloite I think I have found the reason. For the test, the DB names are different when creating the backup and restoring the backup.

I ran the above two tests locally and get the following error (with some debugging print-outs):

=== RUN   TestBackupSample
=== PAUSE TestBackupSample
=== RUN   TestBackupCustomerManagedEncryptionKeys
=== PAUSE TestBackupCustomerManagedEncryptionKeys
=== CONT  TestBackupSample
=== CONT  TestBackupCustomerManagedEncryptionKeys
TestBackupSample dbName:projects/[hide-project-id]/instances/go-hengfeng-97b30181-a0e5-46/databases/smpl-1626055810-f168a628
TestCustomerManagedEncryptionKeys dbName:projects/[hide-project-id]/instances/go-hengfeng-dcac226b-f568-49/databases/smpl-1626055810-431d8206
    integration_test.go:522: failed to restore database with customer managed encryption key: restoreBackupWithCustomerManagedEncryptionKey.RestoreDatabase: rpc error: code = NotFound desc = Backup not found: projects/[hide-project-id]/instances/go-hengfeng-465c6a42-1d7c-41/backups/enc-backup-1626055842-4488037d
--- FAIL: TestBackupCustomerManagedEncryptionKeys (275.10s)
=== CONT  TestBackupSample
    integration_test.go:522: failed to restore a backup: rpc error: code = NotFound desc = Backup not found: projects[hide-project-id]/instances/go-hengfeng-5f1b9581-8df3-44/backups/backup-1626055810-f168a628
--- FAIL: TestBackupSample (304.73s)
FAIL
FAIL    github.com/GoogleCloudPlatform/golang-samples/spanner/spanner_snippets/spanner  304.944s
FAIL

As you can see, all of their instance IDs are different.

https://github.com/GoogleCloudPlatform/golang-samples/blob/03b672e88838a8d50647cd111c05d81ef20ff488/spanner/spanner_snippets/spanner/integration_test.go#L325-L327

dbName and restoreDBName are not in the same instance. When we restore the DB, we use restoreDBName and backupID so that we can't find the backup.

olavloite commented 3 years ago

dbName and restoreDBName are not in the same instance. When we restore the DB, we use restoreDBName and backupID so that we can't find the backup.

Thanks for finding that one. Are these tests only executed nightly? As I would expect this to fail every time.

hengfengli commented 3 years ago

dbName and restoreDBName are not in the same instance. When we restore the DB, we use restoreDBName and backupID so that we can't find the backup.

Thanks for finding that one. Are these tests only executed nightly? As I would expect this to fail every time.

No, I don't think so. Previously, it was set to _ = testutil.EndToEndTest(t), which will run nightly. But now, it has been updated to _ = testutil.SystemTest(t) and I think it runs every time.

hengfengli commented 3 years ago

Sorry, my bad. I totally forgot that it uses GOLANG_SAMPLES_E2E_TEST to decide if it needs to run instead of calling testutil.EndToEndTest(t). This is suggested by Tyler (https://github.com/GoogleCloudPlatform/golang-samples/issues/2033#issuecomment-817988935). testutil.EndToEndTest(t) will call t.Parallel, which causes a rate limit error of max restoring operations in an instance. GOLANG_SAMPLES_E2E_TEST is set to true nightly. So, the answer is YES, they will run nightly.

But, with your latest change, I think it may be possible to just call testutil.EndToEndTest(t) because they can run in parallel now.

flaky-bot[bot] commented 3 years ago

Oops! Looks like this issue is still flaky. It failed again. :grimacing:

I reopened the issue, but a human will need to close it again.


commit: 03e3c2b7adbd1f4814342439e60e1c40799aeff2 buildURL: Build Status, Sponge status: failed

hengfengli commented 3 years ago

@olavloite We get a time out error: panic: test timed out after 1h0m0s. We should call testutil.EndToEndTest(t) to let them run in parallel to speed up the tests.

hengfengli commented 3 years ago

Ignore my previous comment. I think you have already enabled them to run in parallel. So why does it take so long?

olavloite commented 3 years ago

AFAICT there are two tests still running when it times out, and both are waiting for a backup operation to finish:

goroutine 22 [select]:
github.com/googleapis/gax-go/v2.Sleep(0xd25808, 0xc0008420c0, 0x8245e6654, 0xd237c0, 0xc0004d7e60)
    /go/pkg/mod/github.com/googleapis/gax-go/v2@v2.0.5/invoke.go:55 +0xa5
cloud.google.com/go/longrunning.(*Operation).wait(0xc00017e420, 0xd25808, 0xc0008420c0, 0xd237c0, 0xc0004d7e60, 0xc0007a9b18, 0xc6ef48, 0x0, 0x0, 0x0, ...)
    /go/pkg/mod/cloud.google.com/go@v0.84.0/longrunning/longrunning.go:156 +0xe5
cloud.google.com/go/longrunning.(*Operation).WaitWithInterval(0xc00017e420, 0xd25808, 0xc0008420c0, 0xd237c0, 0xc0004d7e60, 0xdf8475800, 0x0, 0x0, 0x0, 0xb065c0, ...)
    /go/pkg/mod/cloud.google.com/go@v0.84.0/longrunning/longrunning.go:142 +0xf3
cloud.google.com/go/spanner/admin/database/apiv1.(*CreateBackupOperation).Wait(0xc00051a500, 0xd25808, 0xc0008420c0, 0x0, 0x0, 0x0, 0x0, 0xc00051a500, 0x0)
    /go/pkg/mod/cloud.google.com/go/spanner@v1.21.0/admin/database/apiv1/database_admin_client.go:1048 +0xaa
github.com/GoogleCloudPlatform/golang-samples/spanner/spanner_snippets/spanner.createBackup(0xd25808, 0xc0008420c0, 0xd15b60, 0xc0008f4ea0, 0xc000046230, 0x65, 0xc0002f02e0, 0x1a, 0x170e984, 0xed87fc43d, ...)
    /tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/spanner_create_backup.go:60 +0x5ed
github.com/GoogleCloudPlatform/golang-samples/spanner/spanner_snippets/spanner.runCreateBackupSample(0xd25808, 0xc0008420c0, 0xc000103500, 0xc6ece8, 0xc000046230, 0x65, 0xc0002f02e0, 0x1a, 0x170e984, 0xed87fc43d, ...)
    /tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/integration_test.go:490 +0xdb
github.com/GoogleCloudPlatform/golang-samples/spanner/spanner_snippets/spanner.TestBackupSample(0xc000103500)
    /tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/integration_test.go:336 +0x33a
testing.tRunner(0xc000103500, 0xc6ec88)
    /usr/local/go/src/testing/testing.go:1194 +0xef
created by testing.(*T).Run
    /usr/local/go/src/testing/testing.go:1239 +0x2b3
goroutine 24 [select]:
github.com/googleapis/gax-go/v2.Sleep(0xd25808, 0xc0002870e0, 0x8da2acb62, 0xd237c0, 0xc000438b40)
    /go/pkg/mod/github.com/googleapis/gax-go/v2@v2.0.5/invoke.go:55 +0xa5
cloud.google.com/go/longrunning.(*Operation).wait(0xc0002278d8, 0xd25808, 0xc0002870e0, 0xd237c0, 0xc000438b40, 0xc000b97ba8, 0xc6ef48, 0x0, 0x0, 0x0, ...)
    /go/pkg/mod/cloud.google.com/go@v0.84.0/longrunning/longrunning.go:156 +0xe5
cloud.google.com/go/longrunning.(*Operation).WaitWithInterval(0xc0002278d8, 0xd25808, 0xc0002870e0, 0xd237c0, 0xc000438b40, 0xdf8475800, 0x0, 0x0, 0x0, 0xb065c0, ...)
    /go/pkg/mod/cloud.google.com/go@v0.84.0/longrunning/longrunning.go:142 +0xf3
cloud.google.com/go/spanner/admin/database/apiv1.(*CreateBackupOperation).Wait(0xc000010208, 0xd25808, 0xc0002870e0, 0x0, 0x0, 0x0, 0x0, 0xc000010208, 0x0)
    /go/pkg/mod/cloud.google.com/go/spanner@v1.21.0/admin/database/apiv1/database_admin_client.go:1048 +0xaa
github.com/GoogleCloudPlatform/golang-samples/spanner/spanner_snippets/spanner.createBackupWithCustomerManagedEncryptionKey(0xd25808, 0xc0002870e0, 0xd15b60, 0xc00071c0f0, 0xc000046070, 0x65, 0xc0002f1240, 0x1e, 0xc000317110, 0x6f, ...)
    /tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/spanner_create_backup_with_encryption_key.go:66 +0x5c9
github.com/GoogleCloudPlatform/golang-samples/spanner/spanner_snippets/spanner.TestCustomerManagedEncryptionKeys(0xc000103b00)
    /tmpfs/src/github/golang-samples/spanner/spanner_snippets/spanner/integration_test.go:418 +0x71f
testing.tRunner(0xc000103b00, 0xc6eca0)
    /usr/local/go/src/testing/testing.go:1194 +0xef
created by testing.(*T).Run
    /usr/local/go/src/testing/testing.go:1239 +0x2b3

So I'm afraid that the simple answer is that there were two backup operations that did not finish within 1 hour on the backend.

hengfengli commented 3 years ago

I'll close this to see if it happens again.