apple / foundationdb

FoundationDB - the open source, distributed, transactional key-value store
https://apple.github.io/foundationdb/
Apache License 2.0
14.43k stars 1.31k forks source link

TracedTooManyLines failure in tests/rare/SwizzledLargeApiCorrectness.toml #7394

Closed sfc-gh-xwang closed 2 years ago

sfc-gh-xwang commented 2 years ago
sfc-gh-xwang commented 2 years ago

The recovery state is stuck in accepting_commits so that the ConsistencyCheck keep retrying. It seems that the cluster also can't recruit log system in one of the dc. An emergency transaction also changed the original region setting:

{  "Severity": "10", "Time": "49.193427", "DateTime": "2022-06-15T18:24:14Z", "Type": "MasterRecoveryState", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "StatusCode": "0", "Status": "reading_coordinated_state", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD", "TrackLatestType": "Original" }
{  "Severity": "10", "Time": "51.371700", "DateTime": "2022-06-15T18:24:14Z", "Type": "MasterRecoveryState", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "StatusCode": "1", "Status": "locking_coordinated_state", "TLogs": "0", "ActiveGenerations": "1", "MyRecoveryCount": "3", "ForceRecovery": "0", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD", "TrackLatestType": "Original" }
{  "Severity": "10", "Time": "52.779701", "DateTime": "2022-06-15T18:24:14Z", "Type": "MasterRecoveryState", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "StatusCode": "3", "Status": "reading_transaction_system_state", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD", "TrackLatestType": "Original" }
{  "Severity": "10", "Time": "62.779701", "DateTime": "2022-06-15T18:24:14Z", "Type": "MasterRecoveryState", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "StatusCode": "5", "Status": "configuration_never_created", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD", "TrackLatestType": "Original" }
{  "Severity": "10", "Time": "64.101794", "DateTime": "2022-06-15T18:24:14Z", "Type": "EmergencyTransactionMutation", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "MType": "0", "P1": "\\xff/conf/regions", "P2": "\\x01\\x00\\x01b\\xb0\\x00\\xdb\\x0fo\\x01\\x00\\x00{\"regions\":[{\"datacenters\":[{\"id\":\"0\",\"priority\":2},{\"id\":\"2\",\"priority\":1,\"satellite\":1},{\"id\":\"4\",\"priority\":1,\"satellite\":1}],\"satellite_redundancy_mode\":\"two_satellite_fast\"},{\"datacenters\":[{\"id\":\"1\",\"priority\":-1},{\"id\":\"3\",\"priority\":1,\"satellite\":1},{\"id\":\"5\",\"priority\":1,\"satellite\":1,\"satellite_logs\":5}],\"satellite_redundancy_mode\":\"two_satellite_fast\"}]}", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD" }
{  "Severity": "10", "Time": "64.101794", "DateTime": "2022-06-15T18:24:14Z", "Type": "MasterRecoveryState", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "StatusCode": "7", "Status": "recruiting_transaction_servers", "Conf": "{\"backup_worker_enabled\":0,\"blob_granules_enabled\":0,\"commit_proxies\":4,\"grv_proxies\":2,\"log_spill\":2,\"log_version\":7,\"perpetual_storage_wiggle\":1,\"perpetual_storage_wiggle_locality\":\"data_hall:0\",\"proxies\":6,\"redundancy_mode\":\"single\",\"regions\":[{\"datacenters\":[{\"id\":\"0\",\"priority\":2},{\"id\":\"2\",\"priority\":1,\"satellite\":1},{\"id\":\"4\",\"priority\":1,\"satellite\":1}],\"satellite_redundancy_mode\":\"two_satellite_fast\"},{\"datacenters\":[{\"id\":\"1\",\"priority\":-1},{\"id\":\"3\",\"priority\":1,\"satellite\":1},{\"...", "RequiredCommitProxies": "1", "RequiredGrvProxies": "1", "RequiredResolvers": "1", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD", "TrackLatestType": "Original" }
{  "Severity": "10", "Time": "64.101794", "DateTime": "2022-06-15T18:24:14Z", "Type": "MasterRecoveryState", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "StatusCode": "8", "Status": "initializing_transaction_servers", "CommitProxies": "2", "GrvProxies": "1", "TLogs": "2", "Resolvers": "1", "SatelliteTLogs": "4", "OldLogRouters": "0", "StorageServers": "1", "BackupWorkers": "0", "PrimaryDcIds": "0", "RemoteDcIds": "1", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD", "TrackLatestType": "Original" }
{  "Severity": "10", "Time": "65.675057", "DateTime": "2022-06-15T18:24:14Z", "Type": "EmergencyTransactionMutation", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "MType": "0", "P1": "\\xff/conf/regions", "P2": "\\x01\\x00\\x01b\\xb0\\x00\\xdb\\x0fo\\x01\\x00\\x00{\"regions\":[{\"datacenters\":[{\"id\":\"0\",\"priority\":2},{\"id\":\"2\",\"priority\":1,\"satellite\":1},{\"id\":\"4\",\"priority\":1,\"satellite\":1}],\"satellite_redundancy_mode\":\"two_satellite_fast\"},{\"datacenters\":[{\"id\":\"1\",\"priority\":-1},{\"id\":\"3\",\"priority\":1,\"satellite\":1},{\"id\":\"5\",\"priority\":1,\"satellite\":1,\"satellite_logs\":5}],\"satellite_redundancy_mode\":\"two_satellite_fast\"}]}", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD,CP,RV,SS" }
{  "Severity": "10", "Time": "66.587923", "DateTime": "2022-06-15T18:24:14Z", "Type": "MasterRecoveryState", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "StatusCode": "9", "Status": "recovery_transaction", "PrimaryLocality": "0", "DcId": "0", "ClusterId": "1391ef2ddf642d9d", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD,CP,RV,SS,TL", "TrackLatestType": "Original" }
{  "Severity": "10", "Time": "66.587923", "DateTime": "2022-06-15T18:24:14Z", "Type": "MutationRequiresRestart", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "M": "code: SetValue param1: \\xff/conf/regions param2: \\x01\\x00\\x01b\\xb0\\x00\\xdb\\x0fo\\x01\\x00\\x00{\"regions\":[{\"datacenters\":[{\"id\":\"0\",\"priority\":2},{\"id\":\"2\",\"priority\":1,\"satellite\":1},{\"id\":\"4\",\"priority\":1,\"satellite\":1}],\"satellite_redundancy_mode\":\"two_satellite_fast\"},{\"datacenters\":[{\"id\":\"1\",\"priority\":-1},{\"id\":\"3\",\"priority\":1,\"satellite\":1},{\"id\":\"5\",\"priority\":1,\"satellite\":1,\"satellite_logs\":5}],\"satellite_redundancy_mode\":\"two_satellite_fast\"}]}", "PrevValue": "(none)", "ToCommit": "0", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD,CP,RV,SS,TL" }
{  "Severity": "10", "Time": "68.263413", "DateTime": "2022-06-15T18:24:15Z", "Type": "MasterRecoveryState", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "StatusCode": "10", "Status": "writing_coordinated_state", "TLogList": "0: 57ac481d2fc0e3a3799b3c5347395ef7, 3ec14a8aa97ba742ec90d1f0a071c33f 1: f601afe3587c416fe25ffb348ac56468, dd70b710837f89cceac8a064120c6204, dac55ad71a5634608a97b908335a0638, 5f6b8e30097e020bc731c93255e2c1c6 2: 8aaa70699f9dad62b2709a22d6d2a7ed, 8b20d9b5b153f0113ce01a4a42de060e ", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD,CP,RV,SS,TL", "TrackLatestType": "Original" }
{  "Severity": "10", "Time": "69.676011", "DateTime": "2022-06-15T18:24:15Z", "Type": "MasterRecoveryState", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "StatusCode": "11", "Status": "accepting_commits", "StoreType": "memory-radixtree-beta", "RecoveryDuration": "3.08809", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD,CP,RV,SS,TL", "TrackLatestType": "Original" }
{  "Severity": "10", "Time": "69.676011", "DateTime": "2022-06-15T18:24:15Z", "Type": "MasterRecoveryState", "Machine": "[abcd::2:0:1:0]:1", "ID": "6e387025a7f1c65f", "StatusCode": "14", "Status": "fully_recovered", "ClusterId": "1391ef2ddf642d9d", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD,CP,RV,SS,TL", "TrackLatestType": "Original" }
{  "Severity": "10", "Time": "271.212108", "DateTime": "2022-06-15T18:24:20Z", "Type": "TrackTlogRecovery", "Machine": "[abcd::2:0:1:0]:1", "ID": "0000000000000000", "FinalUpdate": "0", "NewState.tlogs": "2", "NewState.OldTLogs": "5", "Expected.tlogs": "3", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "CC,CD,CP,RV,SS,TL" }
{  "Severity": "10", "Time": "350.494055", "DateTime": "2022-06-15T18:24:26Z", "Type": "QuietDatabaseWaitingOnFullRecovery", "Machine": "[abcd::3:4:3:3]:1", "ID": "0000000000000000", "ThreadID": "11639603881429370873", "LogGroup": "default", "Roles": "TS" }