Status: Open. Opened by matthew-richerson 1 month ago.
2024-10-02T07:36:57.662-0700 DEBUG controllers.NnfNodeBlockStorage Command Run {"NnfNodeBlockStorage": {"name":"default-fluxjob-69363685977490432-0-xfs-0","namespace":"tuolumne265"}, "command": "nvme list -v --output-format=json"}
2024-10-02T07:36:58.009-0700 INFO controllers.NnfNodeBlockStorage Deleting storage pool {"NnfNodeBlockStorage": {"name":"default-fluxjob-69361627010434048-0-xfs-0","namespace":"tuolumne265"}, "Id": "default-fluxjob-69361627010434048-0-xfs-0-0"}
2024-10-02T07:36:58.044-0700 DEBUG ec.nvme.12.5 Attached namespace {"storageId": "12", "slot": 2, "serialNumber": "4D20A0CW0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.154-0700 DEBUG ec.nvme.12.5 Formatted namespace {"storageId": "12", "slot": 2, "serialNumber": "4D20A0CW0U61", "namespaceId": 5}
2024-10-02T07:36:58.173-0700 DEBUG ec.nvme.12.5 Detached namespace {"storageId": "12", "slot": 2, "serialNumber": "4D20A0CW0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.177-0700 DEBUG ec.nvme.11.5 Attached namespace {"storageId": "11", "slot": 6, "serialNumber": "4D20A0E30U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.287-0700 DEBUG ec.nvme.11.5 Formatted namespace {"storageId": "11", "slot": 6, "serialNumber": "4D20A0E30U61", "namespaceId": 5}
2024-10-02T07:36:58.307-0700 DEBUG ec.nvme.11.5 Detached namespace {"storageId": "11", "slot": 6, "serialNumber": "4D20A0E30U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.311-0700 DEBUG ec.nvme.10.5 Attached namespace {"storageId": "10", "slot": 5, "serialNumber": "4D20A0CS0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.420-0700 DEBUG ec.nvme.10.5 Formatted namespace {"storageId": "10", "slot": 5, "serialNumber": "4D20A0CS0U61", "namespaceId": 5}
2024-10-02T07:36:58.438-0700 DEBUG ec.nvme.10.5 Detached namespace {"storageId": "10", "slot": 5, "serialNumber": "4D20A0CS0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.441-0700 DEBUG ec.nvme.0.5 Attached namespace {"storageId": "0", "slot": 8, "serialNumber": "4D20A0D30U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.551-0700 DEBUG ec.nvme.0.5 Formatted namespace {"storageId": "0", "slot": 8, "serialNumber": "4D20A0D30U61", "namespaceId": 5}
2024-10-02T07:36:58.570-0700 DEBUG ec.nvme.0.5 Detached namespace {"storageId": "0", "slot": 8, "serialNumber": "4D20A0D30U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.574-0700 DEBUG ec.nvme.3.5 Attached namespace {"storageId": "3", "slot": 16, "serialNumber": "4D20A0CV0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.686-0700 DEBUG ec.nvme.3.5 Formatted namespace {"storageId": "3", "slot": 16, "serialNumber": "4D20A0CV0U61", "namespaceId": 5}
2024-10-02T07:36:58.703-0700 DEBUG ec.nvme.3.5 Detached namespace {"storageId": "3", "slot": 16, "serialNumber": "4D20A0CV0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.707-0700 DEBUG ec.nvme.15.5 Attached namespace {"storageId": "15", "slot": 10, "serialNumber": "4D20A0CY0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.828-0700 DEBUG ec.nvme.15.5 Formatted namespace {"storageId": "15", "slot": 10, "serialNumber": "4D20A0CY0U61", "namespaceId": 5}
2024-10-02T07:36:58.846-0700 DEBUG ec.nvme.15.5 Detached namespace {"storageId": "15", "slot": 10, "serialNumber": "4D20A0CY0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.850-0700 DEBUG ec.nvme.17.5 Attached namespace {"storageId": "17", "slot": 3, "serialNumber": "4D30A0QM0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.961-0700 DEBUG ec.nvme.17.5 Formatted namespace {"storageId": "17", "slot": 3, "serialNumber": "4D30A0QM0U61", "namespaceId": 5}
2024-10-02T07:36:58.979-0700 DEBUG ec.nvme.17.5 Detached namespace {"storageId": "17", "slot": 3, "serialNumber": "4D30A0QM0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:58.983-0700 DEBUG ec.nvme.4.5 Attached namespace {"storageId": "4", "slot": 17, "serialNumber": "4D20A0CT0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.092-0700 DEBUG ec.nvme.4.5 Formatted namespace {"storageId": "4", "slot": 17, "serialNumber": "4D20A0CT0U61", "namespaceId": 5}
2024-10-02T07:36:59.111-0700 DEBUG ec.nvme.4.5 Detached namespace {"storageId": "4", "slot": 17, "serialNumber": "4D20A0CT0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.115-0700 DEBUG ec.nvme.8.5 Attached namespace {"storageId": "8", "slot": 12, "serialNumber": "4D10A00C0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.226-0700 DEBUG ec.nvme.8.5 Formatted namespace {"storageId": "8", "slot": 12, "serialNumber": "4D10A00C0U61", "namespaceId": 5}
2024-10-02T07:36:59.247-0700 DEBUG ec.nvme.8.5 Detached namespace {"storageId": "8", "slot": 12, "serialNumber": "4D10A00C0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.251-0700 DEBUG ec.nvme.2.5 Attached namespace {"storageId": "2", "slot": 15, "serialNumber": "4D20A0DZ0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.364-0700 DEBUG ec.nvme.2.5 Formatted namespace {"storageId": "2", "slot": 15, "serialNumber": "4D20A0DZ0U61", "namespaceId": 5}
2024-10-02T07:36:59.389-0700 DEBUG ec.nvme.2.5 Detached namespace {"storageId": "2", "slot": 15, "serialNumber": "4D20A0DZ0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.393-0700 DEBUG ec.nvme.1.5 Attached namespace {"storageId": "1", "slot": 7, "serialNumber": "4D20A1210U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.503-0700 DEBUG ec.nvme.1.5 Formatted namespace {"storageId": "1", "slot": 7, "serialNumber": "4D20A1210U61", "namespaceId": 5}
2024-10-02T07:36:59.525-0700 DEBUG ec.nvme.1.5 Detached namespace {"storageId": "1", "slot": 7, "serialNumber": "4D20A1210U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.529-0700 DEBUG ec.nvme.14.5 Attached namespace {"storageId": "14", "slot": 9, "serialNumber": "4D20A0D20U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.637-0700 DEBUG ec.nvme.14.5 Formatted namespace {"storageId": "14", "slot": 9, "serialNumber": "4D20A0D20U61", "namespaceId": 5}
2024-10-02T07:36:59.657-0700 DEBUG ec.nvme.14.5 Detached namespace {"storageId": "14", "slot": 9, "serialNumber": "4D20A0D20U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.661-0700 DEBUG ec.nvme.16.5 Attached namespace {"storageId": "16", "slot": 11, "serialNumber": "4D20A0DY0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.774-0700 DEBUG ec.nvme.16.5 Formatted namespace {"storageId": "16", "slot": 11, "serialNumber": "4D20A0DY0U61", "namespaceId": 5}
2024-10-02T07:36:59.794-0700 DEBUG ec.nvme.16.5 Detached namespace {"storageId": "16", "slot": 11, "serialNumber": "4D20A0DY0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.799-0700 DEBUG ec.nvme.6.5 Attached namespace {"storageId": "6", "slot": 14, "serialNumber": "4D20A0D10U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.910-0700 DEBUG ec.nvme.6.5 Formatted namespace {"storageId": "6", "slot": 14, "serialNumber": "4D20A0D10U61", "namespaceId": 5}
2024-10-02T07:36:59.929-0700 DEBUG ec.nvme.6.5 Detached namespace {"storageId": "6", "slot": 14, "serialNumber": "4D20A0D10U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:36:59.933-0700 DEBUG ec.nvme.5.5 Attached namespace {"storageId": "5", "slot": 18, "serialNumber": "4D10A03B0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:37:00.043-0700 DEBUG ec.nvme.5.5 Formatted namespace {"storageId": "5", "slot": 18, "serialNumber": "4D10A03B0U61", "namespaceId": 5}
2024-10-02T07:37:00.063-0700 DEBUG ec.nvme.5.5 Detached namespace {"storageId": "5", "slot": 18, "serialNumber": "4D10A03B0U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:37:00.067-0700 DEBUG ec.nvme.9.5 Attached namespace {"storageId": "9", "slot": 4, "serialNumber": "4D20A0D00U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:37:00.177-0700 DEBUG ec.nvme.9.5 Formatted namespace {"storageId": "9", "slot": 4, "serialNumber": "4D20A0D00U61", "namespaceId": 5}
2024-10-02T07:37:00.197-0700 DEBUG ec.nvme.9.5 Detached namespace {"storageId": "9", "slot": 4, "serialNumber": "4D20A0D00U61", "namespaceId": 5, "controllerId": 1}
2024-10-02T07:37:00.201-0700 DEBUG ec.nvme.12.5 Format completed {"storageId": "12", "slot": 2, "serialNumber": "4D20A0CW0U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.205-0700 DEBUG ec.nvme.11.5 Format completed {"storageId": "11", "slot": 6, "serialNumber": "4D20A0E30U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.209-0700 DEBUG ec.nvme.10.5 Format completed {"storageId": "10", "slot": 5, "serialNumber": "4D20A0CS0U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.213-0700 DEBUG ec.nvme.0.5 Format completed {"storageId": "0", "slot": 8, "serialNumber": "4D20A0D30U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.218-0700 DEBUG ec.nvme.3.5 Format completed {"storageId": "3", "slot": 16, "serialNumber": "4D20A0CV0U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.222-0700 DEBUG ec.nvme.15.5 Format completed {"storageId": "15", "slot": 10, "serialNumber": "4D20A0CY0U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.227-0700 DEBUG ec.nvme.17.5 Format completed {"storageId": "17", "slot": 3, "serialNumber": "4D30A0QM0U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.231-0700 DEBUG ec.nvme.4.5 Format completed {"storageId": "4", "slot": 17, "serialNumber": "4D20A0CT0U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.235-0700 DEBUG ec.nvme.8.5 Format completed {"storageId": "8", "slot": 12, "serialNumber": "4D10A00C0U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.240-0700 DEBUG ec.nvme.2.5 Format completed {"storageId": "2", "slot": 15, "serialNumber": "4D20A0DZ0U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.244-0700 DEBUG ec.nvme.1.5 Format completed {"storageId": "1", "slot": 7, "serialNumber": "4D20A1210U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.248-0700 DEBUG ec.nvme.14.5 Format completed {"storageId": "14", "slot": 9, "serialNumber": "4D20A0D20U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.253-0700 DEBUG ec.nvme.16.5 Format completed {"storageId": "16", "slot": 11, "serialNumber": "4D20A0DY0U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.257-0700 DEBUG ec.nvme.6.5 Format completed {"storageId": "6", "slot": 14, "serialNumber": "4D20A0D10U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.261-0700 DEBUG ec.nvme.5.5 Format completed {"storageId": "5", "slot": 18, "serialNumber": "4D10A03B0U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.266-0700 DEBUG ec.nvme.9.5 Format completed {"storageId": "9", "slot": 4, "serialNumber": "4D20A0D00U61", "namespaceId": 5, "utilization": 0}
2024-10-02T07:37:00.358-0700 DEBUG ec.nvme.12.5 Deleted namespace {"storageId": "12", "slot": 2, "serialNumber": "4D20A0CW0U61", "namespaceId": 5}
2024-10-02T07:37:00.449-0700 DEBUG ec.nvme.11.5 Deleted namespace {"storageId": "11", "slot": 6, "serialNumber": "4D20A0E30U61", "namespaceId": 5}
2024-10-02T07:37:00.602-0700 DEBUG ec.nvme.10.5 Deleted namespace {"storageId": "10", "slot": 5, "serialNumber": "4D20A0CS0U61", "namespaceId": 5}
2024-10-02T07:37:00.693-0700 DEBUG ec.nvme.0.5 Deleted namespace {"storageId": "0", "slot": 8, "serialNumber": "4D20A0D30U61", "namespaceId": 5}
2024-10-02T07:37:00.785-0700 DEBUG ec.nvme.3.5 Deleted namespace {"storageId": "3", "slot": 16, "serialNumber": "4D20A0CV0U61", "namespaceId": 5}
2024-10-02T07:37:00.877-0700 DEBUG ec.nvme.15.5 Deleted namespace {"storageId": "15", "slot": 10, "serialNumber": "4D20A0CY0U61", "namespaceId": 5}
2024-10-02T07:37:00.969-0700 DEBUG ec.nvme.17.5 Deleted namespace {"storageId": "17", "slot": 3, "serialNumber": "4D30A0QM0U61", "namespaceId": 5}
2024-10-02T07:37:01.060-0700 DEBUG ec.nvme.4.5 Deleted namespace {"storageId": "4", "slot": 17, "serialNumber": "4D20A0CT0U61", "namespaceId": 5}
2024-10-02T07:37:01.152-0700 DEBUG ec.nvme.8.5 Deleted namespace {"storageId": "8", "slot": 12, "serialNumber": "4D10A00C0U61", "namespaceId": 5}
2024-10-02T07:37:01.245-0700 DEBUG ec.nvme.2.5 Deleted namespace {"storageId": "2", "slot": 15, "serialNumber": "4D20A0DZ0U61", "namespaceId": 5}
2024-10-02T07:37:01.336-0700 DEBUG ec.nvme.1.5 Deleted namespace {"storageId": "1", "slot": 7, "serialNumber": "4D20A1210U61", "namespaceId": 5}
2024-10-02T07:37:01.427-0700 DEBUG ec.nvme.14.5 Deleted namespace {"storageId": "14", "slot": 9, "serialNumber": "4D20A0D20U61", "namespaceId": 5}
2024-10-02T07:37:01.519-0700 DEBUG ec.nvme.16.5 Deleted namespace {"storageId": "16", "slot": 11, "serialNumber": "4D20A0DY0U61", "namespaceId": 5}
2024-10-02T07:37:01.612-0700 DEBUG ec.nvme.6.5 Deleted namespace {"storageId": "6", "slot": 14, "serialNumber": "4D20A0D10U61", "namespaceId": 5}
2024-10-02T07:37:01.703-0700 DEBUG ec.nvme.5.5 Deleted namespace {"storageId": "5", "slot": 18, "serialNumber": "4D10A03B0U61", "namespaceId": 5}
2024-10-02T07:37:01.795-0700 DEBUG ec.nvme.9.5 Deleted namespace {"storageId": "9", "slot": 4, "serialNumber": "4D20A0D00U61", "namespaceId": 5}
2024-10-02T07:37:01.873-0700 INFO ec.nnf Deleted storage pool {"storagePoolId": "default-fluxjob-69361627010434048-0-xfs-0-0"}
2024-10-02T07:37:01.880-0700 INFO Observed a panic in reconciler: runtime error: invalid memory address or nil pointer dereference {"controller": "nnfnodeblockstorage", "controllerGroup": "nnf.cray.hpe.com", "controllerKind": "NnfNodeBlockStorage", "NnfNodeBlockStorage": {"name":"default-fluxjob-69362750882579456-0-xfs-0","namespace":"tuolumne265"}, "namespace": "tuolumne265", "name": "default-fluxjob-69362750882579456-0-xfs-0", "reconcileID": "93a8718a-7902-4518-a139-120fc4d60eb6"}
panic: runtime error: invalid memory address or nil pointer dereference [recovered]
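On restart, nnf-ec fails to start up: replaying the storage database returns a 404 "namespace not found" for a namespace (serial 4D20A0CW0U61, namespace ID 5) that was deleted just before the panic: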
2024-10-02T07:37:04.913-0700 DEBUG ec.nnf Fabric ready {"eventId": "54", "eventMessage": "The fabric '%1' is ready", "eventArgs": ["Rabbit"]}
2024-10-02T07:37:04.913-0700 INFO ec.nnf recover volumes
2024-10-02T07:37:04.913-0700 INFO ec.nnf recover volumes
2024-10-02T07:37:04.913-0700 INFO ec.nnf recover volumes
2024-10-02T07:37:04.913-0700 ERROR ec.nnf namespace not found {"serialNumber": "4D20A0CW0U61", "namespaceId": 5, "error": "Error 404: Not Found, Retry-Delay: 0s"}
2024-10-02T07:37:04.913-0700 ERROR ec.nnf Failed to replay storage database {"eventId": "54", "eventMessage": "The fabric '%1' is ready", "eventArgs": ["Rabbit"], "error": "Error 404: Not Found, Retry-Delay: 0s"}
2024-10-02T07:57:04.947-0700 INFO ec.nnf Link dropped {"eventId": "57", "eventMessage": "Switch '%1' upstream link has gone down on port '%2'.", "eventArgs": ["0", "9"]}
2024-10-02T07:57:04.947-0700 INFO ec.nnf Link dropped {"eventId": "60", "eventMessage": "Switch '%1' upstream link has gone down on port '%2'.", "eventArgs": ["1", "2"]}