oxidecomputer / omicron

Omicron: Oxide control plane
Mozilla Public License 2.0
251 stars 39 forks source link

bgp config delete for invalid and valid `--name-or-id` leads to 500 Internal Server Error #6471

Closed elaine-oxide closed 1 month ago

elaine-oxide commented 2 months ago

I am running a4x2 with:

I was testing `oxide system networking bgp config create` and made several extra bgp configs. Below, as65547 is the original working one; the others are the extra ones that I created.

$ oxide system networking bgp config list
[
  {
    "asn": 65551,
    "description": "hello5",
    "id": "1c013259-97ff-426f-8414-7cacf08dcae4",
    "name": "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz12345678901",
    "time_created": "2024-08-29T00:07:02.774536Z",
    "time_modified": "2024-08-29T00:07:02.774536Z"
  }, {
    "asn": 65547,
    "description": "BGP config for AS 65547",
    "id": "a3b1a17e-7167-47a2-9dff-5ce8cb5ab38b",
    "name": "as65547",
    "time_created": "2024-08-28T00:16:10.327972Z",
    "time_modified": "2024-08-28T00:16:10.327972Z"
  }, {
    "asn": 65548,
    "description": "hello",
    "id": "5b278ef8-7da2-4c84-859e-e955bbb202ed",
    "name": "as65548",
    "time_created": "2024-08-28T23:28:31.655895Z",
    "time_modified": "2024-08-28T23:28:31.655895Z"
  }, {
    "asn": 65549,
    "description": "hello3",
    "id": "8773e937-dea8-422f-8dcf-5add050081eb",
    "name": "as65549",
    "time_created": "2024-08-28T23:37:09.307159Z",
    "time_modified": "2024-08-28T23:37:09.307159Z"
  }, {
    "asn": 65550,
    "description": "0",
    "id": "fef48d7f-97a3-4553-aade-42c1dd2386bb",
    "name": "as65550",
    "time_created": "2024-08-28T23:57:34.067397Z",
    "time_modified": "2024-08-28T23:57:34.067397Z"
  }, {
    "asn": 4294967295,
    "description": "hello6",
    "id": "26a80a18-a43c-4228-a62c-e4e6a9a54bb0",
    "name": "as65552",
    "time_created": "2024-08-29T00:14:51.310793Z",
    "time_modified": "2024-08-29T00:14:51.310793Z"
  }
]

I tried to delete a bgp config that is not in the above list (n does not exist).

$ oxide system networking bgp config delete --name-or-id n
error
Error Response: status: 500 Internal Server Error; headers: {"content-type": "application/json", "x-request-id": "9b686efe-4ca2-4be1-b754-713eee669861", "content-length": "124", "date": "Thu, 29 Aug 2024 00:23:28 GMT"}; value: Error { error_code: Some("Internal"), message: "Internal Server Error", request_id: "9b686efe-4ca2-4be1-b754-713eee669861" }

On a4x2 g3 (serves requests at the IP address that Oxide CLI connects to):

root@oxz_nexus_504e4b3c:~# cat $(svcs -L nexus) | grep 9b686efe-4ca2-4be1-b754-713eee669861 | looker
...
00:23:29.298Z ERRO 504e4b3c-a322-419f-9d48-c580888edcd5 (dropshot_external): failed to lookup bgp config by name
    actor_id = 6cdb2ad6-e115-4f19-b6ec-2d6ee12f3ab9
    authenticated = true
    error = DatabaseError(Unknown, "query `SELECT \\"bgp_config\\".\\"id\\" FROM \\"bgp_config\\" WHERE (\\"bgp_config\\".\\"name\\" = $1) LIMIT $2` contains a full table/index scan which is explicitly disallowed")
    file = nexus/db-queries/src/db/datastore/bgp.rs:273
    local_addr = 172.30.2.6:80
    method = DELETE
    remote_addr = 172.20.2.90:61264
    req_id = 9b686efe-4ca2-4be1-b754-713eee669861
    uri = /v1/system/networking/bgp?name_or_id=n
00:23:29.300Z ERRO 504e4b3c-a322-419f-9d48-c580888edcd5 (dropshot_external): bgp_config_delete failed
    actor_id = 6cdb2ad6-e115-4f19-b6ec-2d6ee12f3ab9
    authenticated = true
    error = InternalError { internal_message: "failed to lookup bgp config by name" }
    file = nexus/db-queries/src/db/datastore/bgp.rs:312
    local_addr = 172.30.2.6:80
    method = DELETE
    remote_addr = 172.20.2.90:61264
    req_id = 9b686efe-4ca2-4be1-b754-713eee669861
    uri = /v1/system/networking/bgp?name_or_id=n
00:23:29.300Z INFO 504e4b3c-a322-419f-9d48-c580888edcd5 (dropshot_external): request completed
    error_message_external = Internal Server Error
    error_message_internal = failed to lookup bgp config by name
    file = /home/elaine/.cargo/git/checkouts/dropshot-a4a923d29dccc492/06c8dab/dropshot/src/server.rs:902
    latency_us = 133956
    local_addr = 172.30.2.6:80
    method = DELETE
    remote_addr = 172.20.2.90:61264
    req_id = 9b686efe-4ca2-4be1-b754-713eee669861
    response_code = 500
    uri = /v1/system/networking/bgp?name_or_id=n

I tried to delete hello5, which does not match any bgp config, because hello5 is the value of a description field, not of an id or name field.

$ oxide system networking bgp config delete --name-or-id hello5
error
Error Response: status: 500 Internal Server Error; headers: {"content-type": "application/json", "x-request-id": "a879aba9-0801-49c4-9e64-a7300d8f03c1", "content-length": "124", "date": "Thu, 29 Aug 2024 00:23:59 GMT"}; value: Error { error_code: Some("Internal"), message: "Internal Server Error", request_id: "a879aba9-0801-49c4-9e64-a7300d8f03c1" }

On a4x2 g3:

root@oxz_nexus_504e4b3c:~# cat $(svcs -L nexus) | grep a879aba9-0801-49c4-9e64-a7300d8f03c1 | looker
...
00:24:00.050Z ERRO 504e4b3c-a322-419f-9d48-c580888edcd5 (dropshot_external): failed to lookup bgp config by name
    actor_id = 6cdb2ad6-e115-4f19-b6ec-2d6ee12f3ab9
    authenticated = true
    error = DatabaseError(Unknown, "query `SELECT \\"bgp_config\\".\\"id\\" FROM \\"bgp_config\\" WHERE (\\"bgp_config\\".\\"name\\" = $1) LIMIT $2` contains a full table/index scan which is explicitly disallowed")
    file = nexus/db-queries/src/db/datastore/bgp.rs:273
    local_addr = 172.30.2.6:80
    method = DELETE
    remote_addr = 172.20.2.90:60175
    req_id = a879aba9-0801-49c4-9e64-a7300d8f03c1
    uri = /v1/system/networking/bgp?name_or_id=hello5
00:24:00.052Z ERRO 504e4b3c-a322-419f-9d48-c580888edcd5 (dropshot_external): bgp_config_delete failed
    actor_id = 6cdb2ad6-e115-4f19-b6ec-2d6ee12f3ab9
    authenticated = true
    error = InternalError { internal_message: "failed to lookup bgp config by name" }
    file = nexus/db-queries/src/db/datastore/bgp.rs:312
    local_addr = 172.30.2.6:80
    method = DELETE
    remote_addr = 172.20.2.90:60175
    req_id = a879aba9-0801-49c4-9e64-a7300d8f03c1
    uri = /v1/system/networking/bgp?name_or_id=hello5
00:24:00.052Z INFO 504e4b3c-a322-419f-9d48-c580888edcd5 (dropshot_external): request completed
    error_message_external = Internal Server Error
    error_message_internal = failed to lookup bgp config by name
    file = /home/elaine/.cargo/git/checkouts/dropshot-a4a923d29dccc492/06c8dab/dropshot/src/server.rs:902
    latency_us = 77101
    local_addr = 172.30.2.6:80
    method = DELETE
    remote_addr = 172.20.2.90:60175
    req_id = a879aba9-0801-49c4-9e64-a7300d8f03c1
    response_code = 500
    uri = /v1/system/networking/bgp?name_or_id=hello5

I tried to delete a bgp config by a UUID that actually exists: the id of the bgp config that has the description hello5.

$ oxide system networking bgp config delete --name-or-id 1c013259-97ff-426f-8414-7cacf08dcae4
error
Error Response: status: 500 Internal Server Error; headers: {"content-type": "application/json", "x-request-id": "fdcd66d1-556d-48fa-a051-eaf55feefcfe", "content-length": "124", "date": "Thu, 29 Aug 2024 00:27:25 GMT"}; value: Error { error_code: Some("Internal"), message: "Internal Server Error", request_id: "fdcd66d1-556d-48fa-a051-eaf55feefcfe" }

On a4x2 g3:

root@oxz_nexus_504e4b3c:~# cat $(svcs -L nexus) | grep fdcd66d1-556d-48fa-a051-eaf55feefcfe | looker
...
00:27:26.133Z ERRO 504e4b3c-a322-419f-9d48-c580888edcd5 (dropshot_external): bgp_config_delete failed
    actor_id = 6cdb2ad6-e115-4f19-b6ec-2d6ee12f3ab9
    authenticated = true
    error = DatabaseError(Unknown, "query `SELECT COUNT(*) FROM \\"switch_port_settings_bgp_peer_config\\" WHERE (\\"switch_port_settings_bgp_peer_config\\".\\"bgp_config_id\\" = $1)` contains a full table/index scan which is explicitly disallowed")
    file = nexus/db-queries/src/db/datastore/bgp.rs:315
    local_addr = 172.30.2.6:80
    method = DELETE
    remote_addr = 172.20.2.90:60307
    req_id = fdcd66d1-556d-48fa-a051-eaf55feefcfe
    uri = /v1/system/networking/bgp?name_or_id=1c013259-97ff-426f-8414-7cacf08dcae4
00:27:26.133Z INFO 504e4b3c-a322-419f-9d48-c580888edcd5 (dropshot_external): request completed
    error_message_external = Internal Server Error
    error_message_internal = unexpected database error: query `SELECT COUNT(*) FROM "switch_port_settings_bgp_peer_config" WHERE ("switch_port_settings_bgp_peer_config"."bgp_config_id" = $1)` contains a full table/index scan which is explicitly disallowed
    file = /home/elaine/.cargo/git/checkouts/dropshot-a4a923d29dccc492/06c8dab/dropshot/src/server.rs:902
    latency_us = 125951
    local_addr = 172.30.2.6:80
    method = DELETE
    remote_addr = 172.20.2.90:60307
    req_id = fdcd66d1-556d-48fa-a051-eaf55feefcfe
    response_code = 500
    uri = /v1/system/networking/bgp?name_or_id=1c013259-97ff-426f-8414-7cacf08dcae4

I tried to delete the same bgp config that has the description hello5, but now referencing it by name.

$ oxide system networking bgp config delete --name-or-id abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz12345678901
error
Error Response: status: 500 Internal Server Error; headers: {"content-type": "application/json", "x-request-id": "b727f3ca-7423-4262-82ac-afee109f8e0c", "content-length": "124", "date": "Thu, 29 Aug 2024 00:35:14 GMT"}; value: Error { error_code: Some("Internal"), message: "Internal Server Error", request_id: "b727f3ca-7423-4262-82ac-afee109f8e0c" }

On a4x2 g3:

root@oxz_nexus_504e4b3c:~# cat $(svcs -L nexus) | grep b727f3ca-7423-4262-82ac-afee109f8e0c | looker
...
00:35:15.000Z ERRO 504e4b3c-a322-419f-9d48-c580888edcd5 (dropshot_external): failed to lookup bgp config by name
    actor_id = 6cdb2ad6-e115-4f19-b6ec-2d6ee12f3ab9
    authenticated = true
    error = DatabaseError(Unknown, "query `SELECT \\"bgp_config\\".\\"id\\" FROM \\"bgp_config\\" WHERE (\\"bgp_config\\".\\"name\\" = $1) LIMIT $2` contains a full table/index scan which is explicitly disallowed")
    file = nexus/db-queries/src/db/datastore/bgp.rs:273
    local_addr = 172.30.2.6:80
    method = DELETE
    remote_addr = 172.20.2.90:61444
    req_id = b727f3ca-7423-4262-82ac-afee109f8e0c
    uri = /v1/system/networking/bgp?name_or_id=abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz12345678901
00:35:15.002Z ERRO 504e4b3c-a322-419f-9d48-c580888edcd5 (dropshot_external): bgp_config_delete failed
    actor_id = 6cdb2ad6-e115-4f19-b6ec-2d6ee12f3ab9
    authenticated = true
    error = InternalError { internal_message: "failed to lookup bgp config by name" }
    file = nexus/db-queries/src/db/datastore/bgp.rs:312
    local_addr = 172.30.2.6:80
    method = DELETE
    remote_addr = 172.20.2.90:61444
    req_id = b727f3ca-7423-4262-82ac-afee109f8e0c
    uri = /v1/system/networking/bgp?name_or_id=abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz12345678901
00:35:15.002Z INFO 504e4b3c-a322-419f-9d48-c580888edcd5 (dropshot_external): request completed
    error_message_external = Internal Server Error
    error_message_internal = failed to lookup bgp config by name
    file = /home/elaine/.cargo/git/checkouts/dropshot-a4a923d29dccc492/06c8dab/dropshot/src/server.rs:902
    latency_us = 78054
    local_addr = 172.30.2.6:80
    method = DELETE
    remote_addr = 172.20.2.90:61444
    req_id = b727f3ca-7423-4262-82ac-afee109f8e0c
    response_code = 500
    uri = /v1/system/networking/bgp?name_or_id=abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz12345678901

internet-diglett commented 2 months ago

Looks like we're missing an index for the lookup by name:

error = DatabaseError(Unknown, "query `SELECT \\"bgp_config\\".\\"id\\" FROM \\"bgp_config\\" WHERE (\\"bgp_config\\".\\"name\\" = $1) LIMIT $2` contains a full table/index scan which is explicitly disallowed")