spandex-project / spandex_phoenix

Phoenix Instrumentation tracer
MIT License
82 stars 29 forks source link

Errors not being propogated to the top span #59

Open aselder opened 2 years ago

aselder commented 2 years ago

I'm using Phoenix 1.6 and the latest version all spandex stuff.

With the new telemetry setup, if I have an error occur inside the router_dispatch span, this span is correctly marked as having an error, but this does not propogate up the top level span, so DataDog doesn't mark the overall request as an error case:

[
    {
        "duration": 24492000,
        "error": 1,
        "meta": {
            "version": "unknown",
            "error.msg": "** (ArithmeticError) bad argument in arithmetic expression",
            "error.stack": "    (tenant_web 0.1.0) lib/tenant_web/controllers/root_controller.ex:5: TenantWeb.RootController.index/2\n    (tenant_web 0.1.0) lib/tenant_web/controllers/root_controller.ex:1: TenantWeb.RootController.action/2\n    (tenant_web 0.1.0) lib/tenant_web/controllers/root_controller.ex:1: TenantWeb.RootController.phoenix_controller_pipeline/2\n    (phoenix 1.6.9) lib/phoenix/router.ex:354: Phoenix.Router.__call__/2\n    (tenant_web 0.1.0) lib/tenant_web/endpoint.ex:1: TenantWeb.Endpoint.plug_builder_call/2\n    (tenant_web 0.1.0) lib/plug/debugger.ex:136: TenantWeb.Endpoint.\"call (overridable 3)\"/2\n    (tenant_web 0.1.0) lib/tenant_web/endpoint.ex:1: TenantWeb.Endpoint.call/2\n    (phoenix 1.6.9) lib/phoenix/endpoint/cowboy2_handler.ex:54: Phoenix.Endpoint.Cowboy2Handler.init/4\n    (cowboy 2.9.0) /Users/aselder/Development/vulcan/deps/cowboy/src/cowboy_handler.erl:37: :cowboy_handler.execute/2\n    (cowboy 2.9.0) /Users/aselder/Development/vulcan/deps/cowboy/src/cowboy_stream_h.erl:306: :cowboy_stream_h.execute/3\n    (cowboy 2.9.0) /Users/aselder/Development/vulcan/deps/cowboy/src/cowboy_stream_h.erl:295: :cowboy_stream_h.request_process/3\n    (stdlib 3.17.1) proc_lib.erl:226: :proc_lib.init_p_do_apply/3\n",
            "error.type": "Elixir.Plug.Conn.WrapperError"
        },
        "metrics": {
            "_sampling_priority_v1": 1
        },
        "name": "phx.router_dispatch",
        "parent_id": 411396100482281988,
        "resource": "Elixir.TenantWeb.RootController.index",
        "service": "tenant_web",
        "span_id": 1808697012183516486,
        "start": 1653586578512393000,
        "trace_id": 5900429464773048937,
        "type": "web"
    },
    {
        "duration": 1303333,
        "error": 0,
        "meta": {
            "param_count": "1",
            "version": "unknown",
            "sql.db": "repo_database",
            "sql.query": "SELECT t0.\"id\", t0.\"domain\", t0.\"url\", t0.\"name\", t0.\"business_unit_identifier\", t0.\"brand_id\", t0.\"region_id\", t0.\"datacenter\", t0.\"inserted_at\", t0.\"updated_at\" FROM \"tenants\" AS t0 WHERE (t0.\"domain\" = $1)",
            "sql.rows": "0"
        },
        "metrics": {
            "_sampling_priority_v1": 1
        },
        "name": "query",
        "parent_id": 1808697012183516486,
        "resource": "SELECT t0.\"id\", t0.\"domain\", t0.\"url\", t0.\"name\", t0.\"business_unit_identifier\", t0.\"brand_id\", t0.\"region_id\", t0.\"datacenter\", t0.\"inserted_at\", t0.\"updated_at\" FROM \"tenants\" AS t0 WHERE (t0.\"domain\" = $1)",
        "service": "core_db",
        "span_id": 6689573463970486841,
        "start": 1653586578529524667,
        "trace_id": 5900429464773048937,
        "type": "db"
    },
    {
        "duration": 2042,
        "error": 0,
        "meta": {
            "param_count": "1",
            "version": "unknown",
            "sql.db": "repo_database",
            "sql.query": "SELECT t0.\"id\", t0.\"domain\", t0.\"url\", t0.\"name\", t0.\"business_unit_identifier\", t0.\"brand_id\", t0.\"region_id\", t0.\"datacenter\", t0.\"inserted_at\", t0.\"updated_at\" FROM \"tenants\" AS t0 WHERE (t0.\"domain\" = $1)",
            "sql.rows": "0"
        },
        "metrics": {
            "_sampling_priority_v1": 1
        },
        "name": "decode",
        "parent_id": 6689573463970486841,
        "resource": "SELECT t0.\"id\", t0.\"domain\", t0.\"url\", t0.\"name\", t0.\"business_unit_identifier\", t0.\"brand_id\", t0.\"region_id\", t0.\"datacenter\", t0.\"inserted_at\", t0.\"updated_at\" FROM \"tenants\" AS t0 WHERE (t0.\"domain\" = $1)",
        "service": "core_db",
        "span_id": 5038131275005273517,
        "start": 1653586578530825958,
        "trace_id": 5900429464773048937,
        "type": "db"
    },
    {
        "duration": 575416,
        "error": 0,
        "meta": {
            "param_count": "1",
            "version": "unknown",
            "sql.db": "repo_database",
            "sql.query": "SELECT t0.\"id\", t0.\"domain\", t0.\"url\", t0.\"name\", t0.\"business_unit_identifier\", t0.\"brand_id\", t0.\"region_id\", t0.\"datacenter\", t0.\"inserted_at\", t0.\"updated_at\" FROM \"tenants\" AS t0 WHERE (t0.\"domain\" = $1)",
            "sql.rows": "0"
        },
        "metrics": {
            "_sampling_priority_v1": 1
        },
        "name": "run_query",
        "parent_id": 6689573463970486841,
        "resource": "SELECT t0.\"id\", t0.\"domain\", t0.\"url\", t0.\"name\", t0.\"business_unit_identifier\", t0.\"brand_id\", t0.\"region_id\", t0.\"datacenter\", t0.\"inserted_at\", t0.\"updated_at\" FROM \"tenants\" AS t0 WHERE (t0.\"domain\" = $1)",
        "service": "core_db",
        "span_id": 8956401152100997921,
        "start": 1653586578530250542,
        "trace_id": 5900429464773048937,
        "type": "db"
    },
    {
        "duration": 725875,
        "error": 0,
        "meta": {
            "param_count": "1",
            "version": "unknown",
            "sql.db": "repo_database",
            "sql.query": "SELECT t0.\"id\", t0.\"domain\", t0.\"url\", t0.\"name\", t0.\"business_unit_identifier\", t0.\"brand_id\", t0.\"region_id\", t0.\"datacenter\", t0.\"inserted_at\", t0.\"updated_at\" FROM \"tenants\" AS t0 WHERE (t0.\"domain\" = $1)",
            "sql.rows": "0"
        },
        "metrics": {
            "_sampling_priority_v1": 1
        },
        "name": "queue",
        "parent_id": 6689573463970486841,
        "resource": "SELECT t0.\"id\", t0.\"domain\", t0.\"url\", t0.\"name\", t0.\"business_unit_identifier\", t0.\"brand_id\", t0.\"region_id\", t0.\"datacenter\", t0.\"inserted_at\", t0.\"updated_at\" FROM \"tenants\" AS t0 WHERE (t0.\"domain\" = $1)",
        "service": "core_db",
        "span_id": 5622023597197703239,
        "start": 1653586578529524667,
        "trace_id": 5900429464773048937,
        "type": "db"
    },
    {
        "duration": 103833000,
        "error": 0,
        "meta": {
            "version": "unknown",
            "http.method": "GET",
            "http.status_code": "500",
            "http.url": "/"
        },
        "metrics": {
            "_sampling_priority_v1": 1
        },
        "name": "request",
        "parent_id": null,
        "resource": "GET /",
        "service": "tenant_web",
        "span_id": 411396100482281988,
        "start": 1653586578471186000,
        "trace_id": 5900429464773048937,
        "type": "web"
    }
]

Notice that the top span at the end the span list has error: 0. A failure should cascade up and make the entire request as an error.

atonse commented 2 years ago

We are experiencing the same thing. Latest versions of all the hexes, and the router_dispatch is the one the error gets attached to.

boon-wego commented 2 years ago

We are experiencing this too. Have reverted library upgrade until this is addressed

ludwikbukowski commented 2 years ago

The same. Can we expect some fixes?