project-receptor / python-receptor

Project Receptor is a flexible multi-service relayer with remote execution and orchestration capabilities linking controllers with executors across a mesh of nodes.
Other
32 stars 21 forks source link

Adding a diagnostics subsystem #193

Closed jhjaggars closed 4 years ago

jhjaggars commented 4 years ago

Adds a process that periodically writes diagnostic data to $DATA_DIR/diagnostics.json.

The data looks something like this:

{
  "config": {
    "default_node_id": "node",
    "default_config": null,
    "default_data_dir": "/Users/jjaggars/.receptor",
    "default_debug": null,
    "default_max_workers": null,
    "default_logging_format": null,
    "auth_server_cert": null,
    "auth_server_key": null,
    "auth_server_ca_bundle": null,
    "auth_client_cert": null,
    "auth_client_key": null,
    "auth_client_verification_ca": null,
    "auth_server_cipher_list": null,
    "auth_client_cipher_list": null,
    "node_listen": null,
    "node_peers": [
      "rnp://localhost:8888"
    ],
    "node_server_disable": true,
    "node_stats_enable": null,
    "node_stats_port": null,
    "node_keepalive_interval": null,
    "node_groups": null,
    "node_ws_extra_headers": null,
    "func": "receptor.entrypoints.run_as_node",
    "ephemeral": false
  },
  "node_id": "node",
  "datetime": "2020-03-27T18:35:35.456782",
  "recent_errors": [
    {
      "@message": "['  File \"/Users/jjaggars/code/receptor/receptor/work.py\", line 102, in handle\\n    action_method, namespace = self.get_action_method(directive)\\n', '  File \"/Users/jjaggars/code/receptor/receptor/work.py\", line 80, in get_action_method\\n    worker_module = self.load_receptor_worker(namespace)\\n', '  File \"/Users/jjaggars/code/receptor/receptor/work.py\", line 35, in load_receptor_worker\\n    raise exceptions.UnknownDirective(f\"Error loading directive handlers for {name}\")\\n']",
      "@timestamp": "2020-03-27T18:35:35.456837",
      "@source_host": "Jesses-MBP-2.lan",
      "@fields": {
        "name": "receptor.work",
        "levelname": "ERROR",
        "levelno": 40,
        "pathname": "/Users/jjaggars/code/receptor/receptor/work.py",
        "filename": "work.py",
        "module": "work",
        "lineno": 135,
        "funcName": "handle",
        "created": 1585330427.289694,
        "msecs": 289.69407081604004,
        "relativeCreated": 9191.896915435791,
        "thread": 4703620544,
        "threadName": "MainThread",
        "processName": "MainProcess",
        "process": 92966,
        "node_id": "node",
        "message": "['  File \"/Users/jjaggars/code/receptor/receptor/work.py\", line 102, in handle\\n    action_method, namespace = self.get_action_method(directive)\\n', '  File \"/Users/jjaggars/code/receptor/receptor/work.py\", line 80, in get_action_method\\n    worker_module = self.load_receptor_worker(namespace)\\n', '  File \"/Users/jjaggars/code/receptor/receptor/work.py\", line 35, in load_receptor_worker\\n    raise exceptions.UnknownDirective(f\"Error loading directive handlers for {name}\")\\n']",
        "asctime": "2020-03-27 13:33:47,289"
      }
    },
    {
      "@message": "Error encountered while handling the response, replying with\n                    an error message (Error loading directive handlers for nop)",
      "@timestamp": "2020-03-27T18:35:35.456873",
      "@source_host": "Jesses-MBP-2.lan",
      "@fields": {
        "name": "receptor.work",
        "levelname": "ERROR",
        "levelno": 40,
        "pathname": "/Users/jjaggars/code/receptor/receptor/work.py",
        "filename": "work.py",
        "module": "work",
        "lineno": 133,
        "funcName": "handle",
        "created": 1585330427.286861,
        "msecs": 286.8609428405762,
        "relativeCreated": 9189.063787460327,
        "thread": 4703620544,
        "threadName": "MainThread",
        "processName": "MainProcess",
        "process": 92966,
        "node_id": "node",
        "message": "Error encountered while handling the response, replying with\n                    an error message (Error loading directive handlers for nop)",
        "asctime": "2020-03-27 13:33:47,286"
      }
    }
  ],
  "connections": [
    {
      "address": "127.0.0.1",
      "port": 8888,
      "compression": null,
      "cipher": null,
      "peercert": null,
      "sslcontext": null,
      "closed": false,
      "chunk_size": 65536,
      "node_id": "controller",
      "capabilities": {
        "worker_versions": {
          "receptor_stresstest": "1.0.0",
          "receptor_file": "0.1.0"
        },
        "max_work_threads": 8
      }
    }
  ],
  "routes": {
    "nodes": [
      "controller"
    ],
    "edges": [
      {
        "left": "controller",
        "right": "node",
        "cost": 1
      }
    ],
    "neighbors": [
      {
        "node_id": "controller",
        "items": [
          "node"
        ]
      },
      {
        "node_id": "node",
        "items": [
          "controller"
        ]
      }
    ],
    "table": [
      {
        "destination_node_id": "controller",
        "next_hop": "controller",
        "cost": 1
      }
    ]
  },
  "tasks": [
    {
      "state": "PENDING",
      "items": [
        {
          "name": "receptor.receptor.Receptor.shutdown_handler",
          "stack": [
            {
              "filename": "/Users/jjaggars/code/receptor/receptor/receptor.py",
              "line": "await asyncio.sleep(1)",
              "lineno": 206
            }
          ]
        },
        {
          "name": "receptor.connection.base.Worker.receive",
          "stack": [
            {
              "filename": "/Users/jjaggars/code/receptor/receptor/connection/base.py",
              "line": "async for msg in self.conn:",
              "lineno": 60
            }
          ]
        },
        {
          "name": "receptor.connection.base.Worker.watch_queue",
          "stack": [
            {
              "filename": "/Users/jjaggars/code/receptor/receptor/connection/base.py",
              "line": "item = await asyncio.wait_for(self.outbound.get(), 5.0)",
              "lineno": 104
            }
          ]
        },
        {
          "name": "receptor.connection.sock.connect",
          "stack": [
            {
              "filename": "/Users/jjaggars/code/receptor/receptor/connection/sock.py",
              "line": "await worker.client(t)",
              "lineno": 59
            }
          ]
        },
        {
          "name": "receptor.diagnostics.status",
          "stack": [
            {
              "filename": "/Users/jjaggars/Library/Caches/pypoetry/virtualenvs/receptor-7LbJpkJb-py3.6/bin/receptor",
              "line": "load_entry_point('receptor', 'console_scripts', 'receptor')()",
              "lineno": 11
            }
          ]
        },
        {
          "name": "receptor.buffers.file.DurableBuffer.manifest_writer",
          "stack": [
            {
              "filename": "/Users/jjaggars/code/receptor/receptor/buffers/file.py",
              "line": "await self._manifest_dirty.wait()",
              "lineno": 129
            }
          ]
        },
        {
          "name": "receptor.receptor.Receptor.message_handler",
          "stack": [
            {
              "filename": "/Users/jjaggars/code/receptor/receptor/receptor.py",
              "line": "data = await buf.get()",
              "lineno": 136
            }
          ]
        },
        {
          "name": "receptor.buffers.file.DurableBuffer.get",
          "stack": [
            {
              "filename": "/Users/jjaggars/code/receptor/receptor/buffers/file.py",
              "line": "item = await self.q.get()",
              "lineno": 74
            }
          ]
        },
        {
          "name": "receptor.receptor.Manifest.watch_expire",
          "stack": [
            {
              "filename": "/Users/jjaggars/code/receptor/receptor/receptor.py",
              "line": "await asyncio.sleep(600)",
              "lineno": 58
            }
          ]
        },
        {
          "name": "asyncio.locks.Event.wait",
          "stack": [
            {
              "filename": "/Users/jjaggars/.pyenv/versions/3.6.9/lib/python3.6/asyncio/locks.py",
              "line": "yield from fut",
              "lineno": 283
            }
          ]
        }
      ]
    }
  ],
  "metrics": [
    {
      "name": "python_gc_objects_collected",
      "type": "counter",
      "samples": [
        {
          "name": "python_gc_objects_collected_total",
          "labels": {
            "generation": "0"
          },
          "value": 13615,
          "timestamp": null,
          "exemplar": null
        },
        {
          "name": "python_gc_objects_collected_total",
          "labels": {
            "generation": "1"
          },
          "value": 2610,
          "timestamp": null,
          "exemplar": null
        },
        {
          "name": "python_gc_objects_collected_total",
          "labels": {
            "generation": "2"
          },
          "value": 691,
          "timestamp": null,
          "exemplar": null
        }
      ]
    },
    {
      "name": "python_gc_objects_uncollectable",
      "type": "counter",
      "samples": [
        {
          "name": "python_gc_objects_uncollectable_total",
          "labels": {
            "generation": "0"
          },
          "value": 0,
          "timestamp": null,
          "exemplar": null
        },
        {
          "name": "python_gc_objects_uncollectable_total",
          "labels": {
            "generation": "1"
          },
          "value": 0,
          "timestamp": null,
          "exemplar": null
        },
        {
          "name": "python_gc_objects_uncollectable_total",
          "labels": {
            "generation": "2"
          },
          "value": 0,
          "timestamp": null,
          "exemplar": null
        }
      ]
    },
    {
      "name": "python_gc_collections",
      "type": "counter",
      "samples": [
        {
          "name": "python_gc_collections_total",
          "labels": {
            "generation": "0"
          },
          "value": 125,
          "timestamp": null,
          "exemplar": null
        },
        {
          "name": "python_gc_collections_total",
          "labels": {
            "generation": "1"
          },
          "value": 11,
          "timestamp": null,
          "exemplar": null
        },
        {
          "name": "python_gc_collections_total",
          "labels": {
            "generation": "2"
          },
          "value": 1,
          "timestamp": null,
          "exemplar": null
        }
      ]
    },
    {
      "name": "python_info",
      "type": "gauge",
      "samples": [
        {
          "name": "python_info",
          "labels": {
            "version": "3.6.9",
            "implementation": "CPython",
            "major": "3",
            "minor": "6",
            "patchlevel": "9"
          },
          "value": 1,
          "timestamp": null,
          "exemplar": null
        }
      ]
    },
    {
      "name": "bytes_recv",
      "type": "counter",
      "samples": [
        {
          "name": "bytes_recv_total",
          "labels": {},
          "value": 2948,
          "timestamp": null,
          "exemplar": null
        },
        {
          "name": "bytes_recv_created",
          "labels": {},
          "value": 1585330418.2000089,
          "timestamp": null,
          "exemplar": null
        }
      ]
    },
    {
      "name": "incoming_messages",
      "type": "counter",
      "samples": [
        {
          "name": "incoming_messages_total",
          "labels": {},
          "value": 1,
          "timestamp": null,
          "exemplar": null
        },
        {
          "name": "incoming_messages_created",
          "labels": {},
          "value": 1585330418.2000299,
          "timestamp": null,
          "exemplar": null
        }
      ]
    },
    {
      "name": "connected_peers",
      "type": "gauge",
      "samples": [
        {
          "name": "connected_peers",
          "labels": {},
          "value": 1,
          "timestamp": null,
          "exemplar": null
        }
      ]
    },
    {
      "name": "work_events",
      "type": "counter",
      "samples": [
        {
          "name": "work_events_total",
          "labels": {},
          "value": 0,
          "timestamp": null,
          "exemplar": null
        },
        {
          "name": "work_events_created",
          "labels": {},
          "value": 1585330418.200063,
          "timestamp": null,
          "exemplar": null
        }
      ]
    },
    {
      "name": "active_work",
      "type": "gauge",
      "samples": [
        {
          "name": "active_work",
          "labels": {},
          "value": 0,
          "timestamp": null,
          "exemplar": null
        }
      ]
    },
    {
      "name": "route_events",
      "type": "counter",
      "samples": [
        {
          "name": "route_events_total",
          "labels": {},
          "value": 1,
          "timestamp": null,
          "exemplar": null
        },
        {
          "name": "route_events_created",
          "labels": {},
          "value": 1585330418.200086,
          "timestamp": null,
          "exemplar": null
        }
      ]
    },
    {
      "name": "routing_table",
      "type": "info",
      "samples": [
        {
          "name": "routing_table_info",
          "labels": {
            "edges": "{('controller', 'node', 1)}"
          },
          "value": 1,
          "timestamp": null,
          "exemplar": null
        }
      ]
    },
    {
      "name": "receptor_info",
      "type": "info",
      "samples": [
        {
          "name": "receptor_info_info",
          "labels": {
            "node_id": "node",
            "receptor_version": "1.0.0"
          },
          "value": 1,
          "timestamp": null,
          "exemplar": null
        }
      ]
    },
    {
      "name": "worker_info",
      "type": "info",
      "samples": [
        {
          "name": "worker_info_info",
          "labels": {
            "plugins": "{'worker_versions': {'receptor_stresstest': '1.0.0', 'receptor_file': '0.1.0'}, 'max_work_threads': 8}"
          },
          "value": 1,
          "timestamp": null,
          "exemplar": null
        }
      ]
    }
  ]
}