quantcast / qfs

Quantcast File System
https://quantcast.atlassian.net
Apache License 2.0
643 stars 171 forks source link

webUI: add /query/meta endpoint and parse watchdog and vrStatus #246

Closed kristi closed 4 years ago

kristi commented 4 years ago

Add a /query/meta endpoint to the web UI that returns a JSON version of the status. Also parse the rebalanceStatus and watchdog stats in the web UI.

A JSON status makes it easy for scripts to query the status of QFS.

Example response from http://localhost:22000/query/meta:

{
  "evacuatingServers": [],
  "freeFsSpace": 183537364992,
  "serversByRack": {},
  "retiringServers": [],
  "config": {
    "metaServer.chunkServerPort": "20100",
    "metaServer.clientPort": "20000",
    "metaServer.clientThreadCount": "4",
    "metaServer.clusterKey": "myTestCluster",
    "metaServer.cpDir": "/Users/ktsukida/qfsbase/meta/checkpoints",
    "metaServer.log.logDir": "/Users/ktsukida/qfsbase/meta/logs",
    "metaServer.logDir": "/Users/ktsukida/qfsbase/meta/logs",
    "metaServer.minChunkservers": "1",
    "metaServer.msgLogWriter.logLevel": "DEBUG",
    "metaServer.msgLogWriter.maxLogFileSize": "1e6",
    "metaServer.msgLogWriter.maxLogFiles": "10",
    "metaServer.pidFile": "/Users/ktsukida/qfsbase/meta/metaserver.pid",
    "metaServer.recoveryInterval": "1",
    "metaServer.rootDirGroup": "20",
    "metaServer.rootDirMode": "0777",
    "metaServer.rootDirUser": "503"
  },
  "numReallyDownServers": 0,
  "vrStatus": {},
  "canNotBeUsedForPlacement": 0,
  "upServers": [
    {
      "load": 0,
      "overloaded": false,
      "ip": "127.0.0.1",
      "used": 0,
      "down": 0,
      "numDrives": 2,
      "replay": 0,
      "chunks": 0,
      "nblocks": "0",
      "nevacuate": 0,
      "numReplications": 0,
      "total": 250685575168,
      "retiring": 0,
      "nchunksToMove": "0",
      "port": "21001",
      "numWritableDrives": 2,
      "tiersCount": 1,
      "good": "1",
      "free": 61179121664,
      "util": 75.6,
      "nwrites": 0,
      "host": "127.0.0.1",
      "connected": 1,
      "nlost": "0",
      "tiers": "15:1:0:0:6.12e+10:2.51e+11:75.60",
      "displayName": "127.0.0.1:21001",
      "ncorrupt": 0,
      "md5sum": "e9b59b05d4d5108573e7d6c2b05e7d1b",
      "lostChunkDirs": "",
      "bytesevacuate": 0,
      "rack": 0,
      "numReadReplications": 0,
      "stopped": 0,
      "lastheard": 1
    },
    {
      "load": 0,
      "overloaded": false,
      "ip": "127.0.0.1",
      "used": 0,
      "down": 0,
      "numDrives": 1,
      "replay": 0,
      "chunks": 0,
      "nblocks": "0",
      "nevacuate": 0,
      "numReplications": 0,
      "total": 250685575168,
      "retiring": 0,
      "nchunksToMove": "0",
      "port": "21002",
      "numWritableDrives": 1,
      "tiersCount": 1,
      "good": "1",
      "free": 61179121664,
      "util": 75.6,
      "nwrites": 0,
      "host": "127.0.0.1",
      "connected": 1,
      "nlost": "0",
      "tiers": "15:1:0:0:6.12e+10:2.51e+11:75.60",
      "displayName": "127.0.0.1:21002",
      "ncorrupt": 0,
      "md5sum": "e9b59b05d4d5108573e7d6c2b05e7d1b",
      "lostChunkDirs": "",
      "bytesevacuate": 0,
      "rack": 0,
      "numReadReplications": 0,
      "stopped": 0,
      "lastheard": 1
    },
    {
      "load": 0,
      "overloaded": false,
      "ip": "127.0.0.1",
      "used": 0,
      "down": 0,
      "numDrives": 1,
      "replay": 0,
      "chunks": 0,
      "nblocks": "0",
      "nevacuate": 0,
      "numReplications": 0,
      "total": 250685575168,
      "retiring": 0,
      "nchunksToMove": "0",
      "port": "21003",
      "numWritableDrives": 1,
      "tiersCount": 1,
      "good": "1",
      "free": 61179121664,
      "util": 75.6,
      "nwrites": 0,
      "host": "127.0.0.1",
      "connected": 1,
      "nlost": "0",
      "tiers": "15:1:0:0:6.12e+10:2.51e+11:75.60",
      "displayName": "127.0.0.1:21003",
      "ncorrupt": 0,
      "md5sum": "e9b59b05d4d5108573e7d6c2b05e7d1b",
      "lostChunkDirs": "",
      "bytesevacuate": 0,
      "rack": 0,
      "numReadReplications": 0,
      "stopped": 0,
      "lastheard": 1
    }
  ],
  "tiersColumnNames": [
    "rack",
    "tier",
    "devices",
    "wr-chunks",
    "chunks",
    "space-available",
    "total-space",
    "%util.",
    "candidates"
  ],
  "downServers": [
    {
      "displayName": "127.0.0.1:21001",
      "host": "127.0.0.1",
      "down": "Mon Jul 20 07:03:51 2020",
      "reason": "replay: start servicing",
      "port": "21001",
      "stillDown": 0
    },
    {
      "displayName": "127.0.0.1:21002",
      "host": "127.0.0.1",
      "down": "Mon Jul 20 07:03:51 2020",
      "reason": "replay: start servicing",
      "port": "21002",
      "stillDown": 0
    },
    {
      "displayName": "127.0.0.1:21003",
      "host": "127.0.0.1",
      "down": "Mon Jul 20 07:03:51 2020",
      "reason": "replay: start servicing",
      "port": "21003",
      "stillDown": 0
    }
  ],
  "watchdog": {
    "wd.polls": "0",
    "wd.timeouts": "0",
    "wd.timerOverruns": "0",
    "wd.timerOverrunsUsecs": "0",
    "wd.0.name": "main",
    "wd.0.polls": "0",
    "wd.0.timeouts": "0",
    "wd.0.totalTimeouts": "0",
    "wd.0.changedAgoUsec": "1595253843378996",
    "wd.1.name": "LogWriter",
    "wd.1.polls": "0",
    "wd.1.timeouts": "0",
    "wd.1.totalTimeouts": "0",
    "wd.1.changedAgoUsec": "1595253843378998",
    "wd.2.name": "client",
    "wd.2.polls": "0",
    "wd.2.timeouts": "0",
    "wd.2.totalTimeouts": "0",
    "wd.2.changedAgoUsec": "1595253843379000",
    "wd.3.name": "client",
    "wd.3.polls": "0",
    "wd.3.timeouts": "0",
    "wd.3.totalTimeouts": "0",
    "wd.3.changedAgoUsec": "1595253843379002",
    "wd.4.name": "client",
    "wd.4.polls": "0",
    "wd.4.timeouts": "0",
    "wd.4.totalTimeouts": "0",
    "wd.4.changedAgoUsec": "1595253843379004",
    "wd.5.name": "client",
    "wd.5.polls": "0",
    "wd.5.timeouts": "0",
    "wd.5.totalTimeouts": "0",
    "wd.5.changedAgoUsec": "1595253843379006"
  },
  "systemInfo": {
    "logDiskWriteByteCount": 774,
    "csmapEntryBytes": 0,
    "dentrySize": 64,
    "maxClients": 13230,
    "sourceVersion": "cc3ac568-git@github.com:kristi/qfs.git/feature/spelling-timeouts@cc3ac568c3706a0a91211598d11c4e1f109732fd",
    "dentries": 13,
    "objStoreDeletes": 0,
    "totalSpace": 752056725504,
    "sumOfLogicalFileSizes": 0,
    "hibernatedServerCount": 0,
    "internalNodeSize": 4096,
    "totalBuffers": 262144,
    "usedSpace": 0,
    "vrPrimaryNodeId": -1,
    "fattrs": 5,
    "bTreeHeight": 1,
    "log10SecAvgUsec": 30,
    "csmapEntryAllocs": 0,
    "fattrAllocSize": 8388624,
    "cinfos": 0,
    "appendCacheSize": 0,
    "isInRecovery": false,
    "pendingRecovery": 0,
    "log15SecAvgUsec": 27,
    "usedBuffers": 6,
    "freeFsSpace": 183537364992,
    "csMaxGoodSlaveLoadAvg": 0,
    "startedAt": " Mon Jul 20 07:03:50 2020",
    "writableDrives": 4,
    "internalNodeAllocSize": 8388624,
    "buildVersion": "cc3ac568-cc3ac568c3706a0a91211598d11c4e1f109732fd-RelWithDebInfo-BF894F67",
    "log10SecAvgReqRate": 77,
    "replicationBacklog": 0,
    "csmapNodeSize": 72,
    "csMaxGoodMasterLoadAvg": 0,
    "logDiskWriteCount": 5,
    "logAvgReqRateDiv": 256,
    "maxChunkServers": 2016,
    "csMastersToRestart": 0,
    "logOpWrite10SecAvgUsec": 1,
    "objStoreDeletesInFlight": 0,
    "vrNodeId": -1,
    "chunks": 0,
    "logTimeUsec": 2299,
    "sockets": 17,
    "totalDrives": 4,
    "objStoreEnabled": 0,
    "internalNodes": 1,
    "allocatedRequests": 6,
    "logDiskWriteUsec": 145,
    "csToRestart": 0,
    "fattrSize": 104,
    "delayedRecovery": 0,
    "csMaxGoodCandidateLoadAvg": 0,
    "objStoreDeletesRetry": 0,
    "goodMasters": 2,
    "log5SecAvgUsec": 23,
    "clients": 1,
    "logPendingOpsCount": 0,
    "log15SecAvgReqRate": 73,
    "logExceedQueueDepthFailedCount": 0,
    "fileCount": 1,
    "csmapAllocSize": 0,
    "logTimeOpsCount": 11,
    "cinfoAllocSize": 0,
    "goodSlaves": 1,
    "logOpWrite15SecAvgUsec": 1,
    "chunkServers": 3,
    "cinfoSize": 40,
    "logOpWrite5SecAvgUsec": 0,
    "uptime": 13,
    "pendingReplication": 0,
    "csmapNodes": 0,
    "logExceedLogQueueDepthFailureCount300SecAvg": 0,
    "dirCount": 4,
    "replicationsCheck": 0,
    "objStoreDeletesStartedAgo": 0,
    "logTotalRequestCount": 11,
    "vrPrimaryFlag": 1,
    "vrActiveFlag": 1,
    "wormMode": "Disabled",
    "dentryAllocSize": 8388624,
    "log5SecAvgReqRate": 53,
    "replications": 0,
    "fileSystemId": 6106949049890591000,
    "logPendingAckByteCount": 0
  },
  "rebalanceStatus": {
    "RoundCount": " 0",
    "NoSource": " 0",
    "ServerNeeded": " 0",
    "NoServerFound": " 0",
    "RackNeeded": " 0",
    "NoRackFound": " 0",
    "NonLoadedServerNeeded": " 0",
    "NoNonLoadedServerFound": " 0",
    "Ok": " 0",
    "Scanned": " 0",
    "Busy": " 0",
    "BusyOther": " 0",
    "ReplicationStarted": " 0",
    "NoReplicationStarted": " 0",
    "ScanTimeout": " 0",
    "TotalNoSource": " 0",
    "TotalServerNeeded": " 0",
    "TotalNoServerFound": " 0",
    "TotalRackNeeded": " 0",
    "TotalNoRackFound": " 0",
    "TotalNonLoadedServerNeeded": " 0",
    "TotalNoNonLoadedServerFound": " 0",
    "TotalOk": " 0",
    "TotalScanned": " 0",
    "TotalBusy": " 0",
    "TotalBusyOther": " 0",
    "TotalReplicationStarted": " 0",
    "TotalNoReplicationStarted": " 0",
    "TotalScanTimeout": " 0",
    "Plan": " 0",
    "PlanNoDest": " 0",
    "PlanTimeout": " 0",
    "PlanScanned": " 0",
    "PlanNoChunk": " 0",
    "PlanNoSrc": " 0",
    "PlanBusy": " 0",
    "PlanBusyOther": " 0",
    "PlanCannotMove": " 0",
    "PlanReplicationStarted": " 0",
    "PlanNoReplicationStarted": " 0",
    "PlanLine": " 0",
    "PlanNoServer": " 0",
    "PlanAdded": " 0",
    "TotalPlanNoDest": " 0",
    "TotalPlanTimeout": " 0",
    "TotalPlanScanned": " 0",
    "TotalPlanNoChunk": " 0",
    "TotalPlanNoSrc": " 0",
    "TotalPlanBusy": " 0",
    "TotalPlanBusyOther": " 0",
    "TotalPlanCannotMove": " 0",
    "TotalPlanReplicationStarted": " 0",
    "TotalPlanNoReplicationStarted": " 0",
    "TotalPlanLine": " 0",
    "TotalPlanNoServer": " 0",
    "TotalPlanAdded": " 0"
  },
  "tiersInfo": [
    "all",
    "15",
    "3",
    "0",
    "0",
    "183537364992",
    "752056725504",
    "75.60",
    "3",
    "0",
    "15",
    "3",
    "0",
    "0",
    "183537364992",
    "752056725504",
    "75.60",
    "3"
  ],
  "goodNoRackAssignedCount": 0
}