scylladb / scylla-manager

The Scylla Manager
https://manager.docs.scylladb.com/stable/
Other
52 stars 34 forks source link

SM creates way too many clients #3935

Closed Michal-Leszczynski closed 2 months ago

Michal-Leszczynski commented 3 months ago

These are two fragments of SM logs:

75 node cluster ``` Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.061Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.064Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.065Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.065Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.067Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.071Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.071Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.089Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.090Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.091Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.092Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.092Z","N":"cluster","M":"Creating new Scylla HTTP 
client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.093Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.093Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.094Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.095Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.096Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.097Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.097Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.099Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.099Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.100Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.100Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.101Z","N":"cluster","M":"Creating new Scylla 
HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.102Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.120Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.125Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.137Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.137Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.138Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.141Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.143Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.146Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.148Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.149Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.154Z","N":"cluster","M":"Creating new 
Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.155Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.158Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.159Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.164Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.164Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.164Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.165Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.169Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.170Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.187Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.189Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.190Z","N":"cluster","M":"Creating 
new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.190Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.191Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.191Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.192Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.192Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.193Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.198Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.200Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.202Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.204Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.205Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : 
{"L":"INFO","T":"2024-06-12T18:46:26.206Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.254Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.312Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.328Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.344Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.353Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.356Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.406Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.445Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.456Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.457Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.460Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 
18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.474Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.474Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.477Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} Jun 12 18:46:26 : {"L":"INFO","T":"2024-06-12T18:46:26.478Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"oqoiPrLESQSih5IIEVoVGQ"} ```
12 node cluster ``` Jul 06 07:09:10 {"L":"INFO","T":"2024-07-06T07:09:10.005Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"aVF99MYbQD-Z_5WJNtTLEw"} Jul 06 07:09:10 {"L":"INFO","T":"2024-07-06T07:09:10.005Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"aVF99MYbQD-Z_5WJNtTLEw"} Jul 06 07:09:10 {"L":"INFO","T":"2024-07-06T07:09:10.006Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"aVF99MYbQD-Z_5WJNtTLEw"} Jul 06 07:09:10 {"L":"INFO","T":"2024-07-06T07:09:10.006Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"aVF99MYbQD-Z_5WJNtTLEw"} Jul 06 07:09:10 {"L":"INFO","T":"2024-07-06T07:09:10.006Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"aVF99MYbQD-Z_5WJNtTLEw"} Jul 06 07:09:10 {"L":"INFO","T":"2024-07-06T07:09:10.005Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"aVF99MYbQD-Z_5WJNtTLEw"} Jul 06 07:09:10 {"L":"INFO","T":"2024-07-06T07:09:10.005Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"aVF99MYbQD-Z_5WJNtTLEw"} Jul 06 07:09:10 {"L":"INFO","T":"2024-07-06T07:09:10.005Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"aVF99MYbQD-Z_5WJNtTLEw"} Jul 06 07:09:10 {"L":"INFO","T":"2024-07-06T07:09:10.005Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"aVF99MYbQD-Z_5WJNtTLEw"} Jul 06 07:09:10 {"L":"INFO","T":"2024-07-06T07:09:10.005Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"aVF99MYbQD-Z_5WJNtTLEw"} Jul 06 07:09:10 {"L":"INFO","T":"2024-07-06T07:09:10.005Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"aVF99MYbQD-Z_5WJNtTLEw"} Jul 06 07:09:10 {"L":"INFO","T":"2024-07-06T07:09:10.006Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"aVF99MYbQD-Z_5WJNtTLEw"} 
Jul 06 07:10:40 {"L":"INFO","T":"2024-07-06T07:10:40.184Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"H_17oH_3TqW6Ft7ely3Bbw"} Jul 06 07:15:40 {"L":"INFO","T":"2024-07-06T07:15:40.184Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"H_17oH_3TqW6Ft7ely3Bbw"} Jul 06 07:20:40 {"L":"INFO","T":"2024-07-06T07:20:40.184Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"H_17oH_3TqW6Ft7ely3Bbw"} Jul 06 07:25:10 {"L":"INFO","T":"2024-07-06T07:25:10.005Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"XZ9ySWmCRLKKxWOmxvUE-g"} Jul 06 07:25:10 {"L":"INFO","T":"2024-07-06T07:25:10.006Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"XZ9ySWmCRLKKxWOmxvUE-g"} Jul 06 07:25:10 {"L":"INFO","T":"2024-07-06T07:25:10.006Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"XZ9ySWmCRLKKxWOmxvUE-g"} Jul 06 07:25:10 {"L":"INFO","T":"2024-07-06T07:25:10.006Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"XZ9ySWmCRLKKxWOmxvUE-g"} Jul 06 07:25:10 {"L":"INFO","T":"2024-07-06T07:25:10.006Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"XZ9ySWmCRLKKxWOmxvUE-g"} Jul 06 07:25:10 {"L":"INFO","T":"2024-07-06T07:25:10.007Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"XZ9ySWmCRLKKxWOmxvUE-g"} Jul 06 07:25:10 {"L":"INFO","T":"2024-07-06T07:25:10.007Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"XZ9ySWmCRLKKxWOmxvUE-g"} Jul 06 07:25:10 {"L":"INFO","T":"2024-07-06T07:25:10.007Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"XZ9ySWmCRLKKxWOmxvUE-g"} Jul 06 07:25:10 {"L":"INFO","T":"2024-07-06T07:25:10.007Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"XZ9ySWmCRLKKxWOmxvUE-g"} Jul 06 07:25:10 
{"L":"INFO","T":"2024-07-06T07:25:10.007Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"XZ9ySWmCRLKKxWOmxvUE-g"} Jul 06 07:25:10 {"L":"INFO","T":"2024-07-06T07:25:10.006Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"XZ9ySWmCRLKKxWOmxvUE-g"} Jul 06 07:25:10 {"L":"INFO","T":"2024-07-06T07:25:10.006Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"","_trace_id":"XZ9ySWmCRLKKxWOmxvUE-g"} ```

It looks like SM is creating a client per node. My suspicion is that something strange happens with the config cache service.

@karol-kokoszka

karol-kokoszka commented 3 months ago

@Michal-Leszczynski yes, seems that it creates client per single host update. It could be shared among all hosts. https://github.com/scylladb/scylla-manager/blob/6c5b723b72a5128a6c6cbb4f66f474dbf51396c0/pkg/service/configcache/service.go#L152-L192

^^ It could accept client and just reuse it.

Michal-Leszczynski commented 3 months ago

@Michal-Leszczynski yes, seems that it creates client per single host update. It could be shared among all hosts.

I don't get it. The code pasted above shows that the client is created per cluster and not per host. Or am I missing something?

karol-kokoszka commented 3 months ago

It's for every host update.

karol-kokoszka commented 3 months ago

Actually you are right, it's per cluster.

karol-kokoszka commented 3 months ago

refinement notes

We just need to run the test and log, in the test environment, information on who triggered the client creation. It can even be a simple stack trace dump.

import(
   "runtime/debug"
)
...    
debug.PrintStack()
Michal-Leszczynski commented 2 months ago
Logs fragment [SCT run](https://jenkins.scylladb.com/view/scylla-manager/job/manager-master/job/ubuntu22-sanity-test/518/) ``` Aug 06 21:37:36 {"L":"INFO","T":"2024-08-06T21:37:36.009Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"9b7189c3-0320-4bec-8623-abb23fb1a4ff","_trace_id":"IIq4FMpfS0C-PZx2gUt-2Q"} Aug 06 21:37:36 goroutine 2686 [running]: Aug 06 21:37:36 runtime/debug.Stack() Aug 06 21:37:36 runtime/debug/stack.go:24 +0x5e Aug 06 21:37:36 runtime/debug.PrintStack() Aug 06 21:37:36 runtime/debug/stack.go:16 +0x13 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/cluster.(*Service).CreateClientNoCache(0xc000431a00, {0x2d80400, 0xc000ae17a0}, {{0x9b, 0x71, 0x89, 0xc3, 0x3, 0x20, 0x4b, ...}}) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/cluster/service.go:135 +0x134 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/scyllaclient.(*CachedProvider).Client(0xc0001f54c0, {0x2d80400, 0xc000ae17a0}, {{0x9b, 0x71, 0x89, 0xc3, 0x3, 0x20, 0x4b, ...}}) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/scyllaclient/provider.go:61 +0x268 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/cluster.(*Service).Client(0xc000431a00, {0x2d80400, 0xc000ae17a0}, {{0x9b, 0x71, 0x89, 0xc3, 0x3, 0x20, 0x4b, ...}}) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/cluster/service.go:123 +0x145 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck.(*Service).pingREST(0xc000a7e560?, {0x2d80400, 0xc000ae17a0}, {{0x9b, 0x71, 0x89, 0xc3, 0x3, 0x20, 0x4b, ...}}, ...) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck/service.go:357 +0x53 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck.runner.checkHosts.func1(0x1000000000001?) 
Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck/runner.go:87 +0x18a Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/util/parallel.Run.func1() Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/util/parallel/parallel.go:72 +0xb2 Aug 06 21:37:36 created by github.com/scylladb/scylla-manager/v3/pkg/util/parallel.Run in goroutine 2691 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/util/parallel/parallel.go:57 +0xe8 Aug 06 21:37:36 {"L":"INFO","T":"2024-08-06T21:37:36.009Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"9b7189c3-0320-4bec-8623-abb23fb1a4ff","_trace_id":"IIq4FMpfS0C-PZx2gUt-2Q"} Aug 06 21:37:36 goroutine 2687 [running]: Aug 06 21:37:36 runtime/debug.Stack() Aug 06 21:37:36 runtime/debug/stack.go:24 +0x5e Aug 06 21:37:36 runtime/debug.PrintStack() Aug 06 21:37:36 runtime/debug/stack.go:16 +0x13 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/cluster.(*Service).CreateClientNoCache(0xc000431a00, {0x2d80400, 0xc000ae17a0}, {{0x9b, 0x71, 0x89, 0xc3, 0x3, 0x20, 0x4b, ...}}) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/cluster/service.go:135 +0x134 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/scyllaclient.(*CachedProvider).Client(0xc0001f54c0, {0x2d80400, 0xc000ae17a0}, {{0x9b, 0x71, 0x89, 0xc3, 0x3, 0x20, 0x4b, ...}}) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/scyllaclient/provider.go:61 +0x268 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/cluster.(*Service).Client(0xc000431a00, {0x2d80400, 0xc000ae17a0}, {{0x9b, 0x71, 0x89, 0xc3, 0x3, 0x20, 0x4b, ...}}) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/cluster/service.go:123 +0x145 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck.(*Service).pingREST(0xc0009ab8ca?, {0x2d80400, 0xc000ae17a0}, {{0x9b, 0x71, 0x89, 0xc3, 0x3, 0x20, 0x4b, ...}}, ...) 
Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck/service.go:357 +0x53 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck.runner.checkHosts.func1(0x100000001?) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck/runner.go:87 +0x18a Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/util/parallel.Run.func1() Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/util/parallel/parallel.go:72 +0xb2 Aug 06 21:37:36 created by github.com/scylladb/scylla-manager/v3/pkg/util/parallel.Run in goroutine 2691 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/util/parallel/parallel.go:57 +0xe8 Aug 06 21:37:36 {"L":"INFO","T":"2024-08-06T21:37:36.012Z","N":"cluster","M":"Creating new Scylla HTTP client","cluster_id":"9b7189c3-0320-4bec-8623-abb23fb1a4ff","_trace_id":"IIq4FMpfS0C-PZx2gUt-2Q"} Aug 06 21:37:36 goroutine 2685 [running]: Aug 06 21:37:36 runtime/debug.Stack() Aug 06 21:37:36 runtime/debug/stack.go:24 +0x5e Aug 06 21:37:36 runtime/debug.PrintStack() Aug 06 21:37:36 runtime/debug/stack.go:16 +0x13 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/cluster.(*Service).CreateClientNoCache(0xc000431a00, {0x2d80400, 0xc000ae17a0}, {{0x9b, 0x71, 0x89, 0xc3, 0x3, 0x20, 0x4b, ...}}) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/cluster/service.go:135 +0x134 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/scyllaclient.(*CachedProvider).Client(0xc0001f54c0, {0x2d80400, 0xc000ae17a0}, {{0x9b, 0x71, 0x89, 0xc3, 0x3, 0x20, 0x4b, ...}}) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/scyllaclient/provider.go:61 +0x268 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/cluster.(*Service).Client(0xc000431a00, {0x2d80400, 0xc000ae17a0}, {{0x9b, 0x71, 0x89, 0xc3, 0x3, 0x20, 0x4b, ...}}) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/cluster/service.go:123 +0x145 Aug 06 21:37:36 
github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck.(*Service).pingREST(0xc0009ab8c4?, {0x2d80400, 0xc000ae17a0}, {{0x9b, 0x71, 0x89, 0xc3, 0x3, 0x20, 0x4b, ...}}, ...) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck/service.go:357 +0x53 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck.runner.checkHosts.func1(0xc000711b00?) Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck/runner.go:87 +0x18a Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/util/parallel.Run.func1() Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/util/parallel/parallel.go:72 +0xb2 Aug 06 21:37:36 created by github.com/scylladb/scylla-manager/v3/pkg/util/parallel.Run in goroutine 2691 Aug 06 21:37:36 github.com/scylladb/scylla-manager/v3/pkg/util/parallel/parallel.go:57 +0xe8 ```

This test scenario has 3 nodes, and we can see 3 almost simultaneous client creations originating from healthcheck's pingREST method, which uses the cluster service's cached clients:

// Client is the cached ProviderFunc.
//
// It returns the client stored in p.clients for clusterID when that entry is
// still usable, and otherwise builds a new one with p.inner, stores it with a
// fresh TTL and returns it. An error from p.inner is returned as is and the
// cache is left untouched.
//
// NOTE(review): p.mu is held only around the map read and the map write, not
// across the validation or the p.inner call, so concurrent callers that all
// find the entry missing or unusable each call p.inner on their own.
func (p *CachedProvider) Client(ctx context.Context, clusterID uuid.UUID) (*Client, error) {
    // Read the cached entry under the lock; the lock is released before the
    // entry is validated.
    p.mu.Lock()
    c, ok := p.clients[clusterID]
    p.mu.Unlock()

    // Cache hit
    if ok {
        // Check if hosts did not change before returning
        changed, err := c.client.CheckHostsChanged(ctx)
        if err != nil {
            // The failure is only logged here; the err == nil condition below
            // then makes this call fall through to creating a new client.
            p.logger.Error(ctx, "Cannot check if hosts changed", "error", err)
        }
        // Reuse the cached client only when its TTL is still in the future,
        // the hosts check reported no change, and the check itself succeeded.
        if c.ttl.After(timeutc.Now()) && !changed && err == nil {
            return c.client, nil
        }
    }

    // If not found or hosts changed create a new one
    client, err := p.inner(ctx, clusterID) // <- all calls get here before any of them manages to set the new client
    if err != nil {
        return nil, err
    }

    // The new entry is valid for p.validity counted from now.
    c = clientTTL{
        client: client,
        ttl:    timeutc.Now().Add(p.validity),
    }

    // Overwrite whatever entry is currently stored for this cluster; this
    // function does not close the client it replaces.
    p.mu.Lock()
    p.clients[clusterID] = c
    p.mu.Unlock()

    return c.client, nil
}

The problem is that when there are 3 simultaneous calls to get an invalidated client, all of those calls will result in client creation. Client creation should be done under the mutex, and other calls should just wait for it to finish and take the already created client.

Michal-Leszczynski commented 2 months ago

Another problem is that the function used for checking if hosts changed checks the length of c.config.Hosts, which may contain duplicates:

// CheckHostsChanged reports whether the hosts returned by c.hosts differ from
// the hosts in c.config.Hosts.
//
// If c.hosts fails, it returns false together with that error. Otherwise it
// returns true when the two slices differ in length, and falls back to a set
// comparison when the lengths match.
func (c *Client) CheckHostsChanged(ctx context.Context) (bool, error) {
    cur, err := c.hosts(ctx)
    if err != nil {
        return false, err
    }
    // Raw slice lengths are compared, so repeated entries in c.config.Hosts
    // count towards its length.
    if len(cur) != len(c.config.Hosts) { // <- c.config.Hosts may contain duplicates
        // err is always nil at this point, as the non-nil case returned above.
        return true, err
    }
    // NOTE(review): presumably Has reports whether every host in cur is in the
    // set built from c.config.Hosts - confirm against the strset package.
    return !strset.New(c.config.Hosts...).Has(cur...), nil
}

This results in recreating the client, because the cache provider believes that the hosts have changed even though that's not the case.

Example fragment which shows that duplicates are possible:

    if c.Host != "" {
        config.Hosts = []string{c.Host}
    }
    config.Hosts = append(config.Hosts, c.KnownHosts...)
Michal-Leszczynski commented 2 months ago

The last part of the code which creates many clients (that are closed right after) is connected to GetSession:

// GetSession returns CQL session to provided cluster.
func (s *Service) GetSession(ctx context.Context, clusterID uuid.UUID, opts ...SessionConfigOption) (session gocqlx.Session, err error) {
    s.logger.Info(ctx, "Get session", "cluster_id", clusterID)

    client, err := s.CreateClientNoCache(ctx, clusterID)
    if err != nil {
        return session, errors.Wrap(err, "get client")
    }
    defer logutil.LogOnError(ctx, s.logger, client.Close, "Couldn't close scylla client")

    cfg := gocql.NewCluster()
    for _, opt := range opts {
        if err := opt(ctx, clusterID, client, cfg); err != nil {
            return session, err
        }
    }
    // Fill hosts if they weren't specified by the options
    if len(cfg.Hosts) == 0 {
        sessionHosts, err := GetRPCAddresses(ctx, client, client.Config().Hosts)
        if err != nil {
            s.logger.Info(ctx, "Gets session", "err", err)
            if errors.Is(err, ErrNoRPCAddressesFound) {
                return session, err
            }
        }
        cfg.Hosts = sessionHosts
    }

    ni, err := client.AnyNodeInfo(ctx)

@karol-kokoszka do you know why we can't use the cached client here? The cached client checks for changed hosts. Also, the session discovers hosts on its own, so it should be fine even when we miss some of them.

karol-kokoszka commented 2 months ago

@karol-kokoszka do you know why we can't use the cached client here? Cached client checks for changed hosts. Also, session discovers hosts on its own, so it should be fine even if when we miss some of them.

I wanted to keep the cache available to the healthcheck service only, just to validate that everything is fine before applying it to other services. Let's change and test it in the next release.