GoogleCloudPlatform / run-gmp-sidecar

Apache License 2.0
8 stars 7 forks source link

failed to register process metrics: process does not exist #13

Open andaag opened 5 months ago

andaag commented 5 months ago

I'm getting this with what I expect to be a fairly default setup. The metrics are already exposed on /metrics, but from my understanding the startup of the sidecar is failing because it can't mount /proc from the host container? (Maybe the Grafana container is locked down/running as init=1?)

DEFAULT 2024-03-06T13:55:25.632332Z 2024/03/06 13:55:25 confgenerator: no user config file found, using default config
DEFAULT 2024-03-06T13:55:25.639002Z 2024/03/06 13:55:25 confgenerator: using port 65161 for self metrics
DEFAULT 2024-03-06T13:55:25.701472Z 2024/03/06 13:55:25 entrypoint: started OTel successfully
DEFAULT 2024-03-06T13:55:27.317251Z 2024-03-06T13:55:27.317Z info service@v0.94.0/telemetry.go:59 Setting up own telemetry...
DEFAULT 2024-03-06T13:55:27.317949Z 2024-03-06T13:55:27.319Z info service@v0.94.0/telemetry.go:104 Serving metrics {"address": "0.0.0.0:65161", "level": "Basic"}
DEFAULT 2024-03-06T13:55:27.345243Z 2024-03-06T13:55:27.345Z info filterprocessor@v0.93.0/metrics.go:98 Metric filter configured {"include match_type": "strict", "include expressions": [], "include metric names": ["otelcol_process_uptime", "otelcol_process_memory_rss", "otelcol_grpc_io_client_completed_rpcs", "otelcol_googlecloudmonitoring_point_count"], "include metrics with resource attributes": null, "exclude match_type": "", "exclude expressions": [], "exclude metric names": [], "exclude metrics with resource attributes": null}
DEFAULT 2024-03-06T13:55:27.346546Z Error: failed to register process metrics: process does not exist
DEFAULT 2024-03-06T13:55:27.346551Z 2024/03/06 13:55:27 application run finished with error: failed to register process metrics: process does not exist
DEFAULT 2024-03-06T13:56:33.698798Z 2024/03/06 13:56:33 entrypoint: terminated signal caught
DEFAULT 2024-03-06T13:56:33.698827Z 2024/03/06 13:56:33 entrypoint: sidecar exited
  template {
    metadata {
      annotations = {
        "autoscaling.knative.dev/minScale"          = "1"
        "autoscaling.knative.dev/maxScale"          = "5"
        "run.googleapis.com/cpu-throttling"         = "false"
        "run.googleapis.com/startup-cpu-boost"      = "true",
        "run.googleapis.com/vpc-access-connector"   = var.vpc_connector_id
        "run.googleapis.com/vpc-access-egress"      = "private-ranges-only"
        "run.googleapis.com/container-dependencies" = "{\"collector\":[\"app\"]}"
      }
    }
    spec {
      service_account_name = google_service_account.default.email

      volumes {
        name = "grafana-provisioning-datasources"
        secret {
          secret_name = google_secret_manager_secret.provisioning_datasources.secret_id
          items {
            key  = google_secret_manager_secret_version.provisioning_datasources.version
            path = "automatic.yaml"
          }
        }
      }

      volumes {
        name = "grafana-provisioning-alerting"
        secret {
          secret_name = google_secret_manager_secret.provisioning_alerting.secret_id
          items {
            key  = google_secret_manager_secret_version.provisioning_alerting.version
            path = "automatic.yaml"
          }
        }
      }

      containers {
        image = "grafana/grafana-enterprise:10.3.1"
        name  = "app"

        resources {
          limits = {
            cpu    = "1000m"
            memory = "2G"
          }
          requests = {
            cpu    = "1000m"
            memory = "1G"
          }
        }

        volume_mounts {
          name       = "grafana-provisioning-datasources"
          mount_path = "/etc/grafana/provisioning/datasources"
        }

        volume_mounts {
          name       = "grafana-provisioning-alerting"
          mount_path = "/etc/grafana/provisioning/alerting"
        }

        ports {
          name           = "http1"
          container_port = 8080
        }
        env {
          name  = "GF_INSTALL_PLUGINS"
          value = local.grafana_plugins
        }
        env {
          name  = "GF_DATABASE_TYPE"
          value = "postgres"
        }
        env {
          name  = "GF_DATABASE_HOST"
          value = var.postgresql_host
        }
        env {
          name  = "GF_DATABASE_NAME"
          value = var.postgresql_database
        }
        env {
          name  = "GF_DATABASE_USER"
          value = var.postgresql_user
        }
        env {
          name = "GF_DATABASE_PASSWORD"
          value_from {
            secret_key_ref {
              name = var.postgresql_password_name
              key  = "latest"
            }
          }
        }
        env {
          name  = "GF_SERVER_ROOT_URL"
          value = "<hidden>"
        }
        env {
          name  = "GF_SERVER_HTTP_PORT"
          value = "8080"
        }
        env {
          name  = "GF_AUTH_GOOGLE_ENABLED"
          value = "true"
        }
        env {
          name  = "GF_AUTH_GOOGLE_CLIENT_ID"
          value = "<redacted>"
        }
        env {
          name = "GF_AUTH_GOOGLE_CLIENT_SECRET"
          value_from {
            secret_key_ref {
              name = "grafana_oauth_secret"
              key  = "latest"
            }
          }
        }
        env {
          name  = "GF_AUTH_GOOGLE_USE_PKCE"
          value = "true"
        }
        env {
          name  = "GF_AUTH_GOOGLE_ALLOW_SIGN_UP"
          value = "true"
        }
        env {
          name  = "GF_AUTH_GOOGLE_AUTO_LOGIN"
          value = "true"
        }
        env {
          name  = "GF_AUTH_GOOGLE_SCOPES"
          value = "openid email profile https://www.googleapis.com/auth/cloud-identity.groups.readonly"
        }
        env {
          name  = "GF_AUTH_GOOGLE_ROLE_ATTRIBUTE_STRICT"
          value = "true"
        }
        env {
          name  = "GF_AUTH_GOOGLE_SKIP_ORG_ROLE_SYNC"
          value = "false"
        }
        env {
          name  = "GF_AUTH_GOOGLE_AUTO_ASSIGN_ORG_ROLE"
          value = "Editor"
        }
        env {
          name  = "GF_METRICS_ENABLED"
          value = "true"
        }
        env {
          name  = "GF_METRICS_DISABLE_TOTAL_STATS"
          value = "true"
        }

        startup_probe {
          failure_threshold     = 15
          initial_delay_seconds = 15
          timeout_seconds       = 3
          period_seconds        = 3

          http_get {
            path = "/robots.txt"
            port = 8080
          }
        }
        liveness_probe {
          failure_threshold     = 3
          initial_delay_seconds = 10
          timeout_seconds       = 3
          period_seconds        = 3

          http_get {
            path = "/robots.txt"
            port = 8080
          }
        }
      }
      containers {
        image = "us-docker.pkg.dev/cloud-ops-agents-artifacts/cloud-run-gmp-sidecar/cloud-run-gmp-sidecar:1.1.0"
        name  = "collector"
      }
      container_concurrency = 80
    }
  }
}
ridwanmsharif commented 5 months ago

Thanks for creating the issue. Are you running the gen1 or gen2 execution environment? I suspect this is an issue with gen1, but I haven't been able to get a good reproduction of it myself yet.

andaag commented 5 months ago

gen1