elastic / integrations

Elastic Integrations
https://www.elastic.co/integrations
Other
200 stars 429 forks source link

GCP: audit logs producing mapping issues when used with GCP Dataflow Templates #8315

Open narenswamy opened 12 months ago

narenswamy commented 12 months ago

Hi @kcreddy @andrewkroh @elastic/security-external-integrations (Team:Security-External Integrations)

I am getting the same error.

exception: "java.io.IOException: Error writing to Elasticsearch, some elements could not be inserted: Document id 0FnDb4sB82OIuFNhqSsv: [1:2101] object mapping for [protoPayload.response.status] tried to parse field [status] as object, but found a concrete value (document_parsing_exception)

I am using GCP integration version 2.27.0. Anyone please help us.

kcreddy commented 11 months ago

@narenswamy thanks for creating the issue. It looks like @efd6 has already responded on the original one. We need a sample document to confirm that the parsing problem still exists.

narenswamy commented 11 months ago

Hi @kcreddy,

Please find the sample document. Please help me on this. { "protoPayload": { "@type": "type.googleapis.com/google.cloud.audit.AuditLog", "status": {}, "authenticationInfo": { "principalEmail": "compute@developer.gserviceaccount.com" }, "requestMetadata": { "callerIp": "173.1.0.1", "callerSuppliedUserAgent": "gcloud-java/1.133.1 Google-API-Java-Client/1.31.5 Google-HTTP-Java-Client/1.39.2 (gzip),gzip(gfe)", "callerNetwork": "//compute.googleapis.com/projects/idiq-sharedvpc/global/networks/__unknown__", "requestAttributes": {}, "destinationAttributes": {} }, "serviceName": "bigquery.googleapis.com", "methodName": "jobservice.jobcompleted", "resourceName": "projects/prod-env/jobs/pyxis3_prod_connect_merge_pyxis_raw_OrderDetail_1698391674900_0", "serviceData": { "@type": "type.googleapis.com/google.cloud.bigquery.logging.v1.AuditData", "jobCompletedEvent": { "eventName": "query_job_completed", "job": { "jobName": { "projectId": "prod-env", "jobId": "pyxis3_prod_connect_merge_pyxis_raw_OrderDetail_1698391674900_0", "location": "US" }, "jobConfiguration": { "query": { "query": "MERGEpyyis_raw.OrderDetailas T\nUSING (SELECT A.* FROM\n(SELECT * FROMpyxis_raw"), "destinationTable": { "projectId": "prod-env", "datasetId": "raw", "tableId": "Detail" }, "createDisposition": "CREATE_IF_NEEDED", "writeDisposition": "WRITE_EMPTY", "defaultDataset": {}, "queryPriority": "QUERY_INTERACTIVE", "statementType": "MERGE" } }, "jobStatus": { "state": "DONE", "error": {} }, "jobStatistics": { "createTime": "2023-10-27T07:28:49.531Z", "startTime": "2023-10-27T07:28:49.879Z", "endTime": "2023-10-27T07:28:53.216Z", "totalSlotMs": "175412", "referencedTables": [ { "projectId": "prod-env", "datasetId": "pyyis", "tableId": "rderDetail" }, { "projectId": "idiq-prod-env", "datasetId": "pyxis_raw", "tableId": "OrderDetail" } ], "totalTablesProcessed": 2, "queryOutputRowCount": "36", "reservation": "unreserved" } } } } }, "insertId": "yepw5oipopoipbe2pfhs", "resource": { "type": 
"bigquery_resource", "labels": { "project_id": "prod-env" } }, "timestamp": "2023-10-27T07:28:53.220882Z", "severity": "INFO", "logName": "projects/prod-env/logs/cloudaudit.googleapis.com%2Fdata_access", "receiveTimestamp": "2023-10-27T07:28:53.715193995Z"

efd6 commented 11 months ago

@narenswamy Would you be able to provide the complete context for the event there? That JSON is not valid.

narenswamy commented 11 months ago

@efd6,

Apologies, Please find the valid JSON.

{ "protoPayload": { "@type": "type.googleapis.com/google.cloud.audit.AuditLog", "status": {}, "authenticationInfo": { "principalEmail": "compute@developer.gserviceaccount.com" }, "requestMetadata": { "callerIp": "10.8.0.3", "callerSuppliedUserAgent": "gcloud-java/1.133.1 Google-API-Java-Client/1.31.5 Google-HTTP-Java-Client/1.39.2 (gzip),gzip(gfe)", "callerNetwork": "//compute.googleapis.com/projects/sharedvpc/global/networks/__unknown__", "requestAttributes": {}, "destinationAttributes": {} }, "serviceName": "bigquery.googleapis.com", "methodName": "jobservice.jobcompleted", "resourceName": "projects/prod-env/jobs/pyxis3_prod_connect_merge_pyxis_raw_OrderDetail_1698391674900_0", "serviceData": { "@type": "type.googleapis.com/google.cloud.bigquery.logging.v1.AuditData", "jobCompletedEvent": { "eventName": "query_job_completed", "job": { "jobName": { "projectId": "prod-env", "jobId": "pyyi3_prod_connect_merge_pyxis_raw_0", "location": "US" }, "jobConfiguration": { "query": { "query": "MERGEpyy_raw.OrderDetailas T\nUSING (orderDetail WHERE _batch_id = 1698391674900 AND _sequence_num)", "destinationTable": { "projectId": "prod-env", "datasetId": "pyxis_raw", "tableId": "OrderDetail" }, "createDisposition": "CREATE_IF_NEEDED", "writeDisposition": "WRITE_EMPTY", "defaultDataset": {}, "queryPriority": "QUERY_INTERACTIVE", "statementType": "MERGE" } }, "jobStatus": { "state": "DONE", "error": {} }, "jobStatistics": { "createTime": "2023-10-27T07:28:49.531Z", "startTime": "2023-10-27T07:28:49.879Z", "endTime": "2023-10-27T07:28:53.216Z", "totalBilledBytes": "613416960", "billingTier": 1, "totalSlotMs": "175412", "referencedTables": [ { "projectId": "prod-env", "datasetId": "pyyis", "tableId": "_staging_OrderDetail" }, { "projectId": "prod-env", "datasetId": "pyyis_raw", "tableId": "OrderDetail" } ], "totalTablesProcessed": 2, "queryOutputRowCount": "36", "reservation": "unreserved" } } } } }, "insertId": "yepw5bety2pfhs", "resource": { "type": "bigquery_resource", 
"labels": { "project_id": "prod-env" } }, "timestamp": "2023-10-27T07:28:53.220882Z", "severity": "INFO", "logName": "projects/prod-env/logs/cloudaudit.googleapis.com%2Fdata_access", "receiveTimestamp": "2023-10-27T07:28:53.715193995Z" }

efd6 commented 11 months ago

I am unable to get that document to fail to ingest in a test environment. It gives me the following event:

{
  "@timestamp": "2023-10-27T07:28:53.220Z",
  "agent": {
    "ephemeral_id": "e1944fde-a002-447c-ac58-7859ea12155c",
    "id": "747b3f2a-8b40-4ee3-9ddd-ec86e51f9342",
    "name": "docker-fleet-agent",
    "type": "filebeat",
    "version": "8.10.1"
  },
  "client": {
    "user": {
      "email": "compute@developer.gserviceaccount.com"
    }
  },
  "cloud": {
    "project": {
      "id": "prod-env"
    },
    "provider": "gcp"
  },
  "data_stream": {
    "dataset": "gcp.audit",
    "namespace": "ep",
    "type": "logs"
  },
  "ecs": {
    "version": "8.8.0"
  },
  "elastic_agent": {
    "id": "747b3f2a-8b40-4ee3-9ddd-ec86e51f9342",
    "snapshot": false,
    "version": "8.10.1"
  },
  "event": {
    "action": "jobservice.jobcompleted",
    "agent_id_status": "verified",
    "created": "2023-10-30T06:03:21.126Z",
    "dataset": "gcp.audit",
    "id": "yepw5bety2pfhs",
    "ingested": "2023-10-30T06:03:24Z",
    "kind": "event",
    "outcome": "unknown",
    "provider": "data_access"
  },
  "gcp": {
    "audit": {
      "resource_name": "projects/prod-env/jobs/pyxis3_prod_connect_merge_pyxis_raw_OrderDetail_1698391674900_0",
      "type": "type.googleapis.com/google.cloud.audit.AuditLog"
    }
  },
  "input": {
    "type": "gcp-pubsub"
  },
  "log": {
    "level": "INFO",
    "logger": "projects/prod-env/logs/cloudaudit.googleapis.com%2Fdata_access"
  },
  "service": {
    "name": "bigquery.googleapis.com"
  },
  "source": {
    "ip": "10.8.0.3"
  },
  "tags": [
    "forwarded",
    "gcp-audit"
  ],
  "user_agent": {
    "device": {
      "name": "Spider"
    },
    "name": "Google-HTTP-Java-Client",
    "original": "gcloud-java/1.133.1 Google-API-Java-Client/1.31.5 Google-HTTP-Java-Client/1.39.2 (gzip),gzip(gfe)",
    "version": "1.39.2"
  }
}
narenswamy commented 11 months ago

@efd6

Please check the below sample documents. { "protoPayload": { "@type": "type.googleapis.com/google.cloud.audit.AuditLog", "status": {}, "authenticationInfo": { "principalEmail": "compute@developer.gserviceaccount.com", "serviceAccountDelegationInfo": [ { "firstPartyPrincipal": { "principalEmail": "compute-system.iam.gserviceaccount.com" } } ] }, "requestMetadata": { "callerIp": "10.0.0.4", "callerSuppliedUserAgent": "gcloud-java/1.133.1 Google-API-Java-Client/1.31.5 Google-HTTP-Java-Client/1.39.2 (gzip),gzip(gfe)", "callerNetwork": "//compute.googleapis.com/projects/vpc/global/networks/__unknown__", "requestAttributes": {}, "destinationAttributes": {} }, "serviceName": "bigquery.googleapis.com", "methodName": "tableservice.delete", "authorizationInfo": [ { "resource": "projects/prod-env/datasets/py/tables/Header", "permission": "bigquery.tables.delete", "granted": true, "resourceAttributes": {} } ], "resourceName": "projects/prod-env/datasets/py/tables/Header" }, "insertId": "-l2qibyddrmf", "resource": { "type": "bigquery_resource", "labels": { "project_id": "prod-env" } }, "timestamp": "2023-10-31T06:42:55.792521Z", "severity": "NOTICE", "logName": "projects/prod-env/logs/cloudaudit.googleapis.com%2Factivity", "receiveTimestamp": "2023-10-31T06:42:56.568789936Z" }

another document:

{ "protoPayload": { "@type": "type.googleapis.com/google.cloud.audit.AuditLog", "status": {}, "authenticationInfo": { "principalEmail": "fusion@prod-env.iam.gserviceaccount.com", "serviceAccountDelegationInfo": [ { "firstPartyPrincipal": { "principalEmail": "s@compute-system.iam.gserviceaccount.com" } } ] }, "requestMetadata": { "callerIp": "3.20.16.11", "callerSuppliedUserAgent": "gcloud-java/1.137.1 Google-API-Java-Client/1.32.1 Google-HTTP-Java-Client/1.39.2 (gzip),gzip(gfe)", "callerNetwork": "//compute.googleapis.com/projects/prod-env/global/networks/__unknown__", "requestAttributes": {}, "destinationAttributes": {} }, "serviceName": "bigquery.googleapis.com", "methodName": "tableservice.update", "authorizationInfo": [ { "resource": "projects/env/datasets/store/tables/records", "permission": "bigquery.tables.update", "granted": true, "resourceAttributes": {} } ], "resourceName": "projects/prod-env/datasets/store/tables/records", "serviceData": { "@type": "type.googleapis.com/google.cloud.bigquery.logging.v1.AuditData", "tableUpdateResponse": { "resource": { "tableName": { "projectId": "prod-env", "datasetId": "store", "tableId": "records" }, "info": {}, "view": {}, "createTime": "2023-05-22T18:45:14.007Z", "schemaJson": "{\n \"type\": \"BOOLEAN\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"scores\",\n \"type\": \"BOOLEAN\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"entity\",\n \"type\": \"BOOLEAN\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"score\",\n \"type\": \"STRING\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"presence\",\n \"type\": \"STRING\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"visited\",\n \"type\": \"BOOLEAN\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"request\",\n \"type\": \"STRING\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"response\",\n \"type\": \"STRING\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"feedback\",\n \"type\": \"STRING\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"offers\",\n \"type\": 
\"STRING\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"upgrade\",\n \"type\": \"STRING\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"status\",\n \"type\": \"INTEGER\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"status\",\n \"type\": \"INTEGER\",\n \"mode\": \"NULLABLE\"\n }]\n}", "updateTime": "2023-10-31T04:08:17.688Z" } }, "tableUpdateRequest": { "resource": { "tableName": { "projectId": "prod-env", "datasetId": "store", "tableId": "_records" }, "info": {}, "view": {}, "schemaJson": "{\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"status\",\n \"type\": \"STRING\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"downloaded\",\n \"type\": \"BOOLEAN\",\n \"mode\": \"NULLABLE\"\n }, {\n \"name\": \"onboarding\",\n \"type\": \"STRING\",\n \"mode\": \"NULLABLE\"\n }]\n}" } } } }, "insertId": "-5jmjv0demkd", "resource": { "type": "bigquery_resource", "labels": { "project_id": "prod-env" } }, "timestamp": "2023-10-31T05:08:52.955766Z", "severity": "NOTICE", "logName": "projects/prod-env/logs/cloudaudit.googleapis.com%2Factivity", "receiveTimestamp": "2023-10-31T05:08:53.962008209Z" }

efd6 commented 11 months ago

Those also both parse and ingest without issue.

{
  "@timestamp": "2023-10-31T06:42:55.792Z",
  "agent": {
    "ephemeral_id": "118e3f20-f76d-4fe7-b5d5-e4b6616fb49d",
    "id": "747b3f2a-8b40-4ee3-9ddd-ec86e51f9342",
    "name": "docker-fleet-agent",
    "type": "filebeat",
    "version": "8.10.1"
  },
  "client": {
    "user": {
      "email": "compute@developer.gserviceaccount.com"
    }
  },
  "cloud": {
    "project": {
      "id": "prod-env"
    },
    "provider": "gcp"
  },
  "data_stream": {
    "dataset": "gcp.audit",
    "namespace": "ep",
    "type": "logs"
  },
  "ecs": {
    "version": "8.8.0"
  },
  "elastic_agent": {
    "id": "747b3f2a-8b40-4ee3-9ddd-ec86e51f9342",
    "snapshot": false,
    "version": "8.10.1"
  },
  "event": {
    "action": "tableservice.delete",
    "agent_id_status": "verified",
    "category": [
      "network",
      "configuration"
    ],
    "created": "2023-10-31T07:39:07.561Z",
    "dataset": "gcp.audit",
    "id": "-l2qibyddrmf",
    "ingested": "2023-10-31T07:39:10Z",
    "kind": "event",
    "outcome": "success",
    "provider": "activity",
    "type": [
      "access",
      "allowed"
    ]
  },
  "gcp": {
    "audit": {
      "authorization_info": [
        {
          "granted": true,
          "permission": "bigquery.tables.delete",
          "resource": "projects/prod-env/datasets/py/tables/Header"
        }
      ],
      "resource_name": "projects/prod-env/datasets/py/tables/Header",
      "type": "type.googleapis.com/google.cloud.audit.AuditLog"
    }
  },
  "input": {
    "type": "gcp-pubsub"
  },
  "log": {
    "level": "NOTICE",
    "logger": "projects/prod-env/logs/cloudaudit.googleapis.com%2Factivity"
  },
  "service": {
    "name": "bigquery.googleapis.com"
  },
  "source": {
    "ip": "10.0.0.4"
  },
  "tags": [
    "forwarded",
    "gcp-audit"
  ],
  "user_agent": {
    "device": {
      "name": "Spider"
    },
    "name": "Google-HTTP-Java-Client",
    "original": "gcloud-java/1.133.1 Google-API-Java-Client/1.31.5 Google-HTTP-Java-Client/1.39.2 (gzip),gzip(gfe)",
    "version": "1.39.2"
  }
}
{
  "@timestamp": "2023-10-31T05:08:52.955Z",
  "agent": {
    "ephemeral_id": "d7e964a9-9b02-4d5e-9460-8fc6bc44ff99",
    "id": "747b3f2a-8b40-4ee3-9ddd-ec86e51f9342",
    "name": "docker-fleet-agent",
    "type": "filebeat",
    "version": "8.10.1"
  },
  "client": {
    "user": {
      "email": "fusion@prod-env.iam.gserviceaccount.com"
    }
  },
  "cloud": {
    "project": {
      "id": "prod-env"
    },
    "provider": "gcp"
  },
  "data_stream": {
    "dataset": "gcp.audit",
    "namespace": "ep",
    "type": "logs"
  },
  "ecs": {
    "version": "8.8.0"
  },
  "elastic_agent": {
    "id": "747b3f2a-8b40-4ee3-9ddd-ec86e51f9342",
    "snapshot": false,
    "version": "8.10.1"
  },
  "event": {
    "action": "tableservice.update",
    "agent_id_status": "verified",
    "category": [
      "network",
      "configuration"
    ],
    "created": "2023-10-31T07:40:49.709Z",
    "dataset": "gcp.audit",
    "id": "-5jmjv0demkd",
    "ingested": "2023-10-31T07:40:52Z",
    "kind": "event",
    "outcome": "success",
    "provider": "activity",
    "type": [
      "access",
      "allowed"
    ]
  },
  "gcp": {
    "audit": {
      "authorization_info": [
        {
          "granted": true,
          "permission": "bigquery.tables.update",
          "resource": "projects/env/datasets/store/tables/records"
        }
      ],
      "resource_name": "projects/prod-env/datasets/store/tables/records",
      "type": "type.googleapis.com/google.cloud.audit.AuditLog"
    }
  },
  "input": {
    "type": "gcp-pubsub"
  },
  "log": {
    "level": "NOTICE",
    "logger": "projects/prod-env/logs/cloudaudit.googleapis.com%2Factivity"
  },
  "service": {
    "name": "bigquery.googleapis.com"
  },
  "source": {
    "ip": "3.20.16.11"
  },
  "tags": [
    "forwarded",
    "gcp-audit"
  ],
  "user_agent": {
    "device": {
      "name": "Spider"
    },
    "name": "Google-HTTP-Java-Client",
    "original": "gcloud-java/1.137.1 Google-API-Java-Client/1.32.1 Google-HTTP-Java-Client/1.39.2 (gzip),gzip(gfe)",
    "version": "1.39.2"
  }
}
narenswamy commented 11 months ago

Hi @efd6,

I am trying to ingest the audit logs using GCP Dataflow, and I don't know why I am getting the "status" parse error. I have installed the Elastic GCP integration. I am not using any agents for data ingestion; it is direct ingestion from Dataflow to Elasticsearch.

image

efd6 commented 11 months ago

Ah, so you are not passing it through an ingest pipeline? Without the ingest processing, the fields will not be correct. You will need to set up an agent to do this.

narenswamy commented 11 months ago

Hi @efd6

I am attempting to follow this guide for receiving GCP audit logs: https://cloud.google.com/blog/products/data-analytics/export-google-cloud-data-into-elasticsearch-with-dataflow-templates

Any suggestions? I ask because that guide does not mention any Elastic Agents.

efd6 commented 11 months ago

I wouldn't expect them to since that is a google page. The configuration instructions for ingesting GCP documents into Elasticsearch using the integration are here.

narenswamy commented 11 months ago

@efd6

oh, I have tried below link as well and got same error.😢

https://www.elastic.co/guide/en/observability/current/gcp-dataflow.html

kcreddy commented 11 months ago

@narenswamy that link also doesn't use the Elastic Agent with GCP integration. The link @efd6 provided in earlier comment uses Elastic Agent with GCP integration to ingest data, instead of Dataflow template.

The fix https://github.com/elastic/integrations/pull/3583 was made to the ingest pipeline and so if you use Elastic Agent, you can get this fix for the field mapping issue.

efd6 commented 11 months ago

I have asked internally about this but have not received an answer. The document there is for observability and I am not sure that audit logs are expected to work with that approach.

andrewkroh commented 11 months ago

To me it looks like this dataflow template sends the data over the bulk api to the integration's data stream (when configured appropriately). So I wonder if there is some difference between the format from the Agent. Like perhaps the json is already decoded instead of being stringified in the message field.

https://cloud.google.com/dataflow/docs/guides/templates/provided/pubsub-to-elasticsearch https://github.com/GoogleCloudPlatform/DataflowTemplates

andrewkroh commented 11 months ago

Like perhaps the json is already decoded instead of being stringified in the message field.

I think this code reinforces my suspicion.

https://github.com/GoogleCloudPlatform/DataflowTemplates/blob/84530b72c33eb9804d6142142109c512eaab0a79/v2/googlecloud-to-elasticsearch/src/main/java/com/google/cloud/teleport/v2/elasticsearch/templates/PubSubToElasticsearch.java#L181-L190

In the configuration options there does appear to be a way to override the function. See javascriptTextTransformFunctionName and javascriptTextTransformGcsPath. Perhaps it's possible to substitute in another function that outputs JSON like

{
  "message": "<protobuf data as json string>"
}

That would make the document compatible with the default logs-gcp.audit pipeline.

narenswamy commented 11 months ago

@andrewkroh

Any suggestions?

andrewkroh commented 10 months ago

Nothing more than trying out the options I mentioned in my last post.

I found some UDF examples in Google's repo at https://github.com/GoogleCloudPlatform/DataflowTemplates/tree/main/v2/common/src/main/resources/udf-samples. I think the transform_log.js would be a good starting point.

narenswamy commented 10 months ago

@andrewkroh

I have created UDF code to work around the parse error. Is the code below fine?

/**
 * Dataflow UDF that stringifies an object-valued `protoPayload.response.status`.
 *
 * The event is read either from the top level of the parsed input or, when the
 * input carries both `data` and `attributes` (a wrapped Pub/Sub message), from
 * its `data` member. The whole parsed input is always re-serialized and
 * returned, so the function returns a string on every path.
 *
 * There is deliberately no early `return data.protoPayload`: that would return
 * an object instead of a string and would make the status conversion below
 * unreachable for every audit log event.
 *
 * Written in ES5 syntax (`var`, no arrow functions) to match the original
 * snippet; NOTE(review): confirm which ECMAScript level the UDF engine accepts
 * before modernizing.
 *
 * @param {string} inJson input JSON event (stringified)
 * @return {string} output JSON event (stringified)
 */
function process(inJson) {
    var obj = JSON.parse(inJson);
    var includePubsubMessage = obj.data && obj.attributes;
    var data = includePubsubMessage ? obj.data : obj;

    var response = data.protoPayload && data.protoPayload.response;

    // typeof null is 'object', so null has to be excluded explicitly;
    // otherwise it would be rewritten to the string "null".
    if (response && response.status !== null && typeof response.status === 'object') {
        response.status = JSON.stringify(response.status);
    }

    return JSON.stringify(obj);
}

bturquet commented 10 months ago

Hi @narenswamy , see my comments here:

I believe this UDF function could do the job:

/**
 * Sample UDF function to handle different response.status types.
 *
 * Moves `protoPayload.response.status` into `protoPayload.response.status_value`
 * so the `status` field itself is never sent with a conflicting type:
 *   - a concrete (non-object) value, including 0, "" and false, is moved as-is;
 *   - an object is flattened to a space-separated "key:value" string, with
 *     nested objects/arrays rendered as JSON;
 *   - a missing or null status leaves the event untouched.
 * NOTE(review): `status_value` is meant to align with the integration's ingest
 * pipeline field naming - confirm against the pipeline definition.
 *
 * The event is read from the top level of the parsed input, or from its `data`
 * member when the input carries both `data` and `attributes` (a wrapped
 * Pub/Sub message). The whole parsed input is re-serialized and returned.
 *
 * Kept in ES5 syntax (`var`, function expressions) to match the original
 * snippet; NOTE(review): confirm which ECMAScript level the UDF engine accepts
 * before modernizing. All variables are declared locally so the function also
 * works under strict mode.
 *
 * @param {string} inJson input JSON event (stringified)
 * @return {string} outJson output JSON event (stringified)
 */
function process(inJson) {
    var obj = JSON.parse(inJson);
    var includePubsubMessage = obj.data && obj.attributes;
    var data = includePubsubMessage ? obj.data : obj;

    var response = data.protoPayload && data.protoPayload.response;

    // Compare against undefined/null rather than testing truthiness: falsy
    // concrete values (0, "", false) still need to be moved out of `status`.
    if (!response || response.status === undefined || response.status === null) {
        return JSON.stringify(obj);
    }

    var status = response.status;
    delete response.status;

    if (typeof status === 'object') {
        // Flatten the object to "key:value key:value". join() places the
        // separator only between pairs, so no trimming is needed afterwards.
        response.status_value = Object.keys(status).map(function (key) {
            var value = status[key];
            var text = (value !== null && typeof value === 'object')
                ? JSON.stringify(value)
                : String(value);
            return key + ':' + text;
        }).join(' ');
    } else {
        // Concrete value: keep it unchanged under the new field name.
        response.status_value = status;
    }

    return JSON.stringify(obj);
}