domainaware / parsedmarc

A Python package and CLI for parsing aggregate and forensic DMARC reports
https://domainaware.github.io/parsedmarc/
Apache License 2.0
1.01k stars 218 forks source link

DMARC report fields missing from ES #86

Closed bhozar closed 5 years ago

bhozar commented 5 years ago

I have a lot of DMARC reports from Yahoo! ingested into the system which are not populating some fields:

The results I see in Elasticsearch:

{
    "_id": "TGrgaGsBXZoSj4maA7ET",
    "_type": "doc",
    "_index": "dmarc_aggregate-2019-06-17",
    "xml_schemea": "draft",
    "org_name": "yahoo! inc",
    "org_email": "postmaster@dmarc.yahoo.com",
    "report_id": "1560820250.881400",
    "date_range": [
        "2019-06-17T00:00:00.000Z",
        "2019-06-17T23:59:59.000Z"
    ],
    "published_policy": {
        "domain": "example.com",
        "adkim": "r",
        "aspf": "r",
        "p": "none",
        "sp": "none",
        "pct": 100,
        "fo": "0"
    },
    "source_ip_address": "52.142.190.111",
    "source_country": "GB",
    "source_reverse_dns": "mailrelay.example.com",
    "source_base_domain": "example.com",
    "message_count": 3,
    "disposition": "none",
    "dkim_aligned": false,
    "spf_aligned": false,
    "header_from": "example.com",
    "envelope_from": "mailrelay.example.com",
    "spf_results": [
        {
            "domain": "mailrelay.example.com",
            "scope": "mfrom",
            "result": "none"
        }
    ],
    "passed_dmarc": false
}

The original DMARC report:

<?xml version="1.0"?>   
<feedback>  
  <report_metadata> 
    <org_name>Yahoo! Inc.</org_name>    
    <email>postmaster@dmarc.yahoo.com</email>   
    <report_id>1560820250.881400</report_id>    
    <date_range>    
      <begin>1560729600</begin> 
      <end>1560815999 </end>    
    </date_range>   
  </report_metadata>    
  <policy_published>    
    <domain>example.com</domain>    
    <adkim>r</adkim>    
    <aspf>r</aspf>  
    <p>none</p> 
    <pct>100</pct>  
  </policy_published>   
  <record>  
    <row>   
      <source_ip>52.142.190.111</source_ip> 
      <count>3</count>  
      <policy_evaluated>    
        <disposition>none</disposition> 
        <dkim>fail</dkim>   
        <spf>fail</spf> 
      </policy_evaluated>   
    </row>  
    <identifiers>   
      <header_from>example.com</header_from>    
    </identifiers>  
    <auth_results>  
      <dkim>    
        <domain></domain>   
        <result>neutral</result>    
      </dkim>   
      <spf> 
        <domain>mailrelay.example.com</domain>  
        <result>none</result>   
      </spf>    
    </auth_results> 
  </record> 
  <record>  
    <row>   
      <source_ip>52.142.190.111</source_ip> 
      <count>1</count>  
      <policy_evaluated>    
        <disposition>none</disposition> 
        <dkim>pass</dkim>   
        <spf>pass</spf> 
      </policy_evaluated>   
    </row>  
    <identifiers>   
      <header_from>example.com</header_from>    
    </identifiers>  
    <auth_results>  
      <dkim>    
        <domain>example.com</domain>    
        <result>pass</result>   
      </dkim>   
      <spf> 
        <domain>example.com</domain>    
        <result>pass</result>   
      </spf>    
    </auth_results> 
  </record> 
</feedback> 

To me it looks like the fields should populate from the report.

seanthegeek commented 5 years ago

Hi,

DKIM results are only provided for messages that have been DKIM signed.

The report you provided has two row elements, only one of which has a dkim element in auth_results, because the other messages were not DKIM signed at all.

Each row element in the DMARC report is saved in Elasticsearch as a separate event, only one of which contains dkim_results.

image