microsoft / msticpy

Microsoft Threat Intelligence Security Tools
Other
1.72k stars 310 forks source link

[Bug]: KeyError: 'label' when using df.mp.whois #727

Open j-mie opened 8 months ago

j-mie commented 8 months ago

Describe the bug Exception when running df.mp.whois

To Reproduce Steps to reproduce the behavior:

  1. df.mp.whois(ip_column="ip")

I'm not sure which IP address is causing the exception; there are a large number of IPs in the dataframe.

Expected behavior No exception :)

Screenshots and/or Traceback

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Input In [63], in <cell line: 1>()
----> 1 df.mp.whois(ip_column="ip")

File /opt/conda/lib/python3.9/site-packages/msticpy/init/mp_pandas_accessors.py:245, in MsticpyCoreAccessor.whois(self, ip_column, **kwargs)
    219 def whois(self, ip_column, **kwargs):
    220     """
    221     Extract IoCs from either a pandas DataFrame.
    222 
   (...)
    243 
    244     """
--> 245     return get_whois_df(data=self._df, ip_column=ip_column, **kwargs)

File /opt/conda/lib/python3.9/site-packages/msticpy/context/ip_utils.py:353, in get_whois_df(***failed resolving arguments***)
    324 """
    325 Retrieve Whois ASN information for DataFrame of IP Addresses.
    326 
   (...)
    350 
    351 """
    352 del show_progress
--> 353 whois_data = ip_whois(data[ip_column].drop_duplicates())
    354 if (
    355     isinstance(whois_data, pd.DataFrame)
    356     and not whois_data.empty
    357     and "query" in whois_data.columns
    358 ):
    359     data = data.merge(
    360         whois_data,  # type: ignore
    361         how="left",
   (...)
    364         suffixes=("", "_whois"),
    365     )

File /opt/conda/lib/python3.9/site-packages/msticpy/context/ip_utils.py:466, in ip_whois(ip, ip_address, raw, query_rate, retry_count)
    464         if rate_limit:
    465             sleep(query_rate)
--> 466         whois_results[ip_addr] = _whois_lookup(
    467             ip_addr, raw=raw, retry_count=retry_count
    468         ).properties
    469     return _whois_result_to_pandas(whois_results)
    470 if isinstance(ip, (str, IpAddress)):

File /opt/conda/lib/python3.9/site-packages/msticpy/context/ip_utils.py:593, in _whois_lookup(ip_addr, raw, retry_count)
    591 if not asn_items or not registry_url:
    592     return _IpWhoIsResult(None)
--> 593 return _add_rdap_data(
    594     ipwhois_result=ipwhois_result,  # type: ignore
    595     rdap_reg_url=f"{registry_url}{ip_addr}",
    596     retry_count=retry_count,
    597     raw=raw,
    598 )

File /opt/conda/lib/python3.9/site-packages/msticpy/context/ip_utils.py:610, in _add_rdap_data(ipwhois_result, rdap_reg_url, retry_count, raw)
    608 if rdap_data.status_code == 200:
    609     rdap_data_content = rdap_data.json()
--> 610     net = _create_net(rdap_data_content)
    611     ipwhois_result.properties["nets"] = [net]
    612     for link in rdap_data_content["links"]:

File /opt/conda/lib/python3.9/site-packages/msticpy/context/ip_utils.py:685, in _create_net(data)
    683     updated = item["eventDate"] if item["eventAction"] == "registration" else None
    684 for entity in data["entities"]:
--> 685     address = _find_address(entity)  # type: ignore
    686 regex = r"[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*"
    687 emails = re.findall(regex, str(data))

File /opt/conda/lib/python3.9/site-packages/msticpy/context/ip_utils.py:663, in _find_address(entity)
    661     for vcard_sub in vcard:
    662         if vcard_sub[0] == "adr":
--> 663             return vcard_sub[1]["label"]
    664 return None

KeyError: 'label'

Environment (please complete the following information):

ianhelle commented 8 months ago

Yeah - that seems like slightly undefensive coding. We should fix this to return something like "no-label" if the "label" key is not in the data.