secynic / ipwhois

Retrieve and parse whois data for IPv4 and IPv6 addresses
https://ipwhois.readthedocs.io/en/latest
BSD 2-Clause "Simplified" License
554 stars 121 forks source link

RDAP: Roles missing from objects, while present in entities #299

Open CasperRTR opened 3 years ago

CasperRTR commented 3 years ago

Hello @secynic

I believe I have found a bug in 1.2.0, where roles are missing from parsed objects, while they are present in entities. Might be related to #161

Example:

# Minimal reproduction: perform an RDAP lookup and keep the raw response so
# the parsed result can be compared against the raw entities.
import socket

from ipwhois import IPWhois

# gethostbyname() returns an IPv4 literal unchanged; it is kept so the same
# snippet also works when a host name is substituted for the address.
ipwhois_con = IPWhois(socket.gethostbyname('213.212.61.0'))
res = ipwhois_con.lookup_rdap(inc_raw=True)

res does not contain roles, although the raw response does include them within its entities. Might this be specific to RIPE NCC?

For now, as a quick fix, I have implemented this as a patch and overridden some standard methods:

from ipwhois import rdap
from ipwhois import InvalidEntityObject, HTTPLookupError
from ipwhois.rdap import _RDAPContact, BOOTSTRAP_URL, RIR_RDAP, _RDAPEntity

def patched_ipwhois_rdap_entity_parse(self):
    """
    The function for parsing the JSON response to the vars dictionary.

    Intended as a drop-in replacement for the entity ``parse`` method. In
    addition to copying ``handle``, ``roles``, ``country``, the contact and
    the event actors into ``self.vars``, it collects the handles of nested
    entities and, when the entity itself carries no roles, adopts the roles
    of a nested entity that has the same handle.

    Raises:
        InvalidEntityObject: The handle is missing from the JSON response
            or is not a string.
    """
    try:
        self.vars['handle'] = self.json['handle'].strip()
    except (KeyError, ValueError, TypeError, AttributeError):
        # AttributeError covers a non-string handle (e.g. JSON null), which
        # would otherwise escape as an unrelated error.
        raise InvalidEntityObject('Handle is missing for RDAP entity')

    for v in ['roles', 'country']:
        try:
            self.vars[v] = self.json[v]
        except (KeyError, ValueError):
            pass

    try:
        vcard = self.json['vcardArray'][1]
        c = _RDAPContact(vcard)
        c.parse()
        self.vars['contact'] = c.vars
    except (KeyError, ValueError, TypeError):
        pass

    try:
        self.vars['events_actor'] = self.summarize_events(
            self.json['asEventActor'])
    except (KeyError, ValueError, TypeError):
        pass

    try:
        nested = list(self.json['entities'])
    except (KeyError, ValueError, TypeError):
        nested = []

    handles = []
    for ent in nested:
        # Guard each nested entity on its own so that one malformed entry
        # does not discard the entities that follow it.
        try:
            ent_handle = ent['handle']
        except (KeyError, IndexError, TypeError):
            continue

        if ent_handle not in handles:
            handles.append(ent_handle)

        # Fall back to the roles of the matching nested entity only when the
        # top-level object did not provide any.
        if (ent_handle == self.vars['handle']
                and not self.vars.get('roles')
                and 'roles' in ent):
            self.vars['roles'] = ent['roles']

    # An empty list is reported as None.
    self.vars['entities'] = handles or None

    self._parse()

def patched_ipwhois_rdap_get_entity(self, entity=None, roles=None, inc_raw=False, retry_count=3,
                    asn_data=None, bootstrap=False, rate_limit_timeout=120):
    """
    The function for retrieving and parsing information for an entity via
    RDAP (HTTP).

    Args:
        entity (:obj:`str`): The entity name to lookup.
        roles (:obj:`dict`): The mapping of entity handles to roles. It is
            updated in place with any roles discovered during the lookup.
            Defaults to None, in which case a new mapping is created.
        inc_raw (:obj:`bool`, optional): Whether to include the raw
            results in the returned dictionary. Defaults to False.
        retry_count (:obj:`int`): The number of times to retry in case
            socket errors, timeouts, connection resets, etc. are
            encountered. Defaults to 3.
        asn_data (:obj:`dict`): Result from
            :obj:`ipwhois.asn.IPASN.lookup`. Optional if the bootstrap
            parameter is True.
        bootstrap (:obj:`bool`): If True, performs lookups via ARIN
            bootstrap rather than lookups based on ASN data. Defaults to
            False.
        rate_limit_timeout (:obj:`int`): The number of seconds to wait
            before retrying when a rate limit notice is returned via
            rdap+json. Defaults to 120.

    Returns:
        namedtuple:

        :result (dict): Consists of the fields listed in the
            ipwhois.rdap._RDAPEntity dict. The raw result is included for
            each object if the inc_raw parameter is True. Empty if the
            lookup or the parsing of the entity failed.
        :roles (dict): The mapping of entity handles to roles.
    """
    # Imported here so the patch does not rely on the surrounding module
    # having brought namedtuple into scope.
    from collections import namedtuple

    # The signature allows roles to be omitted; without a mapping the
    # lookups below would fail with TypeError.
    if roles is None:
        roles = {}

    result = {}

    if bootstrap:
        entity_url = '{0}/entity/{1}'.format(
            BOOTSTRAP_URL, entity)
    else:
        tmp_reg = asn_data['asn_registry']
        entity_url = RIR_RDAP[tmp_reg]['entity_url']
        entity_url = str(entity_url).format(entity)

    try:
        # RDAP entity query
        response = self._net.get_http_json(
            url=entity_url, retry_count=retry_count,
            rate_limit_timeout=rate_limit_timeout
        )

        # Parse the entity
        result_ent = _RDAPEntity(response)
        result_ent.parse()
        result = result_ent.vars

        try:
            # Roles already known from a parent object take precedence.
            result['roles'] = roles[entity]
        except KeyError:  # pragma: no cover
            # Otherwise remember the roles found while parsing this entity.
            if result.get('roles'):
                roles[entity] = result['roles']

        try:
            nested = list(response['entities'])
        except (IndexError, KeyError, TypeError):
            nested = []

        for tmp in nested:
            # Guard each nested entity on its own so that one entry without
            # a handle or roles does not stop the remaining ones from being
            # recorded.
            try:
                if tmp['handle'] not in roles:
                    roles[tmp['handle']] = tmp['roles']
            except (IndexError, KeyError, TypeError):
                continue

        if inc_raw:
            result['raw'] = response

    except (HTTPLookupError, InvalidEntityObject):
        # Best effort: an empty result is returned for entities that cannot
        # be retrieved or parsed.
        pass

    return_tuple = namedtuple('return_tuple', ['result', 'roles'])
    return return_tuple(result, roles)

# Install the patched functions over the library's own implementations.
# These are class-level assignments, so every _RDAPEntity and RDAP instance
# uses the replacements from this point on.
rdap._RDAPEntity.parse = patched_ipwhois_rdap_entity_parse
rdap.RDAP._get_entity = patched_ipwhois_rdap_get_entity
secynic commented 4 days ago

I tested against dev and it looks fine to me. Can you confirm?