nixawk / pentest-wiki

PENTEST-WIKI is a free online security knowledge library for pentesters / researchers. If you have a good idea, please share it with others.
MIT License
3.37k stars 915 forks source link

[information gathering] parse masscan xml #20

Open nixawk opened 6 years ago

nixawk commented 6 years ago
#!/usr/bin/python
# -*- coding: utf-8 -*-

from lxml import etree

class MasscanParser(object):
    """Extract ``(host, port, protocol)`` records from a masscan XML report.

    The report text is parsed with ``etree.HTML``; each ``<host>`` element
    contributes its first ``<address>`` combined with every ``<port>`` found
    under ``ports/port``.
    """

    def parse_host_port_protocol(self, masscan_data=None, uniq=True):
        """Parse report text into a list of ``(host, port, protocol)`` tuples.

        A one-line summary (record count plus the up/down/total values read
        from the report's ``<hosts>`` element) is printed as a side effect.

        Args:
            masscan_data: the XML report content as a ``str``.
            uniq: when true, duplicate records are dropped; the first
                occurrence is kept and the original order is preserved.

        Returns:
            A list of ``(addr, portid_as_int, protocol)`` tuples.

        Raises:
            Exception: if ``masscan_data`` is empty, is not a ``str``, or
                cannot be parsed.
        """
        if not masscan_data:
            raise Exception("No report data to parse: please provide a valid XML masscan report.")

        if not isinstance(masscan_data, str):
            raise Exception("Wrong masscan_data type given as argument: cannot parse data.")

        try:
            root = etree.HTML(masscan_data)
        except Exception:
            # Narrower than a bare ``except:`` so KeyboardInterrupt and
            # SystemExit still propagate.
            raise Exception("Wrong XML structure: cannot parse data.")

        if root is None:
            # Guard against a parser result with no root element; without
            # this the xpath call below would fail with AttributeError.
            raise Exception("Wrong XML structure: cannot parse data.")

        data = []
        for host_tag in root.xpath('//host'):
            # list() makes the results indexable even when a helper (or a
            # subclass override) hands back a lazy iterator, as ``map`` does
            # on Python 3.
            addresses = list(self.parse_xml_host_address(host_tag))
            ports = list(self.parse_xml_port(host_tag))

            if not addresses or not ports:
                # Incomplete host entry: nothing usable to report.
                continue

            # Emit one record per port rather than only the first port.
            data.extend(
                (addresses[0], port, protocol) for port, protocol in ports
            )

        if uniq:
            # Order-preserving de-duplication keeps the output deterministic.
            seen = set()
            unique = []
            for record in data:
                if record not in seen:
                    seen.add(record)
                    unique.append(record)
            data = unique

        up, down, total = self.parse_xml_hostsup(root)

        print('Available: %s, Up: %s, Down: %s, Total: %s' % (
            len(data), up, down, total
        ))

        return data

    def parse_xml_host_address(self, host_tag):
        """Return the ``addr`` attributes of the host's ``<address>`` children.

        Elements without an ``addr`` attribute are skipped. The result is a
        real list so callers can index it on both Python 2 and Python 3.
        """
        found = []
        for address_tag in host_tag.xpath('address'):
            addr = address_tag.get('addr')
            if addr is not None:
                found.append(addr)
        return found

    def parse_xml_port(self, host_tag):
        """Return ``(portid_as_int, protocol)`` for each ``ports/port`` child.

        Entries whose ``portid`` is missing or not an integer are skipped.
        """
        ports = []
        for port_tag in host_tag.xpath('ports/port'):
            try:
                number = int(port_tag.get('portid'))
            except (TypeError, ValueError):
                # Missing (None) or non-numeric portid: ignore this entry.
                continue
            ports.append((number, port_tag.get('protocol')))
        return ports

    def parse_xml_hostsup(self, root):
        """Return the ``(up, down, total)`` attributes of the first ``<hosts>``.

        The values are returned exactly as read from the attributes. When the
        document has no ``<hosts>`` element, ``(0, 0, 0)`` is returned.
        """
        hostsup = root.xpath('//hosts')

        if not hostsup:
            return 0, 0, 0

        summary = hostsup[0]
        return (
            summary.get('up'),
            summary.get('down'),
            summary.get('total')
        )

    def parse_masscan_xml(self, xmlfile, uniq=True):
        """Read ``xmlfile`` and parse it with ``parse_host_port_protocol``.

        Args:
            xmlfile: path of the masscan XML report.
            uniq: forwarded to ``parse_host_port_protocol``.

        Returns:
            The parsed records, or an empty list when the path does not exist.
        """
        import os

        if not os.path.exists(xmlfile):
            return []

        # The context manager closes the file even if reading fails.
        with open(xmlfile) as report_file:
            report = report_file.read()

        return self.parse_host_port_protocol(report, uniq=uniq)

if __name__ == '__main__':
    # Command-line entry point: print every (host, port, protocol) record
    # found in the masscan XML report named as the only argument.
    import sys

    if len(sys.argv) != 2:
        print("[*] python %s <masscan.xml>" % sys.argv[0])
        # Wrong usage is an error, so signal it through a non-zero status.
        sys.exit(1)

    msparser = MasscanParser()
    msprdata = msparser.parse_masscan_xml(sys.argv[1])

    for host, port, protocol in msprdata:
        # Print the tuple itself so Python 2 and Python 3 both emit the
        # same "('host', port, 'protocol')" line.
        print((host, port, protocol))
nixawk commented 6 years ago
$ python2.7 masscan-xml.py
[*] python masscan-xml.py <masscan.xml>

$ python2.7 masscan-xml.py masscan.xml
Available: 2, Up: 2, Down: 0, Total: 2
('192.168.1.100', 9200, 'tcp')
('8.8.8.8', 9200, 'tcp')