cuckoosandbox / cuckoo

Cuckoo Sandbox is an automated dynamic malware analysis system
http://www.cuckoosandbox.org

Geo IP Lookups? #1273

Open keithjjones opened 7 years ago

keithjjones commented 7 years ago

It would be really useful to have geographic information alongside IP addresses and host names (that resolve) within the Django interface and report.json. I searched for "geo" in the code base and couldn't find any reference, and I also couldn't find another issue dedicated to it. Is this something that could be added in the future? The free MaxMind databases should be able to do this pretty easily.
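
For reference, a minimal sketch of what such a lookup could look like with the free GeoLite2 City database and the `geoip2` Python package; the database path and sample address below are placeholders, not anything Cuckoo ships with:

```python
# Minimal sketch of a MaxMind GeoLite2 lookup via the geoip2 package.
# The .mmdb path and the sample IP address are placeholders.
import geoip2.database
import geoip2.errors

reader = geoip2.database.Reader("/opt/geoip/GeoLite2-City.mmdb")
try:
    response = reader.city("93.184.216.34")
    print("%s (%s)" % (response.country.name, response.country.iso_code))
except geoip2.errors.AddressNotFoundError:
    # Private and unknown addresses are not in the database.
    print("no geolocation data for this address")
finally:
    reader.close()
```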

jbremer commented 7 years ago

We're actually working on this right now :) so, to be continued I guess ;-)

keithjjones commented 7 years ago

Nice!

1623664 commented 7 years ago

I have a solution to your question.

1623664 commented 7 years ago

modules/processing/network.py

# Copyright (C) 2010-2013 Claudio Guarnieri.
# Copyright (C) 2014-2016 Cuckoo Foundation.
# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
# See the file 'docs/LICENSE' for copying permission.

import hashlib
import logging
import json
import os
import re
import socket
import struct
import tempfile
import urlparse
import geoip2.database

from lib.cuckoo.common.abstracts import Processing
from lib.cuckoo.common.config import Config
from lib.cuckoo.common.constants import LATEST_HTTPREPLAY, CUCKOO_ROOT
from lib.cuckoo.common.dns import resolve
from lib.cuckoo.common.irc import ircMessage
from lib.cuckoo.common.objects import File
from lib.cuckoo.common.utils import convert_to_printable, versiontuple
from lib.cuckoo.common.exceptions import CuckooProcessingError

try:
    import dpkt
    HAVE_DPKT = True
except ImportError:
    HAVE_DPKT = False

try:
    import httpreplay
    import httpreplay.cut

    # Be less verbose about httpreplay logging messages.
    logging.getLogger("httpreplay").setLevel(logging.CRITICAL)

    HAVE_HTTPREPLAY = True
except ImportError:
    HAVE_HTTPREPLAY = False

# Imports for the batch sort.
# http://stackoverflow.com/questions/10665925/how-to-sort-huge-files-with-python
# http://code.activestate.com/recipes/576755/
import heapq
from itertools import islice
from collections import namedtuple

Keyed = namedtuple("Keyed", ["key", "obj"])
Packet = namedtuple("Packet", ["raw", "ts"])

log = logging.getLogger(__name__)
cfg = Config()

# Urge users to upgrade to the latest version.
_v = getattr(httpreplay, "__version__", None) if HAVE_HTTPREPLAY else None
if _v and versiontuple(_v) < versiontuple(LATEST_HTTPREPLAY):
    log.warning(
        "You are using version %s of HTTPReplay, rather than the latest "
        "version %s, which may not handle various corner cases and/or TLS "
        "cipher suites correctly. This could result in not getting all the "
        "HTTP/HTTPS streams that are available or corrupt some streams that "
        "were not handled correctly before. Please upgrade it to the latest "
        "version (`pip install --upgrade httpreplay`).",
        _v, LATEST_HTTPREPLAY,
    )

class Pcap(object):
    """Reads network data from PCAP file."""
    ssl_ports = 443,

    notified_dpkt = False

    def __init__(self, filepath, options):
        """Creates a new instance.
        @param filepath: path to PCAP file
        @param options: config options
        """
        self.filepath = filepath
        self.options = options

        # List of all hosts.
        self.hosts = []
        # List containing all non-private IP addresses.
        self.unique_hosts = []
        # List of unique domains.
        self.unique_domains = []
        # List of unique geolocation entries (GeoIP lookups).
        self.unique_geolocation = []
        # List containing all TCP packets.
        self.tcp_connections = []
        self.tcp_connections_seen = set()
        # Lookup table to identify connection requests to services or IP
        # addresses that are no longer available.
        self.tcp_connections_dead = {}
        self.dead_hosts = {}
        self.alive_hosts = {}
        # List containing all UDP packets.
        self.udp_connections = []
        self.udp_connections_seen = set()
        # List containing all ICMP requests.
        self.icmp_requests = []
        # List containing all HTTP requests.
        self.http_requests = {}
        # List containing all TLS/SSL3 key combinations.
        self.tls_keys = []
        # List containing all DNS requests.
        self.dns_requests = {}
        self.dns_answers = set()
        # List containing all SMTP requests.
        self.smtp_requests = []
        # Reconstructed SMTP flow.
        self.smtp_flow = {}
        # List containing all IRC requests.
        self.irc_requests = []
        # Dictionary containing all the results of this processing.
        self.results = {}
        # List containing all whitelist entries.
        self.whitelist = self._build_whitelist()
        # List for holding whitelisted IPs according to DNS responses.
        self.whitelist_ips = []
        # State of whitelisting.
        self.whitelist_enabled = self._build_whitelist_conf()
        # List of known good DNS servers.
        self.known_dns = self._build_known_dns()

    def _build_whitelist(self):
        result = []
        whitelist_path = os.path.join(
            CUCKOO_ROOT, "data", "whitelist", "domain.txt"
        )
        for line in open(whitelist_path, 'rb'):
            result.append(line.strip())
        return result

    def _build_whitelist_conf(self):
        """Check if whitelisting is enabled."""
        if not self.options.get("whitelist-dns"):
            log.debug("Whitelisting Disabled.")
            return False

        return True

    def _is_whitelisted(self, conn, hostname):
        """Checks if whitelisting conditions are met"""
        # Is whitelisting enabled?
        if not self.whitelist_enabled:
            return False

        # Is the DNS record coming from an allowed DNS server?
        if not self.known_dns:
            pass
        elif (conn.get("src") in self.known_dns or 
              conn.get("dst") in self.known_dns):
            pass
        else:
            return False

        # Is the hostname whitelisted?
        if hostname not in self.whitelist:
            return False

        return True

    def _build_known_dns(self):
        """Build known DNS list."""
        result = []
        _known_dns = self.options.get("allowed-dns")
        if _known_dns is not None:
            for r in _known_dns.split(","):
                result.append(r.strip())
            return result

        return []

    def _dns_gethostbyname(self, name):
        """Get host by name wrapper.
        @param name: hostname.
        @return: IP address or blank
        """
        if cfg.processing.resolve_dns:
            ip = resolve(name)
        else:
            ip = ""
        return ip

    def _is_private_ip(self, ip):
        """Check if the IP belongs to private network blocks.
        @param ip: IP address to verify.
        @return: boolean representing whether the IP belongs or not to
                 a private network block.
        """
        networks = [
            "0.0.0.0/8",
        "8.8.8.8",
            "10.0.0.0/8",
            "100.64.0.0/10",
            "127.0.0.0/8",
            "169.254.0.0/16",
            "172.16.0.0/12",
            "192.0.0.0/24",
            "192.0.2.0/24",
            "192.88.99.0/24",
            "192.168.0.0/16",
            "198.18.0.0/15",
            "198.51.100.0/24",
            "203.0.113.0/24",
            "240.0.0.0/4",
            "255.255.255.255/32",
            "224.0.0.0/4"
        ]

        for network in networks:
            try:
                ipaddr = struct.unpack(">I", socket.inet_aton(ip))[0]

                netaddr, bits = network.split("/")

                network_low = struct.unpack(">I", socket.inet_aton(netaddr))[0]
                network_high = network_low | (1 << (32 - int(bits))) - 1

                if ipaddr <= network_high and ipaddr >= network_low:
                    return True
            except:
                continue

        return False

    def _add_hosts(self, connection):
        """Add IPs to unique list.
        @param connection: connection data
        """
        try:
            # TODO: Perhaps this block should be removed.
            # If there is a packet from a non-local IP address, which hasn't
            # been seen before, it means that the connection wasn't initiated
            # during the time of the current analysis.
            if connection["src"] not in self.hosts:
                ip = convert_to_printable(connection["src"])

                # We consider the IP only if it hasn't been seen before.
                if ip not in self.hosts:
                    # If the IP is not a local one, this might be a leftover
                    # packet as described in issue #249.
                    if self._is_private_ip(ip):
                        self.hosts.append(ip)

            if connection["dst"] not in self.hosts:
                ip = convert_to_printable(connection["dst"])

                if ip not in self.hosts:
                    self.hosts.append(ip)

                    # We add external IPs to the list, only the first time
                    # we see them and if they're the destination of the
                    # first packet they appear in.
                    if not self._is_private_ip(ip) and ip not in self.whitelist_ips:
                        self.unique_hosts.append(ip)
        except:
            pass

    def _tcp_dissect(self, conn, data):
        """Runs all TCP dissectors.
        @param conn: connection.
        @param data: payload data.
        """
        if self._check_http(data):
            self._add_http(data, conn["dport"])

        # SMTP.
        if conn["dport"] == 25:
            self._reassemble_smtp(conn, data)

        # IRC.
        if conn["dport"] != 21 and self._check_irc(data):
            self._add_irc(data)

        # HTTPS.
        if conn["dport"] in self.ssl_ports or conn["sport"] in self.ssl_ports:
            self._https_identify(conn, data)

    def _udp_dissect(self, conn, data):
        """Runs all UDP dissectors.
        @param conn: connection.
        @param data: payload data.
        """
        # Select DNS and MDNS traffic.
        if conn["dport"] == 53 or conn["sport"] == 53 or conn["dport"] == 5353 or conn["sport"] == 5353:
            if self._check_dns(data):
                self._add_dns(conn, data)

    def _check_icmp(self, icmp_data):
        """Checks for ICMP traffic.
        @param icmp_data: ICMP data flow.
        """
        try:
            return isinstance(icmp_data, dpkt.icmp.ICMP) and \
                len(icmp_data.data) > 0
        except:
            return False

    def _icmp_dissect(self, conn, data):
        """Runs all ICMP dissectors.
        @param conn: connection.
        @param data: payload data.
        """

        if self._check_icmp(data):
            # If ICMP packets are coming from the host, it probably isn't
            # relevant traffic, hence we can skip from reporting it.
            if conn["src"] == cfg.resultserver.ip:
                return

            entry = {}
            entry["src"] = conn["src"]
            entry["dst"] = conn["dst"]
            entry["type"] = data.type

            # Extract data from dpkt.icmp.ICMP.
            try:
                entry["data"] = convert_to_printable(data.data.data)
            except:
                entry["data"] = ""

            self.icmp_requests.append(entry)

    def _check_dns(self, udpdata):
        """Checks for DNS traffic.
        @param udpdata: UDP data flow.
        """
        try:
            dpkt.dns.DNS(udpdata)
        except:
            return False

        return True

    def _add_dns(self, conn, udpdata):
        """Adds a DNS data flow.
        @param udpdata: UDP data flow.
        """
        dns = dpkt.dns.DNS(udpdata)

        # DNS query parsing.
        query = {}
        # Temporary list for found A or AAAA responses.
        _ip = []

        if dns.rcode == dpkt.dns.DNS_RCODE_NOERR or \
                dns.qr == dpkt.dns.DNS_R or \
                dns.opcode == dpkt.dns.DNS_QUERY or True:
            # DNS question.
            try:
                q_name = dns.qd[0].name
                q_type = dns.qd[0].type
            except IndexError:
                return False

            # DNS RR type mapping.
            # See: http://www.iana.org/assignments/dns-parameters/dns-parameters.xhtml#dns-parameters-4
            # See: https://github.com/kbandla/dpkt/blob/master/dpkt/dns.py#L42
            query["request"] = q_name
            if q_type == dpkt.dns.DNS_A:
                query["type"] = "A"
            elif q_type == dpkt.dns.DNS_AAAA:
                query["type"] = "AAAA"
            elif q_type == dpkt.dns.DNS_CNAME:
                query["type"] = "CNAME"
            elif q_type == dpkt.dns.DNS_MX:
                query["type"] = "MX"
            elif q_type == dpkt.dns.DNS_PTR:
                query["type"] = "PTR"
            elif q_type == dpkt.dns.DNS_NS:
                query["type"] = "NS"
            elif q_type == dpkt.dns.DNS_SOA:
                query["type"] = "SOA"
            elif q_type == dpkt.dns.DNS_HINFO:
                query["type"] = "HINFO"
            elif q_type == dpkt.dns.DNS_TXT:
                query["type"] = "TXT"
            elif q_type == dpkt.dns.DNS_SRV:
                query["type"] = "SRV"
            elif q_type == dpkt.dns.DNS_ANY:
                # For example MDNS requests have q_type=255.
                query["type"] = "All"
            else:
                # Some requests are not parsed by dpkt.
                query["type"] = "None"

            # DNS answer.
            query["answers"] = []
            for answer in dns.an:
                ans = {}
                if answer.type == dpkt.dns.DNS_A:
                    ans["type"] = "A"
                    try:
                        ans["data"] = socket.inet_ntoa(answer.rdata)
                        _ip.append(ans["data"])
                    except socket.error:
                        continue
                elif answer.type == dpkt.dns.DNS_AAAA:
                    ans["type"] = "AAAA"
                    try:
                        ans["data"] = socket.inet_ntop(socket.AF_INET6,
                                                       answer.rdata)
                        _ip.append(ans["data"])
                    except (socket.error, ValueError):
                        continue
                elif answer.type == dpkt.dns.DNS_CNAME:
                    ans["type"] = "CNAME"
                    ans["data"] = answer.cname
                elif answer.type == dpkt.dns.DNS_MX:
                    ans["type"] = "MX"
                    ans["data"] = answer.mxname
                elif answer.type == dpkt.dns.DNS_PTR:
                    ans["type"] = "PTR"
                    ans["data"] = answer.ptrname
                elif answer.type == dpkt.dns.DNS_NS:
                    ans["type"] = "NS"
                    ans["data"] = answer.nsname
                elif answer.type == dpkt.dns.DNS_SOA:
                    ans["type"] = "SOA"
                    ans["data"] = ",".join([answer.mname,
                                           answer.rname,
                                           str(answer.serial),
                                           str(answer.refresh),
                                           str(answer.retry),
                                           str(answer.expire),
                                           str(answer.minimum)])
                elif answer.type == dpkt.dns.DNS_HINFO:
                    ans["type"] = "HINFO"
                    ans["data"] = " ".join(answer.text)
                elif answer.type == dpkt.dns.DNS_TXT:
                    ans["type"] = "TXT"
                    ans["data"] = " ".join(answer.text)

                # TODO: add srv handling
                query["answers"].append(ans)

            if self._is_whitelisted(conn, q_name):
                log.debug("DNS target {0} whitelisted. Skipping ...".format(q_name))
                self.whitelist_ips = self.whitelist_ips + _ip
                return True

            self._add_domain(query["request"])

            reqtuple = query["type"], query["request"]
            if reqtuple not in self.dns_requests:
                self.dns_requests[reqtuple] = query
            else:
                new_answers = set((i["type"], i["data"]) for i in query["answers"]) - self.dns_answers
                self.dns_requests[reqtuple]["answers"] += [dict(type=i[0], data=i[1]) for i in new_answers]

        return True

    def _add_domain(self, domain):
        """Add a domain to unique list.
        @param domain: domain name.
        """
        filters = [
            ".*\\.windows\\.com$",
            ".*\\.in\\-addr\\.arpa$"
        ]

        regexps = [re.compile(filter) for filter in filters]
        for regexp in regexps:
            if regexp.match(domain):
                return

        for entry in self.unique_domains:
            if entry["domain"] == domain:
                return

        # GeoIP lookup for the resolved address of this domain.
        reader = geoip2.database.Reader("/home/cuckoo/cuckoo/data/GeoIP.mmdb")
        ip = self._dns_gethostbyname(domain)
        try:
            response = reader.city(ip)
            country = response.country.name
            isocode = response.country.iso_code
        except Exception:
            country = ""
            isocode = ""

        self.unique_domains.append({
            "domain": domain,
            "ip": ip,
            "country": country,
        })

        if domain != "8.8.8.8" and isocode:
            self.unique_geolocation.append({
                "isocode": isocode,
            })

    def _check_http(self, tcpdata):
        """Checks for HTTP traffic.
        @param tcpdata: TCP data flow.
        """
        try:
            r = dpkt.http.Request()
            r.method, r.version, r.uri = None, None, None
            r.unpack(tcpdata)
        except dpkt.dpkt.UnpackError:
            if r.method is not None or r.version is not None or \
                    r.uri is not None:
                return True
            return False

        return True

    def _add_http(self, tcpdata, dport):
        """Adds an HTTP flow.
        @param tcpdata: TCP data flow.
        @param dport: destination port.
        """
        if tcpdata in self.http_requests:
            self.http_requests[tcpdata]["count"] += 1
            return True

        try:
            http = dpkt.http.Request()
            http.unpack(tcpdata)
        except dpkt.dpkt.UnpackError:
            pass

        try:
            entry = {"count": 1}

            if "host" in http.headers:
                entry["host"] = convert_to_printable(http.headers["host"])
            else:
                entry["host"] = ""

            entry["port"] = dport

            # Manually deal with cases when destination port is not the
            # default one and it is not included in host header.
            netloc = entry["host"]
            if dport != 80 and ":" not in netloc:
                netloc += ":" + str(entry["port"])

            entry["data"] = convert_to_printable(tcpdata)
            url = urlparse.urlunparse(("http", netloc, http.uri,
                                       None, None, None))
            entry["uri"] = convert_to_printable(url)
            entry["body"] = convert_to_printable(http.body)
            entry["path"] = convert_to_printable(http.uri)

            if "user-agent" in http.headers:
                entry["user-agent"] = \
                    convert_to_printable(http.headers["user-agent"])

            entry["version"] = convert_to_printable(http.version)
            entry["method"] = convert_to_printable(http.method)

            self.http_requests[tcpdata] = entry
        except Exception:
            return False

        return True

    def _https_identify(self, conn, data):
        """Extract a combination of the Session ID, Client Random, and Server
        Random in order to identify the accompanying master secret later."""
        if not hasattr(dpkt.ssl, "TLSRecord"):
            if not Pcap.notified_dpkt:
                Pcap.notified_dpkt = True
                log.warning("Using an old version of dpkt that does not "
                            "support TLS streams (install the latest with "
                            "`pip install dpkt`)")
            return

        try:
            record = dpkt.ssl.TLSRecord(data)
        except dpkt.NeedData:
            return
        except:
            log.exception("Error reading possible TLS Record")
            return

        # Is this a valid TLS packet?
        if record.type not in dpkt.ssl.RECORD_TYPES:
            return

        try:
            record = dpkt.ssl.RECORD_TYPES[record.type](record.data)
        except dpkt.ssl.SSL3Exception:
            return
        except dpkt.NeedData:
            log.exception("Incomplete possible TLS Handshake record found")
            return

        # Is this a TLSv1 Handshake packet?
        if not isinstance(record, dpkt.ssl.TLSHandshake):
            return

        # We're only interested in the TLS Server Hello packets.
        if not isinstance(record.data, dpkt.ssl.TLSServerHello):
            return

        # Extract the server random and the session id.
        self.tls_keys.append({
            "server_random": record.data.random.encode("hex"),
            "session_id": record.data.session_id.encode("hex"),
        })

    def _reassemble_smtp(self, conn, data):
        """Reassemble a SMTP flow.
        @param conn: connection dict.
        @param data: raw data.
        """
        if conn["dst"] in self.smtp_flow:
            self.smtp_flow[conn["dst"]] += data
        else:
            self.smtp_flow[conn["dst"]] = data

    def _process_smtp(self):
        """Process SMTP flow."""
        for conn, data in self.smtp_flow.iteritems():
            # Detect new SMTP flow.
            if data.startswith(("EHLO", "HELO")):
                self.smtp_requests.append({"dst": conn, "raw": data})

    def _check_irc(self, tcpdata):
        """
        Checks for IRC traffic.
        @param tcpdata: tcp data flow
        """
        try:
            req = ircMessage()
        except Exception:
            return False

        return req.isthereIRC(tcpdata)

    def _add_irc(self, tcpdata):
        """
        Adds an IRC communication.
        @param tcpdata: TCP data in flow
        @param dport: destination port
        """

        try:
            reqc = ircMessage()
            reqs = ircMessage()
            filters_sc = ["266"]
            self.irc_requests = self.irc_requests + \
                reqc.getClientMessages(tcpdata) + \
                reqs.getServerMessagesFilter(tcpdata, filters_sc)
        except Exception:
            return False

        return True

    def run(self):
        """Process PCAP.
        @return: dict with network analysis data.
        """
        # Destination addresses excluded from the TCP/UDP connection listings.
        networks = [
            "0.0.0.0",
            "8.8.8.8",
            "192.168.56.255",
            "224.0.0.252",
            "239.255.255.250",
        ]
        try:
            file = open(self.filepath, "rb")
        except (IOError, OSError):
            log.error("Unable to open %s" % self.filepath)
            return self.results

        try:
            pcap = dpkt.pcap.Reader(file)
        except dpkt.dpkt.NeedData:
            log.error("Unable to read PCAP file at path \"%s\".",
                      self.filepath)
            return self.results
        except ValueError:
            log.error("Unable to read PCAP file at path \"%s\". File is "
                      "corrupted or wrong format." % self.filepath)
            return self.results

        offset = file.tell()
        first_ts = None
        for ts, buf in pcap:
            if not first_ts:
                first_ts = ts

            try:
                ip = iplayer_from_raw(buf, pcap.datalink())

                connection = {}
                if isinstance(ip, dpkt.ip.IP):
                    connection["src"] = socket.inet_ntoa(ip.src)
                    connection["dst"] = socket.inet_ntoa(ip.dst)
                elif isinstance(ip, dpkt.ip6.IP6):
                    connection["src"] = socket.inet_ntop(socket.AF_INET6,
                                                         ip.src)
                    connection["dst"] = socket.inet_ntop(socket.AF_INET6,
                                                         ip.dst)
                else:
                    offset = file.tell()
                    continue

                self._add_hosts(connection)

                if ip.p == dpkt.ip.IP_PROTO_TCP:
                    tcp = ip.data
                    if not isinstance(tcp, dpkt.tcp.TCP):
                        tcp = dpkt.tcp.TCP(tcp)

                    if tcp.data:
                        connection["sport"] = tcp.sport
                        connection["dport"] = tcp.dport
                        self._tcp_dissect(connection, tcp.data)

                        src, sport, dst, dport = (
                            connection["src"], connection["sport"],
                            connection["dst"], connection["dport"],
                        )
                        if dst not in networks:
                            if not ((dst, dport, src, sport) in self.tcp_connections_seen or
                                    (src, sport, dst, dport) in self.tcp_connections_seen):
                                self.tcp_connections.append((src, sport, dst, dport, offset, ts - first_ts))
                                self.tcp_connections_seen.add((src, sport, dst, dport))

                            self.alive_hosts[dst, dport] = True
                    else:
                        ipconn = (
                            connection["src"], tcp.sport,
                            connection["dst"], tcp.dport,
                        )
                        seqack = self.tcp_connections_dead.get(ipconn)
                        if seqack == (tcp.seq, tcp.ack):
                            host = connection["dst"], tcp.dport
                            self.dead_hosts[host] = self.dead_hosts.get(host, 1) + 1

                        self.tcp_connections_dead[ipconn] = tcp.seq, tcp.ack

                elif ip.p == dpkt.ip.IP_PROTO_UDP:
                    udp = ip.data
                    if not isinstance(udp, dpkt.udp.UDP):
                        udp = dpkt.udp.UDP(udp)

                    if len(udp.data) > 0:
                        connection["sport"] = udp.sport
                        connection["dport"] = udp.dport
                        self._udp_dissect(connection, udp.data)

                        src, sport, dst, dport = (
                            connection["src"], connection["sport"],
                            connection["dst"], connection["dport"],
                        )
                        if dst not in networks:
                            if not ((dst, dport, src, sport) in self.udp_connections_seen or
                                    (src, sport, dst, dport) in self.udp_connections_seen):
                                self.udp_connections.append((src, sport, dst, dport, offset, ts - first_ts))
                                self.udp_connections_seen.add((src, sport, dst, dport))

                elif ip.p == dpkt.ip.IP_PROTO_ICMP:
                    icmp = ip.data
                    if not isinstance(icmp, dpkt.icmp.ICMP):
                        icmp = dpkt.icmp.ICMP(icmp)

                    self._icmp_dissect(connection, icmp)

                offset = file.tell()
            except AttributeError:
                continue
            except dpkt.dpkt.NeedData:
                continue
            except Exception as e:
                log.exception("Failed to process packet: %s", e)

        file.close()

        # Post processors for reconstructed flows.
        self._process_smtp()

        # Build results dict.
        self.results["hosts"] = self.unique_hosts
        self.results["domains"] = self.unique_domains
        self.results["tcp"] = [conn_from_flowtuple(i) for i in self.tcp_connections]
        self.results["udp"] = [conn_from_flowtuple(i) for i in self.udp_connections]
        self.results["icmp"] = self.icmp_requests
        self.results["http"] = self.http_requests.values()
        self.results["tls"] = self.tls_keys
        self.results["dns"] = self.dns_requests.values()
        self.results["smtp"] = self.smtp_requests
        self.results["irc"] = self.irc_requests
    self.results["geolocation"] = self.unique_geolocation

        self.results["dead_hosts"] = []

        # Report each IP/port combination as a dead host if we've had to retry
        # at least 3 times to connect to it and if no successful connections
        # were detected throughout the analysis.
        for (ip, port), count in self.dead_hosts.items():
            if count < 3 or (ip, port) in self.alive_hosts:
                continue

            # Report once.
            if (ip, port) not in self.results["dead_hosts"]:
                self.results["dead_hosts"].append((ip, port))

        return self.results

class Pcap2(object):
    """Interprets the PCAP file through the httpreplay library which parses
    the various protocols, decrypts and decodes them, and then provides us
    with the high level representation of it."""

    def __init__(self, pcap_path, tlsmaster, network_path):
        self.pcap_path = pcap_path
        self.network_path = network_path

        self.handlers = {
            25: httpreplay.cut.smtp_handler,
            80: httpreplay.cut.http_handler,
            8000: httpreplay.cut.http_handler,
            8080: httpreplay.cut.http_handler,
            443: lambda: httpreplay.cut.https_handler(tlsmaster),
            4443: lambda: httpreplay.cut.https_handler(tlsmaster),
        }

    def run(self):
        results = {
            "http_ex": [],
            "https_ex": [],
        }

        if not os.path.exists(self.network_path):
            os.mkdir(self.network_path)

        r = httpreplay.reader.PcapReader(open(self.pcap_path, "rb"))
        r.tcp = httpreplay.smegma.TCPPacketStreamer(r, self.handlers)

        l = sorted(r.process(), key=lambda x: x[1])
        for s, ts, protocol, sent, recv in l:
            srcip, srcport, dstip, dstport = s

            if protocol == "http" or protocol == "https":
                request = sent.raw.split("\r\n\r\n", 1)[0]
                response = recv.raw.split("\r\n\r\n", 1)[0]

                md5 = hashlib.md5(recv.body or "").hexdigest()
                sha1 = hashlib.sha1(recv.body or "").hexdigest()

                filepath = os.path.join(self.network_path, sha1)
                open(filepath, "wb").write(recv.body or "")

                results["%s_ex" % protocol].append({
                    "src": srcip, "sport": srcport,
                    "dst": dstip, "dport": dstport,
                    "protocol": protocol,
                    "method": sent.method,
                    "host": sent.headers.get("host", dstip),
                    "uri": sent.uri,
                    "request": request.decode("latin-1"),
                    "response": response.decode("latin-1"),
                    "md5": md5,
                    "sha1": sha1,
                    "path": filepath,
                })

        return results

class NetworkAnalysis(Processing):
    """Network analysis."""

    order = 2
    key = "network"

    def run(self):
        results = {}

        # Include any results provided by the mitm script.
        results["mitm"] = []
        if os.path.exists(self.mitmout_path):
            for line in open(self.mitmout_path, "rb"):
                try:
                    results["mitm"].append(json.loads(line))
                except:
                    results["mitm"].append(line)

        if not os.path.exists(self.pcap_path):
            log.warning("The PCAP file does not exist at path \"%s\".",
                        self.pcap_path)
            return results

        if os.path.getsize(self.pcap_path) == 0:
            log.error("The PCAP file at path \"%s\" is empty." % self.pcap_path)
            return results

        # PCAP file hash.
        if os.path.exists(self.pcap_path):
            results["pcap_sha256"] = File(self.pcap_path).get_sha256()

        if not HAVE_DPKT and not HAVE_HTTPREPLAY:
            log.error("Both Python HTTPReplay and Python DPKT are not "
                      "installed, no PCAP analysis possible.")
            return results

        sorted_path = self.pcap_path.replace("dump.", "dump_sorted.")
        if cfg.processing.sort_pcap:
            sort_pcap(self.pcap_path, sorted_path)
            pcap_path = sorted_path

            # Sorted PCAP file hash.
            if os.path.exists(sorted_path):
                results["sorted_pcap_sha256"] = File(sorted_path).get_sha256()
        else:
            pcap_path = self.pcap_path

        if HAVE_DPKT:
            results.update(Pcap(pcap_path, self.options).run())

        if HAVE_HTTPREPLAY and os.path.exists(pcap_path):
            try:
                p2 = Pcap2(pcap_path, self.get_tlsmaster(), self.network_path)
                results.update(p2.run())
            except:
                log.exception("Error running httpreplay-based PCAP analysis")

        return results

    def get_tlsmaster(self):
        """Obtain the client/server random to TLS master secrets mapping that
        we have obtained through dynamic analysis."""
        tlsmaster = {}
        summary = self.results.get("behavior", {}).get("summary", {})
        for entry in summary.get("tls_master", []):
            client_random, server_random, master_secret = entry
            client_random = client_random.decode("hex")
            server_random = server_random.decode("hex")
            master_secret = master_secret.decode("hex")
            tlsmaster[client_random, server_random] = master_secret
        return tlsmaster

def iplayer_from_raw(raw, linktype=1):
    """Converts a raw packet to a dpkt packet regarding of link type.
    @param raw: raw packet
    @param linktype: integer describing link type as expected by dpkt
    """
    if linktype == 1:  # ethernet
        try:
            pkt = dpkt.ethernet.Ethernet(raw)
            return pkt.data
        except dpkt.NeedData:
            pass
    elif linktype == 101:  # raw
        return dpkt.ip.IP(raw)
    else:
        raise CuckooProcessingError("unknown PCAP linktype")

def conn_from_flowtuple(ft):
    """Convert the flow tuple into a dictionary (suitable for JSON)"""
    sip, sport, dip, dport, offset, relts = ft
    # GeoIP lookup for the destination address of this flow.
    reader = geoip2.database.Reader("/home/cuckoo/cuckoo/data/GeoIP.mmdb")
    try:
        response = reader.city(str(dip))
        country = response.country.name
    except Exception:
        country = ""

    return {
        "src": sip, "sport": sport,
        "dst": dip, "dport": dport,
        "offset": offset, "time": relts,
        "country": country,
    }

# input_iterator should be a class that also supports writing so we can use
# it for the temp files
# this code is mostly taken from some SO post, can't remember the url though
def batch_sort(input_iterator, output_path, buffer_size=32000, output_class=None):
    """batch sort helper with temporary files, supports sorting large stuff"""
    if not output_class:
        output_class = input_iterator.__class__

    chunks = []
    try:
        while True:
            current_chunk = list(islice(input_iterator, buffer_size))
            if not current_chunk:
                break

            current_chunk.sort()
            fd, filepath = tempfile.mkstemp()
            os.close(fd)
            output_chunk = output_class(filepath)
            chunks.append(output_chunk)

            for elem in current_chunk:
                output_chunk.write(elem.obj)
            output_chunk.close()

        output_file = output_class(output_path)
        for elem in heapq.merge(*chunks):
            output_file.write(elem.obj)
        output_file.close()
    finally:
        for chunk in chunks:
            try:
                chunk.close()
                os.remove(chunk.name)
            except Exception:
                pass

class SortCap(object):
    """SortCap is a wrapper around the packet lib (dpkt) that allows us to sort pcaps
    together with the batch_sort function above."""

    def __init__(self, path, linktype=1):
        self.name = path
        self.linktype = linktype
        self.fd = None
        self.ctr = 0  # counter to pass through packets without flow info (non-IP)
        self.conns = set()

    def write(self, p):
        if not self.fd:
            self.fd = dpkt.pcap.Writer(open(self.name, "wb"), linktype=self.linktype)
        self.fd.writepkt(p.raw, p.ts)

    def __iter__(self):
        if not self.fd:
            self.fd = dpkt.pcap.Reader(open(self.name, "rb"))
            self.fditer = iter(self.fd)
            self.linktype = self.fd.datalink()
        return self

    def close(self):
        if self.fd:
            self.fd.close()
            self.fd = None

    def next(self):
        rp = next(self.fditer)
        if rp is None:
            return None

        self.ctr += 1

        ts, raw = rp
        rpkt = Packet(raw, ts)

        sip, dip, sport, dport, proto = flowtuple_from_raw(raw, self.linktype)

        # check other direction of same flow
        if (dip, sip, dport, sport, proto) in self.conns:
            flowtuple = (dip, sip, dport, sport, proto)
        else:
            flowtuple = (sip, dip, sport, dport, proto)

        self.conns.add(flowtuple)
        return Keyed((flowtuple, ts, self.ctr), rpkt)

def sort_pcap(inpath, outpath):
    """Use SortCap class together with batch_sort to sort a pcap"""
    inc = SortCap(inpath)
    batch_sort(inc, outpath, output_class=lambda path: SortCap(path, linktype=inc.linktype))
    return 0

def flowtuple_from_raw(raw, linktype=1):
    """Parse a packet from a pcap just enough to gain a flow description tuple"""
    ip = iplayer_from_raw(raw, linktype)

    if isinstance(ip, dpkt.ip.IP):
        sip, dip = socket.inet_ntoa(ip.src), socket.inet_ntoa(ip.dst)
        proto = ip.p

        if proto == dpkt.ip.IP_PROTO_TCP or proto == dpkt.ip.IP_PROTO_UDP:
            l3 = ip.data
            sport, dport = l3.sport, l3.dport
        else:
            sport, dport = 0, 0

    else:
        sip, dip, proto = 0, 0, -1
        sport, dport = 0, 0

    flowtuple = (sip, dip, sport, dport, proto)
    return flowtuple

def payload_from_raw(raw, linktype=1):
    """Get the payload from a packet, the data below TCP/UDP basically"""
    ip = iplayer_from_raw(raw, linktype)
    try:
        return ip.data.data
    except:
        return ""

def next_connection_packets(piter, linktype=1):
    """Extract all packets belonging to the same flow from a pcap packet
    iterator."""
    first_ft = None

    for ts, raw in piter:
        ft = flowtuple_from_raw(raw, linktype)
        if not first_ft:
            first_ft = ft

        sip, dip, sport, dport, proto = ft
        if not (first_ft == ft or first_ft == (dip, sip, dport, sport, proto)):
            break

        yield {
            "src": sip, "dst": dip, "sport": sport, "dport": dport,
            "raw": payload_from_raw(raw, linktype).encode("base64"),
            "direction": first_ft == ft,
        }

def packets_for_stream(fobj, offset):
    """Open a PCAP, seek to a packet offset, then get all packets belonging to
    the same connection."""
    pcap = dpkt.pcap.Reader(fobj)
    pcapiter = iter(pcap)
    ts, raw = pcapiter.next()

    fobj.seek(offset)
    for p in next_connection_packets(pcapiter, linktype=pcap.datalink()):
        yield p

1623664 commented 7 years ago

I took the database from here: https://dev.maxmind.com/geoip/legacy/geolite/
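
Note that `geoip2.database.Reader` reads the binary GeoIP2/GeoLite2 `.mmdb` format rather than the legacy `.dat` files, so the database placed at the configured path needs to be one of the GeoLite2 downloads. A quick way to sanity-check the file before wiring it in (the path is the one assumed in the patch above):

```python
# Verify that the .mmdb file opens and answers a lookup.
# The path matches the one hard-coded in the patch above; adjust as needed.
import geoip2.database

reader = geoip2.database.Reader("/home/cuckoo/cuckoo/data/GeoIP.mmdb")
print(reader.city("8.8.8.8").country.iso_code)  # a public address should yield a country code
reader.close()
```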

1623664 commented 7 years ago

Sorry for the quality.

jbremer commented 7 years ago

Thanks @1623664, with some changes we should be able to use something like this for our final GeoIP integration. We'll be picking up on this soon'ish.
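
In the meantime, one possible direction for cleaning the patch up (a sketch only, not the project's final design): open the database once at module level, degrade gracefully when `geoip2` or the `.mmdb` file is missing, and funnel all lookups through a single helper. The database path and helper name here are assumptions:

```python
# Sketch of a guarded, module-level GeoIP helper (names and path are hypothetical).
import logging
import os

try:
    import geoip2.database
    HAVE_GEOIP2 = True
except ImportError:
    HAVE_GEOIP2 = False

log = logging.getLogger(__name__)

# Hypothetical location of the database inside the Cuckoo tree.
GEOIP_DB = os.path.join("/home/cuckoo/cuckoo", "data", "GeoLite2-City.mmdb")

_reader = None

def geoip_lookup(ip):
    """Return (country, iso_code) for an IP address, or empty strings."""
    global _reader
    if not HAVE_GEOIP2 or not os.path.exists(GEOIP_DB):
        return "", ""
    if _reader is None:
        _reader = geoip2.database.Reader(GEOIP_DB)
    try:
        response = _reader.city(ip)
        return response.country.name or "", response.country.iso_code or ""
    except Exception:
        # Private, malformed, or unknown addresses.
        return "", ""
```

With something like this, `_add_domain()` and `conn_from_flowtuple()` would only call `geoip_lookup()` instead of each constructing their own `Reader`, which avoids reopening the database for every domain and flow.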

AnkurSavaliya commented 4 years ago

What are the values of LATEST_HTTPREPLAY and CUCKOO_ROOT in the constants.py file?