KimiNewt / pyshark

Python wrapper for tshark, allowing python packet parsing using wireshark dissectors
MIT License
2.25k stars 422 forks source link

How to reassemble TCP/TLS flow similar to "Follow TCP/TLS" stream? #502

Open zliucd opened 3 years ago

zliucd commented 3 years ago

Hi everyone,

How can I write code using PyShark to implement Wireshark's "Follow TCP/TLS Stream" feature (in order to extract the full payload)? Reassembling a TCP stream is known to be non-trivial, so how can we use PyShark to do it?

Thanks.

vadimszzz commented 2 years ago
from itertools import groupby

import pyshark
from natsort import natsorted

# Positions of the protocol layers inside a packet's layer list.
LINK = 0    # data-link layer (Ethernet)
INET = 1    # internet layer (IP/IPv6)
TRANS = 2   # transport layer (TCP/UDP)
APP = -1    # application layer (HTTP/HTTP2/QUIC), i.e. the last layer

# The TLS layer is referred to by name rather than by position.
TLS = "tls"

def get_packet_layers(pkt):
    """Return the packet's real protocol layers, trimmed to a fixed depth.

    Layers named "fake-field-wrapper" are discarded, then trailing layers
    are removed until at most four remain (five while the packet still
    contains a TLS layer).

    Note: this rewrites ``pkt.layers`` in place and returns that same list.
    """
    kept = []
    for layer in pkt.layers:
        if layer._layer_name == "fake-field-wrapper":
            continue
        kept.append(layer)
    pkt.layers = kept

    while True:
        # Recomputed on every pass: popping may remove the TLS layer itself,
        # which lowers the allowed depth again.
        max_depth = 4 + (TLS in pkt)
        if len(pkt.layers) <= max_depth:
            break
        pkt.layers.pop()

    return pkt.layers

def get_transport_layer_streamid(pkt):
    """Return the transport stream key of a packet as "<protocol>:<stream>".

    The protocol name and stream number are read from the layer found at
    index ``TRANS`` of the cleaned-up layer list.
    """
    layer = get_packet_layers(pkt)[TRANS]
    return f"{layer._layer_name}:{layer.stream}"

def get_app_layer_streamid(pkt):
    """Return the application-level stream key of a packet.

    The key has the form
    "<transport stream key>:<app protocol>:<app stream id>", where the
    stream id is looked up in the "<app protocol>.streamid" field of the
    layer at index ``APP``. The lookup result is embedded as-is, so a layer
    without that field contributes whatever ``.get()`` returns for a
    missing key (presumably ``None``).
    """
    prefix = get_transport_layer_streamid(pkt)
    layer = get_packet_layers(pkt)[APP]
    proto = layer._layer_name
    field_name = proto + '.streamid'
    stream = layer._all_fields.get(field_name)
    return f"{prefix}:{proto}:{stream}"

def transport_layer_reassembly(packets):
    """Group packets into flows keyed by their TCP/UDP stream id.

    The packets are first naturally sorted by stream key so that equal keys
    are adjacent, which is what ``groupby`` requires. Returns the
    ``groupby`` iterator of ``(stream key, packets)`` pairs.
    """
    def stream_key(pkt):
        return get_transport_layer_streamid(pkt)

    ordered = natsorted(packets, key=stream_key)
    return groupby(ordered, stream_key)

def app_layer_reassembly(packets):
    """Group packets into flows keyed by their application-layer stream id.

    The packets are first naturally sorted by stream key so that equal keys
    are adjacent, which is what ``groupby`` requires. Returns the
    ``groupby`` iterator of ``(stream key, packets)`` pairs.
    """
    def stream_key(pkt):
        return get_app_layer_streamid(pkt)

    ordered = natsorted(packets, key=stream_key)
    return groupby(ordered, stream_key)

def parse_pcap(pcap_file, sslkeylog_file):
    """Decrypt a capture with a TLS key log and regroup its packets into flows.

    Args:
        pcap_file: Capture file, passed to ``pyshark.FileCapture`` as
            ``input_file``.
        sslkeylog_file: TLS key log file, passed to tshark as the
            ``tls.keylog_file`` preference.

    Returns:
        A list of ``(app_layer_streamid, packets)`` tuples, one per
        application-layer stream, where ``packets`` is the list of packets
        belonging to that stream.
    """
    capture = pyshark.FileCapture(
        input_file=pcap_file,
        override_prefs={"tls.keylog_file": sslkeylog_file},
        # "-2" asks tshark for two-pass analysis.
        custom_parameters=["-2"])

    flows = []
    try:
        for _, transport_stream in transport_layer_reassembly(capture):
            for app_layer_streamid, session in app_layer_reassembly(transport_stream):
                # A groupby sub-iterator is only valid until the outer
                # iterator advances, so materialise each flow right away.
                flows.append((app_layer_streamid, list(session)))
    finally:
        # Always release the capture, even if parsing a packet fails.
        capture.close()
    return flows
vadimszzz commented 2 years ago

@yssource, @FisherDock