Closed mjtooley closed 5 years ago
Matt, quick question, are the resulting files from steps 1 and 2 the same in both versions of the software?
No - very different sizes. v4.2.0 results in a much smaller file. Using the same input files for each, I get two very different output JSON files:
v4.2.0 (gzipped)
{"sa":"192.168.8.147","da":"104.23.242.254","pr":6,"sp":58770,"dp":80,"bytes_out":228,"num_pkts_out":7,"bytes_in":2468,"num_pkts_in":7,"time_start":1535650827.045222,"time_end":1535650827.378704,"packets":[{"b":228,"dir":">","ipt":2},{"b":1460,"dir":"<","ipt":330},{"b":1003,
"dir":"<","ipt":0},{"b":5,"dir":"<","ipt":0}],"byte_dist":[0,0,0,0,0,0,0,0,0,0,34,0,0,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,92,1,8,0,0,0,0,0,0,0,2,0,3,15,25,35,69,28,53,41,15,19,21,9,18,17,29,6,13,24,13,6,0,16,11,9,34,18,47,34,39,44,33,1,17,25,56,22,8,21,40,38,43,50,92,50,3
7,32,57,0,0,0,0,17,0,56,62,62,65,103,12,24,47,25,33,29,80,49,72,51,49,6,30,47,95,35,44,26,23,34,27,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"byte_dist_mean":84.410979,"byte_dist_std":23.840375,"ip":{"out":{"ttl":64,"id":[31751,31752,31753,31754,31755,31756,31757]},"in":{"ttl":57,"id":[0,26632,26633,26634,26635,26636,26637]
}},"tcp":{"first_seq":3354241729,"out":{"flags":"S","first_window_size":14600,"opt_len":20,"opts":[{"mss":1460},{"sackp":null},{"ts":{"val":4294956742,"ecr":0}},{"noop":null},{"ws":6}]},"in":{"flags":"SA","first_window_size":29200,"opt_len":12,"opts":[{"mss":1460},{"noop":nu
ll},{"noop":null},{"sackp":null},{"noop":null},{"ws":10}]}}}
while v4.0.0 (gzipped)
{"version":"4.0.0","interface":"none","promisc":0,"output":"none","outputdir":"none","username":"none","info":"none","count":0,"upload":"none","keyfile":"none","retain":0,"bidir":1,"num_pkts":0,"type":1,"zeros":0,"retrans":0,"
dist":1,"cdist":"none","entropy":0,"hd":0,"classify":0,"idp":0,"exe":0,"anon":"none","useranon":"none","bpf":"none","verbosity":3,"wht":0,"example":0,"dns":0,"ssh":0,"tls":0,"dhcp":0,"http":0,"ike":0,"payload":0,"salt":0,"ppi"
:0,"end-config":1}
{"sa":"192.168.5.112","da":"74.208.177.96","pr":6,"sp":53987,"dp":8000,"bytes_out":6182,"num_pkts_out":240,"time_start":1540238582.754234,"time_end":1540238587.092565,"packets":[{"b":359,"dir":"<","ipt":0},{"b":324,"dir":"<","
ipt":180},{"b":323,"dir":"<","ipt":53},{"b":330,"dir":"<","ipt":166},{"b":333,"dir":"<","ipt":63},{"b":333,"dir":"<","ipt":52},{"b":325,"dir":"<","ipt":115},{"b":324,"dir":"<","ipt":57},{"b":325,"dir":"<","ipt":54},{"b":329,"d
ir":"<","ipt":53},{"b":329,"dir":"<","ipt":185},{"b":325,"dir":"<","ipt":107},{"b":323,"dir":"<","ipt":54},{"b":362,"dir":"<","ipt":1987},{"b":484,"dir":"<","ipt":47},{"b":453,"dir":"<","ipt":244},{"b":601,"dir":"<","ipt":733}
],"byte_dist":[0,0,0,0,0,0,0,0,0,0,77,0,0,77,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,220,0,0,0,0,9,1,0,17,17,23,0,20,35,63,97,86,37,33,66,11,34,1,20,24,6,103,33,0,10,0,0,0,71,2,20,0,28,2,26,26,0,1,25,26,35,1,0,8,8,9,8,34,17,0,8,0,
1,0,0,0,0,0,8,0,70,41,135,46,221,23,49,12,100,6,41,81,36,106,126,93,3,76,42,130,53,44,3,13,19,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"byte_dist_mean":79.442737,"byte_dist_std":32.435096,"ip":{"out":{"ttl":64,"id":[49440,4
9441,49442,49443,49444,49445,49446,49447,49448,49449,49450,49451,49452,49453,49454,49455,49456,49457,49458,49459,49460,49461,49462,49463,49464,49465,49466,49467,49468,49469,49470,49471,49472,49473,49474,49475,49476,49477,49478
,49479,49480,49481,49482,49483,49484,49485,49486,49487,49488,49489]}},"tcp":{"first_seq":2108299465,"out":{"flags":"S","first_window_size":5840,"opt_len":20,"opts":[{"mss":1460},{"sackp":null},{"ts":{"val":4294760937,"ecr":0}}
,{"noop":null},{"ws":4}]}},"probable_os":{"out":"Linux 2.4 and 2.6"}}
{"sa":"192.168.5.112","da":"74.208.177.96","pr":6,"sp":53988,"dp":8000,"bytes_out":1884,"num_pkts_out":11,"time_start":1540238584.328659,"time_end":1540238586.272826,"packets":[{"b":359,"dir":"<","ipt":0},{"b":1460,"dir":"<","
ipt":281},{"b":65,"dir":"<","ipt":0}],"byte_dist":[0,0,0,0,0,0,0,0,0,0,20,0,0,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,59,0,0,0,0,69,21,0,4,4,4,0,5,13,18,23,84,55,113,52,22,41,28,38,42,20,35,16,0,27,0,1,0,55,23,39,18,31,17,9,7,6
,2,6,8,12,3,1,7,2,4,6,12,5,1,2,1,1,0,0,0,0,0,19,0,44,16,43,29,107,10,20,12,56,0,13,28,23,52,53,34,2,43,37,64,21,22,3,5,10,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"byte_dist_mean":75.177813,"byte_dist_std":29.384439,"ip":{"o
ut":{"ttl":64,"id":[51769,51770,51771,51772,51773,51774,51775,51776,51777,51778,51779]}},"tcp":{"first_seq":1262955062,"out":{"flags":"S","first_window_size":5840,"opt_len":20,"opts":[{"mss":1460},{"sackp":null},{"ts":{"val":4
294762511,"ecr":0}},{"noop":null},{"ws":4}]}},"probable_os":{"out":"Linux 2.4 and 2.6"}}
.... and lots more lines
I then tried rebuilding v4.2.0 with compression turned off, and I noticed that when Joy is passed a list of pcap files as input and the option "output=data.json" is used, Joy makes a directory containing one .json file for each input file.
{"version":"4.2.0","interface":"none","promisc":0,"output":"piracy.json","outputdir":".","username":"none","info":"none","count":0,"upload":"none","keyfile":
"none","retain":0,"bidir":1,"num_pkts":50,"zeros":0,"retrans":0,"dist":1,"cdist":"none","entropy":0,"hd":0,"classify":0,"idp":0,"exe":0,"anon":"none","useran
on":"none","bpf":"none","verbosity":3,"threads":1,"updater":0,"wht":0,"example":0,"dns":0,"ssh":0,"tls":0,"dhcp":0,"http":0,"ike":0,"payload":0,"salt":0,"ppi
":0,"fpx":0,"end-config":1}
{"sa":"192.168.5.112","da":"74.208.177.96","pr":6,"sp":53987,"dp":8000,"bytes_out":6182,"num_pkts_out":240,"time_start":1540238582.754234,"time_end":15402385
87.092565,"packets":[{"b":359,"dir":"<","ipt":0},{"b":324,"dir":"<","ipt":180},{"b":323,"dir":"<","ipt":53},{"b":330,"dir":"<","ipt":166},{"b":333,"dir":"<",
"ipt":63},{"b":333,"dir":"<","ipt":52},{"b":325,"dir":"<","ipt":115},{"b":324,"dir":"<","ipt":57},{"b":325,"dir":"<","ipt":54},{"b":329,"dir":"<","ipt":53},{
"b":329,"dir":"<","ipt":185},{"b":325,"dir":"<","ipt":107},{"b":323,"dir":"<","ipt":54},{"b":362,"dir":"<","ipt":1987},{"b":484,"dir":"<","ipt":47},{"b":453,
"dir":"<","ipt":244},{"b":601,"dir":"<","ipt":733}],"byte_dist":[0,0,0,0,0,0,0,0,0,0,54,0,0,54,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,151,0,0,0,0,0,0,0,12,12,18
,0,14,24,45,68,54,19,18,36,6,18,0,6,12,0,71,19,0,1,0,0,0,42,0,12,0,18,0,18,19,0,0,18,18,24,0,0,6,6,6,6,25,12,0,6,0,0,0,0,0,0,0,0,0,41,27,93,27,144,11,33,7,65
,6,26,53,22,70,84,64,2,49,28,83,36,30,2,9,13,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
,"byte_dist_mean":79.442737,"byte_dist_std":32.435096,"ip":{"out":{"ttl":64,"id":[49440,49441,49442,49443,49444,49445,49446,49447,49448,49449,49450,49451,494
52,49453,49454,49455,49456,49457,49458,49459,49460,49461,49462,49463,49464,49465,49466,49467,49468,49469,49470,49471,49472,49473,49474,49475,49476,49477,4947
8,49479,49480,49481,49482,49483,49484,49485,49486,49487,49488,49489]}},"tcp":{"first_seq":2108299465,"out":{"flags":"S","first_window_size":5840,"opt_len":20
,"opts":[{"mss":1460},{"sackp":null},{"ts":{"val":4294760937,"ecr":0}},{"noop":null},{"ws":4}]}},"probable_os":{"out":"Linux 2.4 and 2.6"}}
If I don't use the option "output=" and instead just pipe stdout to a file, I get a single JSON file for all the input files.
I then diffed the JSON from v4.2 against the gunzipped JSON from v4.0, and they are different. It isn't just the preamble that differs: the v4.0 file is 215911 bytes long, while the v4.2 file is 19637 bytes long.
v4.2 creates a JSON file with 10 lines, while v4.0 creates a JSON file with 198 lines.
Resolved offline. Sent the user diffs, and the problem has been fixed. The changes will be committed to master shortly.
While doing some testing and debugging I noticed a difference in the output of Joy between versions v4.0.0 and v4.2.0.
When I built Joy I did:
I am doing the following:
I have a number of pcap files in both the /piracy and /benign directories.
When I run it with v4.0.0, the output of steps 3 & 4 is something like this:
When I run it with v4.2.0, the output of steps 3 & 4 is something like this:
Note the big difference between the Num Positive and Num Negative between the two versions.
The resulting classifier is different too, leading to different results. v4.2.0 definitely runs faster, but I think that is because the output from steps #1 and #2 is a much smaller file. Through some trial and error, v4.2.0 seems to be capped at about 100 for Num Positive and at 3100 for Num Negative.
So I am trying to understand if I am doing something wrong, or if something changed in the code base that is causing this. And if so, is the output of v4.2.0 valid?