Closed YamatoSecurity closed 1 year ago
I created minimal pseudo-code, shown below. I've considered the following changes:
- Change the profile container from `HashMap<String, String>` to `HashSet<Enum>`.
- Share `DetectInfo.detail` and the profile's `%Detail%` value by using `Arc`.

use std::collections::{HashMap, HashSet};
use std::sync::Arc;
// Struct that stored a detection result as of the v1.7.2 release.
/// One detection result in the pre-change layout: several owned `String`
/// fields plus a `HashMap` of profile output values.
struct DetectInfo {
/// Rule file path (`main` uses `"Sec_4688_Info_ProcExec.yml"` as the sample).
pub rulepath: String,
/// Rule title (sample: `"Proc Exec"`).
pub ruletitle: String,
/// Level of the detection, kept as text (sample: `"info"`).
pub level: String,
/// Computer name taken from the event (sample: `"IE10Win7"`).
pub computername: String,
/// Event ID, kept as text rather than a number (sample: `"4688"`).
pub eventid: String,
/// Detail text of the detection; by far the longest field in the sample data.
pub detail: String,
/// Record information text; `main` leaves it empty.
pub record_information: String,
/// Profile values keyed by placeholder name such as `%Timestamp%` or
/// `%Detail%` (this is how `main` fills it).
pub ext_field: HashMap<String, String>,
}
// Improved version: each profile field is an enum variant instead of a
// `String` key in a map.
/// One profile output field together with its value.
///
/// Every variant except [`Profile::Detail`] owns its text directly.
/// `Detail` holds an `Arc<String>`, so only the handle is stored in the enum.
#[derive(Debug, Eq, PartialEq, Hash)]
enum Profile {
    Timestamp(String),
    Computer(String),
    Channel(String),
    Level(String),
    EventID(String),
    RecordID(String),
    RuleTitle(String),
    Detail(Arc<String>),
}

impl Profile {
    /// Returns the number of bytes attributed to this field's value.
    ///
    /// * `String` variants: the byte length of the text (`String::len`).
    /// * `Detail`: the size of the `Arc<String>` handle only, independent of
    ///   how long the pointed-to text is.
    fn val_mem_size(&self) -> usize {
        match self {
            Profile::Timestamp(v)
            | Profile::Computer(v)
            | Profile::Channel(v)
            | Profile::Level(v)
            | Profile::EventID(v)
            | Profile::RecordID(v)
            | Profile::RuleTitle(v) => v.len(),
            // `v` is already `&Arc<String>`; passing it directly measures the
            // `Arc` handle. Taking `&v` would measure a reference to that
            // reference instead (Clippy `size_of_ref`).
            Profile::Detail(v) => std::mem::size_of_val(v),
        }
    }
}
fn main() {
// サンプルデータ
let timestamp = "2022-09-18 23:36:51.075 +09:00";
let computer = "IE10Win7";
let channel = "Sec";
let level = "info";
let event_id = "4688";
let record_id = "1";
let rule_title = "Proc Exec";
let rule_path = "Sec_4688_Info_ProcExec.yml";
let detail = "CommandLine: C:\\Windows\\Microsoft.NET\\Framework\\v4.0.30319\\mscorsvw.exe -StartupEvent 1f8 -InterruptEvent 0 -NGENProcess 1f0 -Pipe 1ec -Comment \"\"NGen Worker Process\"\" ¦ NewProcessId: 0x1318 ¦ NewProcessName: C:\\Windows\\Microsoft.NET\\Framework\\v4.0.30319\\mscorsvw.exe ¦ ProcessId: 0xfec ¦ SubjectDomainName: WORKGROUP ¦ SubjectLogonId: 0x3e7 ¦ SubjectUserName: IE10WIN7$ ¦ SubjectUserSid: S-1-5-18 ¦ TokenElevationType: %%1936";
// サンプルデータでProfileを作成
let mut q = HashMap::new();
q.insert("%Timestamp%".to_string(), timestamp.to_string());
q.insert("%Computer%".to_string(), computer.to_string());
q.insert("%Channel%".to_string(), channel.to_string());
q.insert("%Level%".to_string(), level.to_string());
q.insert("%EventID%".to_string(), event_id.to_string());
q.insert("%RecordID%".to_string(), record_id.to_string());
q.insert("%RuleTitle%".to_string(), rule_title.to_string());
q.insert("%Detail%".to_string(), detail.to_string());
// Profile用のHashMapの各エントリのメモリ上のサイズを合計
let string_mem_size = 8 * 3; // pointer(1byte) + capacity(1byte) + len(1byte) on stack memory
let j = q.keys().map(|k| string_mem_size + k.len()).sum::<usize>(); // stack(24bytes) + heap(string length bytes)
let k = q.values().map(|k| string_mem_size + k.len()).sum::<usize>(); // stack(24bytes) + heap(string length bytes)
let l = std::mem::size_of_val(&q);
// サンプルデータでDetectInfoを作成
let a = DetectInfo {
rulepath: rule_path.to_string(),
ruletitle: rule_title.to_string(),
level: level.to_string(),
computername: computer.to_string(),
eventid: event_id.to_string(),
detail: detail.to_string(),
record_information: "".to_string(),
ext_field: q
};
//DetectInfoのstruct全体のメモリサイズ
let b = std::mem::size_of_val(&a);
//DetectInfoの各フィールドのメモリサイズ
let c = a.ruletitle.len();
let d = a.rulepath.len();
let e = a.level.len();
let f = a.computername.len();
let g = a.eventid.len();
let h = a.detail.len();
let i = a.record_information.len();
let detect_count = 1629710; //6.1GB evtxで実行したときの検知数
let profile_size = j + k + l;
let detect_info_exclude_profile_size = b + c + d + e + f + g + h + i;
let total_detect_info_size = detect_info_exclude_profile_size + profile_size;
println!("old version.");
println!("avg profile key size [{}] byte", j);
println!("avg profile val size [{}] byte", k);
println!("total profile key size [{}] MB", j * detect_count/1024/1024);
println!("total profile val size [{}] MB", k * detect_count/1024/1024);
println!("total profile size [{}] MB", profile_size * detect_count/1024/1024);
println!("total detect_info size'(exclude profile) [{}] MB", detect_info_exclude_profile_size * detect_count/1024/1024);
println!("total detect_info size [{}] MB", total_detect_info_size * detect_count/1024/1024);
println!("");
let _ = a.ext_field.len();
let mut w = HashSet::new();
w.insert(Profile::Timestamp(timestamp.to_string()));
w.insert(Profile::Computer(computer.to_string()));
w.insert(Profile::Channel(channel.to_string()));
w.insert(Profile::Level(level.to_string()));
w.insert(Profile::EventID(event_id.to_string()));
w.insert(Profile::RecordID(record_id.to_string()));
w.insert(Profile::RuleTitle(rule_title.to_string()));
w.insert(Profile::Detail(Arc::new(a.detail.to_string())));
let x = w.iter().map(|p| std::mem::size_of_val(&p)).sum::<usize>(); //Enumのメモリサイズ合計
let y = w.iter().map(|p| string_mem_size + p.val_mem_size()).sum::<usize>(); //Enumのvalue部のメモリサイズ合計
let z = std::mem::size_of_val(&w);
let new_profile_size = x + y + z;
let total_new_detect_info_size = detect_info_exclude_profile_size + new_profile_size;
println!("new version.");
println!("avg new profile key size [{}] byte", x);
println!("avg new profile val size [{}] byte", y);
println!("total new profile key size [{}] MB", x * detect_count/1024/1024);
println!("total new profile val size [{}] MB", y * detect_count/1024/1024);
println!("total new profile size [{}] MB", new_profile_size * detect_count/1024/1024);
println!("total new detect_info size'(exclude new profile) [{}] MB", detect_info_exclude_profile_size * detect_count/1024/1024);
println!("total new detect_info size [{}] MB", total_new_detect_info_size * detect_count/1024/1024);
}
The output of the code above is as follows. Since a memory reduction can be expected, I'll try implementing it in this direction.
old version.
avg profile key size [267] byte
avg profile val size [685] byte
total profile key size [414] MB
total profile val size [1064] MB
total profile size [1554] MB
total detect_info size'(exclude profile) [1089] MB
total detect_info size [2643] MB
new version.
avg new profile key size [64] byte
avg new profile val size [259] byte
total new profile key size [99] MB
total new profile val size [402] MB
total new profile size [576] MB
total new detect_info size'(exclude new profile) [1089] MB
total new detect_info size [1666] MB
With the introduction of custom profiles in 1.5.0, Hayabusa has been using about twice as much memory. In my tests against a 14 GB dataset, Hayabusa 1.4.3 used less than 6 GB, but the current 1.8.0-dev version uses about 11.5 GB. We are currently investigating how we can reduce the memory usage.
Note: this only happens when there is enough available memory on the system. When there is low memory, Hayabusa will only use around 300~400MB on average in order not to max out memory.