Open Nisar-1234 opened 4 years ago
"""Fit loglizer's PCA, InvariantsMiner and LogClustering models on HDFS.

Loads the HDFS dataset from ``struct_log``, splits it into train and test
halves, and fits each model named in ``run_models`` on features extracted
from the training half.
"""
import sys

# Must run before the loglizer imports below; presumably this makes a
# loglizer checkout in the parent directory importable -- confirm.
sys.path.append('../')

import pandas as pd
import loglizer
from loglizer.models import *
from loglizer import dataloader, preprocessing

run_models = ['PCA', 'InvariantsMiner', 'LogClustering']
struct_log = r'C:\Users\privus\Downloads\HDFS.npz'  # The benchmark dataset

# The guard has to compare the dunder name ``__name__`` with '__main__'.
# The pasted form ``if name == 'main':`` raises NameError because ``name``
# is never defined (the underscores were most likely stripped by Markdown).
if __name__ == '__main__':
    (x_tr, y_train), (x_te, y_test) = dataloader.load_HDFS(
        struct_log,
        window='session',
        train_ratio=0.5,
        split_type='uniform',
    )

    # NOTE(review): benchmark_results, x_te and y_test are not used in the
    # visible part of the script; presumably a later evaluation step that is
    # not shown here consumes them -- confirm before removing.
    benchmark_results = []

    for _model in run_models:
        print('Evaluating {} on HDFS:'.format(_model))

        # A fresh extractor per model, so fitted state is never shared
        # between models that use different weighting/normalization.
        feature_extractor = preprocessing.FeatureExtractor()

        if _model == 'PCA':
            x_train = feature_extractor.fit_transform(
                x_tr, term_weighting='tf-idf', normalization='zero-mean')
            model = PCA()
            model.fit(x_train)

        elif _model == 'InvariantsMiner':
            x_train = feature_extractor.fit_transform(x_tr)
            model = InvariantsMiner(epsilon=0.5)
            model.fit(x_train)

        elif _model == 'LogClustering':
            x_train = feature_extractor.fit_transform(
                x_tr, term_weighting='tf-idf')
            model = LogClustering(max_dist=0.3, anomaly_threshold=0.3)
            # Use only normal samples for training
            model.fit(x_train[y_train == 0, :])
It is probably caused by a Python version mismatch.
import sys sys.path.append('../') import pandas as pd import loglizer from loglizer.models import * from loglizer import dataloader, preprocessing
run_models = ['PCA', 'InvariantsMiner', 'LogClustering'] struct_log = r'C:\Users\privus\Downloads\HDFS.npz' # The benchmark dataset
if name == 'main': (x_tr, y_train), (x_te, y_test) = dataloader.load_HDFS(struct_log, window='session', train_ratio=0.5, split_type='uniform') benchmark_results = [] for _model in run_models: print('Evaluating {} on HDFS:'.format(_model)) if _model == 'PCA': feature_extractor = preprocessing.FeatureExtractor() x_train = feature_extractor.fit_transform(x_tr, term_weighting='tf-idf', normalization='zero-mean') model = PCA() model.fit(x_train)