munhouiani / Deep-Packet

Pytorch implementation of deep packet: a novel approach for encrypted traffic classification using deep learning
MIT License
181 stars 56 forks source link

training fails on VPN dataset with a ValueError #42

Open gsankara opened 1 year ago

gsankara commented 1 year ago

I see a `ValueError: Please pass features or at least one example when writing data` at the end of train_cnn when run on the VPN dataset. I have not modified the code. I faced a NaN error and set under-sampling to False. Then I encountered this one.

Here is the detailed output: `Trainer(val_check_interval=1.0) was configured so validation will run at the end of the training epoch.`

| Name | Type | Params

0 | conv1 | Sequential | 1.0 K 1 | conv2 | Sequential | 200 K 2 | max_pool | MaxPool1d | 0 3 | fc1 | Sequential | 9.9 M 4 | fc2 | Sequential | 20.1 K 5 | fc3 | Sequential | 5.0 K 6 | out | Linear | 867

10.1 M Trainable params 0 Non-trainable params 10.1 M Total params 40.430 Total estimated model params size (MB) Using custom data configuration train.parquet-2c3be5e9d214c057 Downloading and preparing dataset parquet/train.parquet to /home/rvn/.cache/huggingface/datasets/parquet/train.parquet-2c3be5e9d214c057/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec... Downloading data files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 3663.15it/s] Extracting data files: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 565.27it/s] Traceback (most recent call last): File "/home/rvn/Deep-Packet/train_cnn.py", line 33, in main() File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/click/core.py", line 1130, in call return self.main(args, kwargs) File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/click/core.py", line 1055, in main rv = self.invoke(ctx) File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/click/core.py", line 1404, in invoke return ctx.invoke(self.callback, ctx.params) File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/click/core.py", line 760, in invoke return __callback(args, kwargs) File "/home/rvn/Deep-Packet/train_cnn.py", line 25, in main train_application_classification_cnn_model(data_path, model_path) File "/home/rvn/Deep-Packet/ml/utils.py", line 117, in train_application_classification_cnn_model train_cnn( File "/home/rvn/Deep-Packet/ml/utils.py", line 58, in train_cnn trainer.fit(model) File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 696, in fit self._call_and_handle_interrupt( File 
"/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 650, in _call_and_handle_interrupt return trainer_fn(*args, *kwargs) File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 735, in _fit_impl results = self._run(model, ckpt_path=self.ckpt_path) File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1166, in _run results = self._run_stage() File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1252, in _run_stage return self._run_train() File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1283, in _run_train self.fit_loop.run() File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/loops/loop.py", line 195, in run self.on_run_start(args, kwargs) File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/loops/fit_loop.py", line 211, in on_run_start self.trainer.reset_train_dataloader(self.trainer.lightning_module) File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1812, in reset_train_dataloader self.train_dataloader = self._data_connector._request_dataloader(RunningStage.TRAINING) File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py", line 453, in _request_dataloader dataloader = source.dataloader() File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py", line 526, in dataloader return self.instance.trainer._call_lightning_module_hook(self.name, pl_module=self.instance) File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", 
line 1550, in _call_lightning_module_hook output = fn(*args, kwargs) File "/home/rvn/Deep-Packet/ml/model.py", line 101, in train_dataloader dataset_dict = datasets.load_dataset(self.hparams.data_path) File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/datasets/load.py", line 1698, in load_dataset builder_instance.download_and_prepare( File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/datasets/builder.py", line 807, in download_and_prepare self._download_and_prepare( File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/datasets/builder.py", line 898, in _download_and_prepare self._prepare_split(split_generator, prepare_split_kwargs) File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/datasets/builder.py", line 1516, in _prepare_split num_examples, num_bytes = writer.finalize() File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/datasets/arrow_writer.py", line 559, in finalize raise ValueError("Please pass features or at least one example when writing data") ValueError: Please pass features or at least one example when writing data`

365ms commented 1 year ago

"I faced NaN error and set under sampling to False." You don't need to set it to False — under-sampling is quite an important process. Try to check the processed_data: open one of the .json files and check the app_label and traffic_label. If the value is null, that means you should add the specific prefix-id for your dataset in utils.py. PREFIX_TO_APP_ID = {

# AIM chat

"aim_chat_3a": 0,
"aim_chat_3b": 0,
"aimchat1": 0,
"aimchat2": 0,
"vpn_aim_chat1a":0,
"vpn_aim_chat1b":0,
# Email
"email1a": 1,
"email1b": 1,
"email2a": 1,
"email2b": 1,
"vpn_email2a": 1,
"vpn_email2b": 1,
# Facebook
"facebook_audio1a": 2,
"facebook_audio1b": 2,
"facebook_audio2a": 2,
"facebook_audio2b": 2,
"facebook_audio3": 2,
"facebook_audio4": 2,
"facebook_chat_4a": 2,
"facebook_chat_4b": 2,
"facebook_video1a": 2,
"facebook_video1b": 2,
"facebook_video2a": 2,
"facebook_video2b": 2,
"facebookchat1": 2,
"facebookchat2": 2,
"facebookchat3": 2,
"vpn_facebook_audio2":2,
"vpn_facebook_chat1a":2,
"vpn_facebook_chat1b":2,
# FTPS
"ftps_down_1a": 3,
"ftps_down_1b": 3,
"ftps_up_2a": 3,
"ftps_up_2b": 3,
"vpn_ftps_A":3,
"vpn_ftps_B":3,
# Gmail
"gmailchat1": 4,
"gmailchat2": 4,
"gmailchat3": 4,
# Hangouts
"hangout_chat_4b": 5,
"hangouts_audio1a": 5,
"hangouts_audio1b": 5,
"hangouts_audio2a": 5,
"hangouts_audio2b": 5,
"hangouts_audio3": 5,
"hangouts_audio4": 5,
"hangouts_chat_4a": 5,
"hangouts_video1b": 5,
"hangouts_video2a": 5,
"hangouts_video2b": 5,
"vpn_hangouts_audio1":5,
"vpn_hangouts_audio2":5,
"vpn_hangouts_chat1a":5,
"vpn_hangouts_chat1b":5,
# ICQ
"icq_chat_3a": 6,
"icq_chat_3b": 6,
"icqchat1": 6,
"icqchat2": 6,
# Netflix
"netflix1": 7,
"netflix2": 7,
"netflix3": 7,
"netflix4": 7,
# SCP
"scp1": 8,
"scpdown1": 8,
"scpdown2": 8,
"scpdown3": 8,
"scpdown4": 8,
"scpdown5": 8,
"scpdown6": 8,
"scpup1": 8,
"scpup2": 8,
"scpup3": 8,
"scpup5": 8,
"scpup6": 8,
# SFTP
"sftp1": 9,
"sftp_down_3a": 9,
"sftp_down_3b": 9,
"sftp_up_2a": 9,
"sftp_up_2b": 9,
"sftpdown1": 9,
"sftpdown2": 9,
"sftpup1": 9,
# Skype
"skype_audio1a": 10,
"skype_audio1b": 10,
"skype_audio2a": 10,
"skype_audio2b": 10,
"skype_audio3": 10,
"skype_audio4": 10,
"skype_chat1a": 10,
"skype_chat1b": 10,
"skype_file1": 10,
"skype_file2": 10,
"skype_file3": 10,
"skype_file4": 10,
"skype_file5": 10,
"skype_file6": 10,
"skype_file7": 10,
"skype_file8": 10,
"skype_video1a": 10,
"skype_video1b": 10,
"skype_video2a": 10,
"skype_video2b": 10,
# Spotify
"spotify1": 11,
"spotify2": 11,
"spotify3": 11,
"spotify4": 11,
# Vimeo
"vimeo1": 12,
"vimeo2": 12,
"vimeo3": 12,
"vimeo4": 12,
# Voipbuster
"voipbuster1b": 13,
"voipbuster2b": 13,
"voipbuster3b": 13,
"voipbuster_4a": 13,
"voipbuster_4b": 13,
# Youtube
"youtube1": 14,
"youtube2": 14,
"youtube3": 14,
"youtube4": 14,
"youtube5": 14,
"youtube6": 14,
"youtubehtml5_1": 14,
# BitTorrent
"vpn_bittorrent":15,

} The dataset I use is VPN-PCAPS-01.