Why is training with graph_rewrite so slow (about 5 s per step)? #5794

Closed S601327412 closed 5 years ago

S601327412 commented 5 years ago

Memory usage is also excessive, even though I only use a batch size of 48.

Source code / logs

2018-11-21 14:44:21.759806: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties: name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.582 pciBusID: 0000:03:00.0 totalMemory: 10.91GiB freeMemory: 10.35GiB 2018-11-21 14:44:21.961547: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 1 with properties: name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.582 pciBusID: 0000:04:00.0 totalMemory: 10.91GiB freeMemory: 10.75GiB 2018-11-21 14:44:21.963074: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0, 1

INFO:tensorflow:global step 234: loss = 2.1614 (5.394 sec/step) I1121 15:11:57.188936 140018120492800 tf_logging.py:115] global step 234: loss = 2.1614 (5.394 sec/step) INFO:tensorflow:global step 235: loss = 2.0140 (5.465 sec/step) I1121 15:12:02.656718 140018120492800 tf_logging.py:115] global step 235: loss = 2.0140 (5.465 sec/step) INFO:tensorflow:global step 236: loss = 1.9067 (5.226 sec/step) I1121 15:12:07.885817 140018120492800 tf_logging.py:115] global step 236: loss = 1.9067 (5.226 sec/step) INFO:tensorflow:global step 237: loss = 2.5041 (5.481 sec/step) I1121 15:12:13.369063 140018120492800 tf_logging.py:115] global step 237: loss = 2.5041 (5.481 sec/step) INFO:tensorflow:global step 238: loss = 1.9676 (5.412 sec/step) I1121 15:12:18.783356 140018120492800 tf_logging.py:115] global step 238: loss = 1.9676 (5.412 sec/step) INFO:tensorflow:global step 239: loss = 2.2165 (5.401 sec/step) I1121 15:12:24.187036 140018120492800 tf_logging.py:115] global step 239: loss = 2.2165 (5.401 sec/step) INFO:tensorflow:global step 240: loss = 2.3579 (5.381 sec/step) I1121 15:12:29.570735 140018120492800 tf_logging.py:115] global step 240: loss = 2.3579 (5.381 sec/step)

model { ssd { num_classes: 7 image_resizer { fixed_shape_resizer { height: 300 width: 300 } } feature_extractor { type: "ssd_mobilenet_v2" depth_multiplier: 1.0 min_depth: 16 conv_hyperparams { regularizer { l2_regularizer { weight: 3.99999989895e-05 } } initializer { random_normal_initializer { mean: 0.0 stddev: 0.00999999977648 } } activation: RELU_6 batch_norm { decay: 0.9997 center: true scale: true epsilon: 0.0010000000475 } } override_base_feature_extractor_hyperparams: true } box_coder { faster_rcnn_box_coder { y_scale: 10.0 x_scale: 10.0 height_scale: 5.0 width_scale: 5.0 } } matcher { argmax_matcher { matched_threshold: 0.5 unmatched_threshold: 0.5 ignore_thresholds: false negatives_lower_than_unmatched: true force_match_for_each_row: true use_matmul_gather: true } } similarity_calculator { iou_similarity { } } box_predictor { convolutional_box_predictor { conv_hyperparams { regularizer { l2_regularizer { weight: 3.99999989895e-05 } } initializer { truncated_normal_initializer { mean: 0.0 stddev: 0.03 } } activation: RELU_6 batch_norm { decay: 0.9997 center: true scale: true epsilon: 0.0010000000475 } } min_depth: 0 max_depth: 0 num_layers_before_predictor: 0 use_dropout: false dropout_keep_probability: 0.800000011921 kernel_size: 3 use_depthwise:true box_code_size: 4 apply_sigmoid_to_scores: false class_prediction_bias_init: 0.0 class_prediction_bias_init: -4.59999990463 } } anchor_generator { ssd_anchor_generator { num_layers: 6 min_scale: 0.20000000298 max_scale: 0.949999988079 aspect_ratios: 2.0 aspect_ratios: 1.5 aspect_ratios: 0.8 aspect_ratios: 1.82 aspect_ratios: 1.0 } } post_processing { batch_non_max_suppression { score_threshold: 0.3 iou_threshold: 0.600000023842 max_detections_per_class: 100 max_total_detections: 100 } score_converter: SIGMOID } normalize_loss_by_num_matches: true loss { localization_loss { weighted_smooth_l1 { } } classification_loss { weighted_sigmoid { } } hard_example_miner { num_hard_examples: 1000 iou_threshold: 0.99 
loss_type: CLASSIFICATION max_negatives_per_positive: 3 min_negatives_per_image: 3 } classification_weight: 1.0 localization_weight: 1.0 } encode_background_as_zeros: true normalize_loc_loss_by_codesize: false } } train_config { batch_size: 48 data_augmentation_options { random_horizontal_flip { } } data_augmentation_options { ssd_random_crop { } } sync_replicas: false optimizer { rms_prop_optimizer: { learning_rate: { exponential_decay_learning_rate { initial_learning_rate: 0.0035 decay_steps: 800456 decay_factor: 0.95 } } momentum_optimizer_value: 0.9 decay: 0.9 epsilon: 1.0 } use_moving_average: true }

fine_tune_checkpoint: "/media/ubuntu/data1/models/research/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09/model.ckpt"

fine_tune_checkpoint: "/media/ubuntu/data1/models/research/object_detection/mymodel/model/train/model.ckpt-69744"

fine_tune_checkpoint: "/media/ubuntu/data1/models/research/object_detection/ssd_mobilenet_v2_quantized_300x300_coco_2018_09_14/model.ckpt"

fine_tune_checkpoint_type: "detection" num_steps: 69744

startup_delay_steps: 0.0

replicas_to_aggregate: 8

max_number_of_boxes: 50 unpad_groundtruth_tensors: false summarize_gradients:false } train_input_reader { label_map_path: "../mymodel/label_map.pbtxt" tf_record_input_reader { input_path: "../mymodel/data/train.record" } } eval_config { num_examples: 655 metrics_set: "coco_detection_metrics" use_moving_averages: true num_visualizations:30 batch_size:10 }

eval_input_reader { label_map_path: "../mymodel/data/label_map.pbtxt" shuffle: true num_readers: 1 tf_record_input_reader { input_path: "../mymodel/data/valid.record" } }

graph_rewriter { quantization { delay: 0 weight_bits: 8 activation_bits: 8 } }

