lllyasviel / stable-diffusion-webui-forge

GNU Affero General Public License v3.0
8.72k stars 865 forks source link

TensorRT support #64

Open CaptainVarghoss opened 10 months ago

CaptainVarghoss commented 10 months ago

Checklist

What happened?

The TensorRT extension installs and seems to function properly on a clean install. The console shows that the unet is loaded and the TRT profile is loaded, but there is no change in generation time.

Steps to reproduce the problem

  1. Install Forge
  2. Install TensorRT extension
  3. Generate image with and without TRT engine

What should have happened?

Image generation speed should have increased significantly.

What browsers do you use to access the UI?

Google Chrome

Sysinfo

{
    "Platform": "Windows-10-10.0.22631-SP0",
    "Python": "3.10.10",
    "Version": "f0.0.7-latest-44-g7359740f",
    "Commit": "7359740f36e9f59d8c358cbb5b07a4fde85900c3",
    "Script path": "D:\\AI\\Auto1111\\Forge",
    "Data path": "D:\\AI\\Auto1111\\Forge",
    "Extensions dir": "D:\\AI\\Auto1111\\Forge\\extensions",
    "Checksum": "cc09ece5a30ee1a1f33cfb3b1ada98495f13935771dd980222821fd63b68dd30",
    "Commandline": [
        "launch.py",
        "--always-gpu",
        "--xformers",
        "--ckpt-dir",
        "D:\\AI\\Models\\checkpoints",
        "--hypernetwork-dir",
        "D:\\AI\\Models\\hypernetworks",
        "--embeddings-dir",
        "D:\\AI\\Models\\embeddings",
        "--lora-dir",
        "D:\\AI\\Models\\loras",
        "--vae-dir",
        "D:\\AI\\Models\\vae",
        "--realesrgan-models-path",
        "D:\\AI\\Models\\upscalers",
        "--esrgan-models-path",
        "D:\\AI\\Models\\upscalers"
    ],
    "Torch env info": {
        "torch_version": "2.1.2+cu121",
        "is_debug_build": "False",
        "cuda_compiled_version": "12.1",
        "gcc_version": null,
        "clang_version": null,
        "cmake_version": null,
        "os": "Microsoft Windows 11 Home",
        "libc_version": "N/A",
        "python_version": "3.10.10 (tags/v3.10.10:aad5f6a, Feb  7 2023, 17:20:36) [MSC v.1929 64 bit (AMD64)] (64-bit runtime)",
        "python_platform": "Windows-10-10.0.22631-SP0",
        "is_cuda_available": "True",
        "cuda_runtime_version": null,
        "cuda_module_loading": "LAZY",
        "nvidia_driver_version": "551.23",
        "nvidia_gpu_models": "GPU 0: NVIDIA GeForce RTX 4090",
        "cudnn_version": null,
        "pip_version": "pip3",
        "pip_packages": [
            "numpy==1.26.2",
            "open-clip-torch==2.20.0",
            "pytorch-lightning==1.9.4",
            "torch==2.1.2+cu121",
            "torchdiffeq==0.2.3",
            "torchmetrics==1.3.0.post0",
            "torchsde==0.2.6",
            "torchvision==0.16.2+cu121"
        ],
        "conda_packages": null,
        "hip_compiled_version": "N/A",
        "hip_runtime_version": "N/A",
        "miopen_runtime_version": "N/A",
        "caching_allocator_config": "",
        "is_xnnpack_available": "True",
        "cpu_info": [
            "Architecture=9",
            "CurrentClockSpeed=3000",
            "DeviceID=CPU0",
            "Family=207",
            "L2CacheSize=32768",
            "L2CacheSpeed=",
            "Manufacturer=GenuineIntel",
            "MaxClockSpeed=3000",
            "Name=13th Gen Intel(R) Core(TM) i9-13900K",
            "ProcessorType=3",
            "Revision="
        ]
    },
    "Exceptions": [],
    "CPU": {
        "model": "Intel64 Family 6 Model 183 Stepping 1, GenuineIntel",
        "count logical": 32,
        "count physical": 24
    },
    "RAM": {
        "total": "64GB",
        "used": "20GB",
        "free": "43GB"
    },
    "Extensions": [
        {
            "name": "Stable-Diffusion-WebUI-TensorRT",
            "path": "D:\\AI\\Auto1111\\Forge\\extensions\\Stable-Diffusion-WebUI-TensorRT",
            "version": "e2b6980c",
            "branch": "main",
            "remote": "https://github.com/NVIDIA/Stable-Diffusion-WebUI-TensorRT"
        },
        {
            "name": "adetailer",
            "path": "D:\\AI\\Auto1111\\Forge\\extensions\\adetailer",
            "version": "8f01dfda",
            "branch": "main",
            "remote": "https://github.com/Bing-su/adetailer.git"
        }
    ],
    "Inactive extensions": [],
    "Environment": {
        "COMMANDLINE_ARGS": "--always-gpu --xformers --ckpt-dir \"D:\\AI\\Models\\checkpoints\" --hypernetwork-dir \"D:\\AI\\Models\\hypernetworks\" --embeddings-dir \"D:\\AI\\Models\\embeddings\" --lora-dir \"D:\\AI\\Models\\loras\" --vae-dir \"D:\\AI\\Models\\vae\" --realesrgan-models-path \"D:\\AI\\Models\\upscalers\" --esrgan-models-path \"D:\\AI\\Models\\upscalers\"",
        "GRADIO_ANALYTICS_ENABLED": "False"
    },
    "Config": {
        "ldsr_steps": 100,
        "ldsr_cached": false,
        "SCUNET_tile": 256,
        "SCUNET_tile_overlap": 8,
        "SWIN_tile": 192,
        "SWIN_tile_overlap": 8,
        "SWIN_torch_compile": false,
        "control_net_detectedmap_dir": "detected_maps",
        "control_net_models_path": "D:\\AI\\Models\\controlnet",
        "control_net_modules_path": "",
        "control_net_unit_count": 3,
        "control_net_model_cache_size": 5,
        "control_net_no_detectmap": false,
        "control_net_detectmap_autosaving": false,
        "control_net_allow_script_control": false,
        "control_net_sync_field_args": true,
        "controlnet_show_batch_images_in_ui": false,
        "controlnet_increment_seed_during_batch": false,
        "controlnet_disable_openpose_edit": false,
        "controlnet_disable_photopea_edit": false,
        "controlnet_photopea_warning": true,
        "controlnet_input_thumbnail": true,
        "sd_checkpoint_hash": "093fe6613182949a3a1d58003180e14dee9d3ca6447a144900034f5e075214ee",
        "sd_model_checkpoint": "ProtoLarkXL_v5.fp16_vae_NSFW.fp16.ckpt",
        "outdir_samples": "D:\\AI\\Output\\new-images",
        "outdir_txt2img_samples": "D:\\AI\\Output\\new-images",
        "outdir_img2img_samples": "D:\\AI\\Output\\new-images",
        "outdir_extras_samples": "D:\\AI\\Output\\new-images",
        "outdir_grids": "D:\\AI\\Output\\new-images",
        "outdir_txt2img_grids": "D:\\AI\\Output\\new-images",
        "outdir_img2img_grids": "D:\\AI\\Output\\new-images",
        "outdir_save": "D:\\AI\\Output\\saved",
        "outdir_init_images": "D:\\AI\\Output\\init-images",
        "samples_save": true,
        "samples_format": "png",
        "samples_filename_pattern": "[datetime<%Y%m%d_%H%M%S>]_[seed]",
        "save_images_add_number": false,
        "save_images_replace_action": "Replace",
        "grid_save": false,
        "grid_format": "png",
        "grid_extended_filename": false,
        "grid_only_if_multiple": true,
        "grid_prevent_empty_spots": false,
        "grid_zip_filename_pattern": "",
        "n_rows": -1,
        "font": "",
        "grid_text_active_color": "#000000",
        "grid_text_inactive_color": "#999999",
        "grid_background_color": "#ffffff",
        "save_images_before_face_restoration": false,
        "save_images_before_highres_fix": false,
        "save_images_before_color_correction": false,
        "save_mask": false,
        "save_mask_composite": false,
        "jpeg_quality": 80,
        "webp_lossless": false,
        "export_for_4chan": false,
        "img_downscale_threshold": 4.0,
        "target_side_length": 4000.0,
        "img_max_size_mp": 200.0,
        "use_original_name_batch": true,
        "use_upscaler_name_as_suffix": false,
        "save_selected_only": true,
        "save_init_img": false,
        "temp_dir": "",
        "clean_temp_dir_at_start": false,
        "save_incomplete_images": false,
        "notification_audio": true,
        "notification_volume": 100,
        "save_to_dirs": false,
        "grid_save_to_dirs": false,
        "use_save_to_dirs_for_ui": false,
        "directories_filename_pattern": "",
        "directories_max_prompt_words": 8,
        "auto_backcompat": true,
        "use_old_emphasis_implementation": false,
        "use_old_karras_scheduler_sigmas": false,
        "no_dpmpp_sde_batch_determinism": false,
        "use_old_hires_fix_width_height": false,
        "dont_fix_second_order_samplers_schedule": false,
        "hires_fix_use_firstpass_conds": false,
        "use_old_scheduling": false,
        "use_downcasted_alpha_bar": false,
        "lora_functional": false,
        "extra_networks_show_hidden_directories": true,
        "extra_networks_dir_button_function": false,
        "extra_networks_hidden_models": "When searched",
        "extra_networks_default_multiplier": 1,
        "extra_networks_card_width": 0.0,
        "extra_networks_card_height": 0.0,
        "extra_networks_card_text_scale": 1,
        "extra_networks_card_show_desc": true,
        "extra_networks_card_order_field": "Path",
        "extra_networks_card_order": "Ascending",
        "extra_networks_tree_view_default_enabled": false,
        "extra_networks_add_text_separator": " ",
        "ui_extra_networks_tab_reorder": "",
        "textual_inversion_print_at_load": false,
        "textual_inversion_add_hashes_to_infotext": true,
        "sd_hypernetwork": "None",
        "sd_lora": "None",
        "lora_preferred_name": "Filename",
        "lora_add_hashes_to_infotext": true,
        "lora_show_all": false,
        "lora_hide_unknown_for_versions": [],
        "lora_in_memory_limit": 0,
        "lora_not_found_warning_console": false,
        "lora_not_found_gradio_warning": false,
        "cross_attention_optimization": "Automatic",
        "s_min_uncond": 0,
        "token_merging_ratio": 0,
        "token_merging_ratio_img2img": 0,
        "token_merging_ratio_hr": 0,
        "pad_cond_uncond": false,
        "pad_cond_uncond_v0": false,
        "persistent_cond_cache": true,
        "batch_cond_uncond": true,
        "fp8_storage": "Disable",
        "cache_fp16_weight": false,
        "hide_samplers": [],
        "eta_ddim": 0,
        "eta_ancestral": 1,
        "ddim_discretize": "uniform",
        "s_churn": 0,
        "s_tmin": 0,
        "s_tmax": 0,
        "s_noise": 1,
        "k_sched_type": "Automatic",
        "sigma_min": 0.0,
        "sigma_max": 0.0,
        "rho": 0.0,
        "eta_noise_seed_delta": 0,
        "always_discard_next_to_last_sigma": false,
        "sgm_noise_multiplier": false,
        "uni_pc_variant": "bh1",
        "uni_pc_skip_type": "time_uniform",
        "uni_pc_order": 3,
        "uni_pc_lower_order_final": true,
        "sd_noise_schedule": "Default",
        "sd_checkpoints_limit": 4,
        "sd_checkpoints_keep_in_cpu": true,
        "sd_checkpoint_cache": 0,
        "sd_unet": "Automatic",
        "enable_quantization": false,
        "enable_emphasis": true,
        "enable_batch_seeds": true,
        "comma_padding_backtrack": 20,
        "upcast_attn": false,
        "randn_source": "GPU",
        "tiling": false,
        "hires_fix_refiner_pass": "second pass",
        "sdxl_crop_top": 0.0,
        "sdxl_crop_left": 0.0,
        "sdxl_refiner_low_aesthetic_score": 2.5,
        "sdxl_refiner_high_aesthetic_score": 6.0,
        "sd_vae_checkpoint_cache": 2,
        "sd_vae_overrides_per_model_preferences": true,
        "auto_vae_precision_bfloat16": false,
        "auto_vae_precision": true,
        "sd_vae_encode_method": "Full",
        "sd_vae_decode_method": "Full",
        "inpainting_mask_weight": 1,
        "initial_noise_multiplier": 1,
        "img2img_extra_noise": 0,
        "img2img_color_correction": false,
        "img2img_fix_steps": false,
        "img2img_background_color": "#ffffff",
        "img2img_editor_height": 720,
        "img2img_sketch_default_brush_color": "#ffffff",
        "img2img_inpaint_mask_brush_color": "#ffffff",
        "img2img_inpaint_sketch_default_brush_color": "#ffffff",
        "return_mask": false,
        "return_mask_composite": false,
        "img2img_batch_show_results_limit": 32,
        "overlay_inpaint": true,
        "return_grid": true,
        "do_not_show_images": false,
        "js_modal_lightbox": true,
        "js_modal_lightbox_initially_zoomed": true,
        "js_modal_lightbox_gamepad": false,
        "js_modal_lightbox_gamepad_repeat": 250.0,
        "sd_webui_modal_lightbox_icon_opacity": 1,
        "sd_webui_modal_lightbox_toolbar_opacity": 0.9,
        "gallery_height": "",
        "enable_pnginfo": true,
        "save_txt": false,
        "add_model_name_to_info": true,
        "add_model_hash_to_info": true,
        "add_vae_name_to_info": true,
        "add_vae_hash_to_info": true,
        "add_user_name_to_info": false,
        "add_version_to_infotext": true,
        "disable_weights_auto_swap": true,
        "infotext_skip_pasting": [],
        "infotext_styles": "Apply if any",
        "show_progressbar": true,
        "live_previews_enable": true,
        "live_previews_image_format": "png",
        "show_progress_grid": true,
        "show_progress_every_n_steps": 10,
        "show_progress_type": "Approx NN",
        "live_preview_allow_lowvram_full": false,
        "live_preview_content": "Prompt",
        "live_preview_refresh_period": 1000.0,
        "live_preview_fast_interrupt": true,
        "js_live_preview_in_modal_lightbox": true,
        "keyedit_precision_attention": 0.1,
        "keyedit_precision_extra": 0.05,
        "keyedit_delimiters": ".,\\/!?%^*;:{}=`~() ",
        "keyedit_delimiters_whitespace": [
            "Tab",
            "Carriage Return",
            "Line Feed"
        ],
        "keyedit_move": true,
        "disable_token_counters": false,
        "extra_options_txt2img": [],
        "extra_options_img2img": [],
        "extra_options_cols": 1,
        "extra_options_accordion": false,
        "compact_prompt_box": false,
        "samplers_in_dropdown": true,
        "dimensions_and_batch_together": true,
        "sd_checkpoint_dropdown_use_short": false,
        "hires_fix_show_sampler": false,
        "hires_fix_show_prompts": false,
        "txt2img_settings_accordion": false,
        "img2img_settings_accordion": false,
        "interrupt_after_current": false,
        "localization": "None",
        "quicksettings_list": [
            "sd_model_checkpoint",
            "sd_vae",
            "CLIP_stop_at_last_layers",
            "sd_unet",
            "cross_attention_optimization"
        ],
        "ui_tab_order": [],
        "hidden_tabs": [],
        "ui_reorder_list": [],
        "gradio_theme": "Default",
        "gradio_themes_cache": true,
        "show_progress_in_title": true,
        "send_seed": true,
        "send_size": true,
        "api_enable_requests": true,
        "api_forbid_local_requests": true,
        "api_useragent": "",
        "auto_launch_browser": "Disable",
        "enable_console_prompts": false,
        "show_warnings": true,
        "show_gradio_deprecation_warnings": true,
        "memmon_poll_rate": 8,
        "samples_log_stdout": false,
        "multiple_tqdm": true,
        "enable_upscale_progressbar": true,
        "print_hypernet_extra": false,
        "list_hidden_files": true,
        "disable_mmap_load_safetensors": false,
        "hide_ldm_prints": true,
        "dump_stacks_on_signal": false,
        "face_restoration": false,
        "face_restoration_model": "CodeFormer",
        "code_former_weight": 0.5,
        "face_restoration_unload": false,
        "postprocessing_enable_in_main_ui": [],
        "postprocessing_operation_order": [],
        "upscaling_max_images_in_cache": 5,
        "postprocessing_existing_caption_action": "Ignore",
        "ESRGAN_tile": 192,
        "ESRGAN_tile_overlap": 8,
        "realesrgan_enabled_models": [
            "R-ESRGAN 4x+",
            "R-ESRGAN 4x+ Anime6B"
        ],
        "dat_enabled_models": [
            "DAT x2",
            "DAT x3",
            "DAT x4"
        ],
        "DAT_tile": 192,
        "DAT_tile_overlap": 8,
        "unload_models_when_training": false,
        "pin_memory": false,
        "save_optimizer_state": false,
        "save_training_settings_to_txt": true,
        "dataset_filename_word_regex": "",
        "dataset_filename_join_string": " ",
        "training_image_repeats_per_epoch": 1,
        "training_write_csv_every": 500.0,
        "training_xattention_optimizations": false,
        "training_enable_tensorboard": false,
        "training_tensorboard_save_images": false,
        "training_tensorboard_flush_every": 120.0,
        "canvas_hotkey_zoom": "Alt",
        "canvas_hotkey_adjust": "Ctrl",
        "canvas_hotkey_shrink_brush": "Q",
        "canvas_hotkey_grow_brush": "W",
        "canvas_hotkey_move": "F",
        "canvas_hotkey_fullscreen": "S",
        "canvas_hotkey_reset": "R",
        "canvas_hotkey_overlap": "O",
        "canvas_show_tooltip": true,
        "canvas_auto_expand": true,
        "canvas_blur_prompt": false,
        "canvas_disabled_functions": [
            "Overlap"
        ],
        "interrogate_keep_models_in_memory": false,
        "interrogate_return_ranks": false,
        "interrogate_clip_num_beams": 1,
        "interrogate_clip_min_length": 24,
        "interrogate_clip_max_length": 48,
        "interrogate_clip_dict_limit": 1500.0,
        "interrogate_clip_skip_categories": [],
        "interrogate_deepbooru_score_threshold": 0.5,
        "deepbooru_sort_alpha": true,
        "deepbooru_use_spaces": true,
        "deepbooru_escape": true,
        "deepbooru_filter_tags": "",
        "disabled_extensions": [],
        "disable_all_extensions": "none",
        "ad_max_models": 2,
        "ad_extra_models_dir": "",
        "ad_save_previews": false,
        "ad_save_images_before": false,
        "ad_only_seleted_scripts": true,
        "ad_script_names": "dynamic_prompting,dynamic_thresholding,wildcard_recursive,wildcards,lora_block_weight,negpip",
        "ad_bbox_sortby": "None",
        "ad_same_seed_for_each_tap": false,
        "CLIP_stop_at_last_layers": 1,
        "sd_vae": "sdxl_vae-fp16-fix.safetensors"
    },
    "Startup": {
        "total": 10.948799848556519,
        "records": {
            "initial startup": 0.017045021057128906,
            "prepare environment/checks": 0.007546901702880859,
            "prepare environment/git version info": 0.05465841293334961,
            "prepare environment/torch GPU test": 1.6307337284088135,
            "prepare environment/clone repositores": 0.11122941970825195,
            "prepare environment/run extensions installers/adetailer": 0.10972023010253906,
            "prepare environment/run extensions installers/Stable-Diffusion-WebUI-TensorRT": 0.11524391174316406,
            "prepare environment/run extensions installers": 0.22496414184570312,
            "prepare environment/run extensions_builtin installers/canvas-zoom-and-pan": 0.0,
            "prepare environment/run extensions_builtin installers/extra-options-section": 0.0,
            "prepare environment/run extensions_builtin installers/forge_legacy_preprocessors": 0.23349308967590332,
            "prepare environment/run extensions_builtin installers/forge_preprocessor_inpaint": 0.0005006790161132812,
            "prepare environment/run extensions_builtin installers/forge_preprocessor_marigold": 0.0,
            "prepare environment/run extensions_builtin installers/forge_preprocessor_normalbae": 0.0,
            "prepare environment/run extensions_builtin installers/forge_preprocessor_recolor": 0.0,
            "prepare environment/run extensions_builtin installers/forge_preprocessor_reference": 0.0,
            "prepare environment/run extensions_builtin installers/forge_preprocessor_revision": 0.0,
            "prepare environment/run extensions_builtin installers/forge_preprocessor_tile": 0.0,
            "prepare environment/run extensions_builtin installers/LDSR": 0.0,
            "prepare environment/run extensions_builtin installers/Lora": 0.0,
            "prepare environment/run extensions_builtin installers/mobile": 0.0,
            "prepare environment/run extensions_builtin installers/prompt-bracket-checker": 0.0,
            "prepare environment/run extensions_builtin installers/ScuNET": 0.0,
            "prepare environment/run extensions_builtin installers/sd_forge_controlllite": 0.0,
            "prepare environment/run extensions_builtin installers/sd_forge_controlnet": 0.20536065101623535,
            "prepare environment/run extensions_builtin installers/sd_forge_controlnet_example": 0.0,
            "prepare environment/run extensions_builtin installers/sd_forge_freeu": 0.0,
            "prepare environment/run extensions_builtin installers/sd_forge_hypertile": 0.0,
            "prepare environment/run extensions_builtin installers/sd_forge_ipadapter": 0.0,
            "prepare environment/run extensions_builtin installers/sd_forge_kohya_hrfix": 0.0,
            "prepare environment/run extensions_builtin installers/sd_forge_photomaker": 0.0,
            "prepare environment/run extensions_builtin installers/sd_forge_sag": 0.0,
            "prepare environment/run extensions_builtin installers/sd_forge_stylealign": 0.0,
            "prepare environment/run extensions_builtin installers/sd_forge_svd": 0.0,
            "prepare environment/run extensions_builtin installers/sd_forge_z123": 0.0005002021789550781,
            "prepare environment/run extensions_builtin installers/soft-inpainting": 0.0,
            "prepare environment/run extensions_builtin installers/SwinIR": 0.0,
            "prepare environment/run extensions_builtin installers": 0.43985462188720703,
            "prepare environment": 2.4950222969055176,
            "launcher": 0.006000518798828125,
            "import torch": 3.378505229949951,
            "import gradio": 0.8846189975738525,
            "setup paths": 0.5222210884094238,
            "import ldm": 0.0049839019775390625,
            "import sgm": 0.0,
            "initialize shared": 0.1600627899169922,
            "other imports": 0.8507680892944336,
            "opts onchange": 0.0,
            "setup SD model": 0.0,
            "setup codeformer": 0.0015099048614501953,
            "setup gfpgan": 0.011515617370605469,
            "set samplers": 0.0,
            "list extensions": 0.0014989376068115234,
            "restore config state file": 0.0,
            "list SD models": 0.06541204452514648,
            "list localizations": 0.001010894775390625,
            "load scripts/custom_code.py": 0.0035066604614257812,
            "load scripts/img2imgalt.py": 0.0004994869232177734,
            "load scripts/loopback.py": 0.0004994869232177734,
            "load scripts/outpainting_mk_2.py": 0.0005009174346923828,
            "load scripts/poor_mans_outpainting.py": 0.0004990100860595703,
            "load scripts/postprocessing_caption.py": 0.0004999637603759766,
            "load scripts/postprocessing_codeformer.py": 0.0,
            "load scripts/postprocessing_create_flipped_copies.py": 0.0004999637603759766,
            "load scripts/postprocessing_focal_crop.py": 0.0025022029876708984,
            "load scripts/postprocessing_gfpgan.py": 0.001008749008178711,
            "load scripts/postprocessing_split_oversized.py": 0.0005025863647460938,
            "load scripts/postprocessing_upscale.py": 0.0004999637603759766,
            "load scripts/processing_autosized_crop.py": 0.0,
            "load scripts/prompt_matrix.py": 0.0004999637603759766,
            "load scripts/prompts_from_file.py": 0.0005002021789550781,
            "load scripts/sd_upscale.py": 0.000499725341796875,
            "load scripts/xyz_grid.py": 0.0015001296997070312,
            "load scripts/ldsr_model.py": 0.2950904369354248,
            "load scripts/lora_script.py": 0.0691525936126709,
            "load scripts/scunet_model.py": 0.013014078140258789,
            "load scripts/swinir_model.py": 0.010008811950683594,
            "load scripts/hotkey_config.py": 0.0005002021789550781,
            "load scripts/extra_options_section.py": 0.000499725341796875,
            "load scripts/legacy_preprocessors.py": 0.007002353668212891,
            "load scripts/preprocessor_inpaint.py": 0.01101827621459961,
            "load scripts/preprocessor_marigold.py": 0.0778038501739502,
            "load scripts/preprocessor_normalbae.py": 0.004111289978027344,
            "load scripts/preprocessor_recolor.py": 0.0,
            "load scripts/forge_reference.py": 0.0009300708770751953,
            "load scripts/preprocessor_revision.py": 0.0,
            "load scripts/preprocessor_tile.py": 0.0005037784576416016,
            "load scripts/forge_controllllite.py": 0.005502223968505859,
            "load scripts/controlnet.py": 0.236037015914917,
            "load scripts/xyz_grid_support.py": 0.0010004043579101562,
            "load scripts/sd_forge_controlnet_example.py": 0.0005006790161132812,
            "load scripts/forge_freeu.py": 0.001499176025390625,
            "load scripts/forge_hypertile.py": 0.002518177032470703,
            "load scripts/forge_ipadapter.py": 0.004499912261962891,
            "load scripts/kohya_hrfix.py": 0.0019996166229248047,
            "load scripts/forge_photomaker.py": 0.0009996891021728516,
            "load scripts/forge_sag.py": 0.0020170211791992188,
            "load scripts/forge_stylealign.py": 0.0005030632019042969,
            "load scripts/forge_svd.py": 0.017520427703857422,
            "load scripts/forge_z123.py": 0.013013839721679688,
            "load scripts/soft_inpainting.py": 0.0005002021789550781,
            "load scripts/lora.py": 0.0005006790161132812,
            "load scripts/trt.py": 0.17665934562683105,
            "load scripts/!adetailer.py": 0.7216448783874512,
            "load scripts/refiner.py": 0.0009989738464355469,
            "load scripts/seed.py": 0.0005006790161132812,
            "load scripts": 1.692070484161377,
            "load upscalers": 0.0034999847412109375,
            "refresh VAE": 0.0014994144439697266,
            "refresh textual inversion templates": 0.0,
            "scripts list_optimizers": 0.0005002021789550781,
            "scripts list_unets": 0.0,
            "reload hypernetworks": 0.0015039443969726562,
            "initialize extra networks": 0.0065155029296875,
            "scripts before_ui_callback": 0.000499725341796875,
            "create ui": 0.7580077648162842,
            "gradio launch": 0.10605955123901367,
            "add APIs": 0.004004001617431641,
            "app_started_callback/lora_script.py": 0.0004990100860595703,
            "app_started_callback/!adetailer.py": 0.0,
            "app_started_callback": 0.0004990100860595703
        }
    },
    "Packages": [
        "absl-py==2.1.0",
        "accelerate==0.21.0",
        "addict==2.4.0",
        "aenum==3.1.15",
        "aiofiles==23.2.1",
        "aiohttp==3.9.3",
        "aiosignal==1.3.1",
        "albumentations==1.3.1",
        "altair==5.2.0",
        "antlr4-python3-runtime==4.9.3",
        "anyio==3.7.1",
        "async-timeout==4.0.3",
        "attrs==23.2.0",
        "basicsr==1.4.2",
        "blendmodes==2022",
        "certifi==2024.2.2",
        "cffi==1.16.0",
        "chardet==5.2.0",
        "charset-normalizer==3.3.2",
        "clean-fid==0.1.35",
        "click==8.1.7",
        "clip==1.0",
        "colorama==0.4.6",
        "coloredlogs==15.0.1",
        "colorlog==6.8.2",
        "contourpy==1.2.0",
        "cssselect2==0.7.0",
        "cycler==0.12.1",
        "cython==3.0.8",
        "datasets==2.16.1",
        "deprecation==2.1.0",
        "depth-anything==2024.1.22.0",
        "diffusers==0.25.0",
        "dill==0.3.7",
        "easydict==1.11",
        "einops==0.4.1",
        "embreex==2.17.7.post4",
        "exceptiongroup==1.2.0",
        "facexlib==0.3.0",
        "fastapi==0.94.0",
        "ffmpy==0.3.1",
        "filelock==3.13.1",
        "filterpy==1.4.5",
        "flatbuffers==23.5.26",
        "fonttools==4.47.2",
        "frozenlist==1.4.1",
        "fsspec==2023.10.0",
        "ftfy==6.1.3",
        "future==0.18.3",
        "fvcore==0.1.5.post20221221",
        "gitdb==4.0.11",
        "gitpython==3.1.32",
        "gradio-client==0.5.0",
        "gradio==3.41.2",
        "grpcio==1.60.1",
        "h11==0.12.0",
        "handrefinerportable==2024.1.18.0",
        "httpcore==0.15.0",
        "httpx==0.24.1",
        "huggingface-hub==0.20.3",
        "humanfriendly==10.0",
        "idna==3.6",
        "imageio==2.33.1",
        "importlib-metadata==7.0.1",
        "importlib-resources==6.1.1",
        "inflection==0.5.1",
        "insightface==0.7.3",
        "iopath==0.1.9",
        "jinja2==3.1.3",
        "joblib==1.3.2",
        "jsonmerge==1.8.0",
        "jsonschema-specifications==2023.12.1",
        "jsonschema==4.21.1",
        "kiwisolver==1.4.5",
        "kornia==0.6.7",
        "lark==1.1.2",
        "lazy-loader==0.3",
        "lightning-utilities==0.10.1",
        "llvmlite==0.42.0",
        "lmdb==1.4.1",
        "lxml==5.1.0",
        "mapbox-earcut==1.0.1",
        "markdown-it-py==3.0.0",
        "markdown==3.5.2",
        "markupsafe==2.1.5",
        "matplotlib==3.8.2",
        "mdurl==0.1.2",
        "mediapipe==0.10.9",
        "mpmath==1.3.0",
        "multidict==6.0.5",
        "multiprocess==0.70.15",
        "networkx==3.2.1",
        "numba==0.59.0",
        "numpy==1.26.2",
        "nvidia-cublas-cu11==11.11.3.6",
        "nvidia-cuda-nvrtc-cu11==11.8.89",
        "nvidia-cuda-runtime-cu11==11.8.89",
        "omegaconf==2.2.3",
        "onnx-graphsurgeon==0.3.27",
        "onnx==1.15.0",
        "onnxruntime==1.17.0",
        "open-clip-torch==2.20.0",
        "opencv-contrib-python==4.9.0.80",
        "opencv-python-headless==4.9.0.80",
        "opencv-python==4.9.0.80",
        "optimum==1.16.2",
        "orjson==3.9.13",
        "packaging==23.2",
        "pandas==2.2.0",
        "piexif==1.1.3",
        "pillow==9.5.0",
        "pip==24.0",
        "platformdirs==4.2.0",
        "polygraphy==0.49.0",
        "portalocker==2.8.2",
        "prettytable==3.9.0",
        "protobuf==3.20.2",
        "psutil==5.9.5",
        "py-cpuinfo==9.0.0",
        "pyarrow-hotfix==0.6",
        "pyarrow==15.0.0",
        "pycollada==0.8",
        "pycparser==2.21",
        "pydantic==1.10.14",
        "pydub==0.25.1",
        "pygments==2.17.2",
        "pyparsing==3.1.1",
        "pyreadline3==3.4.1",
        "python-dateutil==2.8.2",
        "python-multipart==0.0.7",
        "pytorch-lightning==1.9.4",
        "pytz==2024.1",
        "pywavelets==1.5.0",
        "pywin32==306",
        "pyyaml==6.0.1",
        "qudida==0.0.4",
        "referencing==0.33.0",
        "regex==2023.12.25",
        "reportlab==4.0.9",
        "requests==2.31.0",
        "resize-right==0.0.2",
        "rich==13.7.0",
        "rpds-py==0.17.1",
        "rtree==1.2.0",
        "safetensors==0.4.2",
        "scikit-image==0.21.0",
        "scikit-learn==1.4.0",
        "scipy==1.12.0",
        "seaborn==0.13.2",
        "semantic-version==2.10.0",
        "sentencepiece==0.1.99",
        "setuptools==65.5.0",
        "shapely==2.0.2",
        "six==1.16.0",
        "smmap==5.0.1",
        "sniffio==1.3.0",
        "sounddevice==0.4.6",
        "spandrel==0.1.6",
        "starlette==0.26.1",
        "svg.path==6.3",
        "svglib==1.5.1",
        "sympy==1.12",
        "tabulate==0.9.0",
        "tb-nightly==2.16.0a20240205",
        "tensorboard-data-server==0.7.2",
        "tensorrt-bindings==9.0.1.post11.dev4",
        "tensorrt-libs==9.0.1.post11.dev4",
        "tensorrt==9.0.1.post11.dev4",
        "termcolor==2.4.0",
        "tf-keras-nightly==2.16.0.dev2024020510",
        "thop==0.1.1.post2209072238",
        "threadpoolctl==3.2.0",
        "tifffile==2024.1.30",
        "timm==0.9.12",
        "tinycss2==1.2.1",
        "tokenizers==0.13.3",
        "tomesd==0.1.3",
        "tomli==2.0.1",
        "toolz==0.12.1",
        "torch==2.1.2+cu121",
        "torchdiffeq==0.2.3",
        "torchmetrics==1.3.0.post0",
        "torchsde==0.2.6",
        "torchvision==0.16.2+cu121",
        "tqdm==4.66.1",
        "trampoline==0.1.2",
        "transformers==4.30.2",
        "trimesh==4.1.3",
        "typing-extensions==4.9.0",
        "tzdata==2023.4",
        "ultralytics==8.1.9",
        "urllib3==2.2.0",
        "uvicorn==0.27.0.post1",
        "vhacdx==0.0.5",
        "wcwidth==0.2.13",
        "webencodings==0.5.1",
        "websockets==11.0.3",
        "werkzeug==3.0.1",
        "xformers==0.0.23.post1",
        "xxhash==3.4.1",
        "yacs==0.1.8",
        "yapf==0.40.2",
        "yarl==1.9.4",
        "zipp==3.17.0"
    ]
}

Console logs

N/A

Additional information

No response

Vigilence commented 9 months ago

This extension hasn't worked for a while unless you use the dev branch, and even then it stopped working a while ago. Not sure what nvidia is doing but this is shameful.

CaptainVarghoss commented 9 months ago

I'm guessing you haven't kept up with it. It was updated a little over a month ago and it works fine in a1111. It requires that you have your files in specific places (not sub-folders) and there are a couple issues with installing it for some people, but I use it constantly in A1111 and with double the it/s it's still ~40% faster than Forge and at this point is the only thing keeping me from switching over completely.

The extension also seems to work just fine in Forge as far as I can tell and Forge even auto-loads the unet, but something in the way the unet stuff was changed in forge makes it not actually get used for inference.

Maybe it's because the main branch doesn't work with controlnet currently and that's built into Forge.

Vigilence commented 9 months ago

Man, you’re lucky then. Many of us can’t get it to work. Installing it results in the DLL complaint and it doesn’t even show up in the UI anymore.

I would love it if forge could do what invokeai does for diffusion models. It lets you convert the model to a diffusion model with one click, with no messing with any settings etc. And it works super awesome.

CaptainVarghoss commented 9 months ago

It's not luck, there's an entire thread in the issues on the TRT extension github that shows how to fix those problems.

The documentation is garbage and installation is not as one-click as it could be, but it works fine.

Vigilence commented 9 months ago

Many people in their help section have the same issues. I would say if they added what sdnext and comfyui use for the backend we would benefit better.

I got it working once, but it was so buggy it stopped working.

https://github.com/chengzeyi/stable-fast would be welcomed as well as it works with controlnet.

contentis commented 9 months ago

I just started looking at Forge, but it seems to me that there is no unified way of getting the TRT extension working on both Auto1111 and Forge, as they use different APIs to overwrite the UNet. If there is anyone with more experience within the Forge code base, I'd be happy to know more about the best practices for implementing a different backend and to make a POC.

wobba commented 9 months ago

TensorRT works fine for me for generated files I had in automatic1111, but it fails when I try to generate new TensorRT files.

D:\stable-diffusion-webui-forge\ldm_patched\ldm\modules\diffusionmodules\openaimodel.py:857: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert y.shape[0] == x.shape[0]
D:\stable-diffusion-webui-forge\ldm_patched\ldm\modules\diffusionmodules\openaimodel.py:137: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert x.shape[1] == self.channels
ERROR:root:Exporting to ONNX failed. module 'torch.nn.functional' has no attribute 'scaled_dot_product_attention'
Building TensorRT engine... This can take a while, please check the progress in the terminal.
Building TensorRT engine for D:\stable-diffusion-webui-forge\models\Unet-onnx\mymodel.onnx: D:\stable-diffusion-webui-forge\models\Unet-trt\mymodel_78890989_cc86_sample=1x4x96x96+2x4x128x128+4x4x256x256-timesteps=1+2+4-encoder_hidden_states=1x77x2048+2x77x2048+4x231x2048-y=1x2816+2x2816+4x2816.trt
Could not open file D:\stable-diffusion-webui-forge\models\Unet-onnx\mymodel.onnx
ceoper commented 8 months ago

I somehow found a workaround for this error and can successfully "Export Default Engine" (in other web UIs and repos this is called "create a profile in TensorRT"). But I'm a noob programmer, so chances are I don't actually know what I'm doing. Someone might want to check what this actually does before they try the same. At line 123 of webui_forge_cu121_torch21\webui\extensions\Stable-Diffusion-WebUI-TensorRT\exporter.py, replace the swap_sdpa function with the following:

def swap_sdpa(func):
    """Decorator that runs ``func`` with ``F.scaled_dot_product_attention`` left in place.

    The previous version of this workaround deleted the attribute and then
    immediately restored it before calling ``func``, which is a no-op. It
    also only called ``func`` when the attribute existed, so on a build
    without ``scaled_dot_product_attention`` the wrapped function was never
    executed and ``return ret`` raised ``UnboundLocalError``.

    This version keeps the same decorator interface and the same diagnostic
    print, but always calls ``func`` and never touches the attribute.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func`` and returns its result unchanged.
    """

    def wrapper(*args, **kwargs):
        # Named has_sdpa so the local does not shadow the enclosing decorator.
        has_sdpa = hasattr(F, "scaled_dot_product_attention")
        print('#### Exporter.swap_sdpa  hasattr(scaled_dot_product_attention) :: ' + str(has_sdpa))
        # Always run the wrapped function, whether or not the attribute exists.
        return func(*args, **kwargs)

    return wrapper

My guess is the ret = func(*args, **kwargs) part happens before setattr(... old_sdpa) but after delattr(...), so the scaled_dot_product_attention method is gone from the object....?

TensorRT works fine for me for generated files I had in automatic1111, but it fails when I try to generate new TensorRT files.

D:\stable-diffusion-webui-forge\ldm_patched\ldm\modules\diffusionmodules\openaimodel.py:857: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert y.shape[0] == x.shape[0]
D:\stable-diffusion-webui-forge\ldm_patched\ldm\modules\diffusionmodules\openaimodel.py:137: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert x.shape[1] == self.channels
ERROR:root:Exporting to ONNX failed. module 'torch.nn.functional' has no attribute 'scaled_dot_product_attention'
Building TensorRT engine... This can take a while, please check the progress in the terminal.
Building TensorRT engine for D:\stable-diffusion-webui-forge\models\Unet-onnx\mymodel.onnx: D:\stable-diffusion-webui-forge\models\Unet-trt\mymodel_78890989_cc86_sample=1x4x96x96+2x4x128x128+4x4x256x256-timesteps=1+2+4-encoder_hidden_states=1x77x2048+2x77x2048+4x231x2048-y=1x2816+2x2816+4x2816.trt
Could not open file D:\stable-diffusion-webui-forge\models\Unet-onnx\mymodel.onnx
contentis commented 8 months ago

@ceoper The swap_sdpa decorator was a workaround (WAR) for an issue in torch < 2.0 when exporting ONNX models. As Forge is using a newer version, it should be sufficient to simply comment out the @swap_sdpa decorator.

wobba commented 4 months ago

@ceoper's fix together with --always-gpu solved it for me.