HolyWu / vs-realesrgan

Real-ESRGAN function for VapourSynth
BSD 3-Clause "New" or "Revised" License
76 stars 9 forks source link

TRT issue,.. #21

Closed Selur closed 1 year ago

Selur commented 1 year ago

Using:

# VapourSynth script: loads a DGDecNV-indexed source, sharpens and line-darkens it,
# upscales it with RealESRGAN (TensorRT enabled), scales it back down to 720x306,
# applies CAS sharpening and luma-adaptive grain, and outputs YUV420P10.
# Statement order matters: the DLL search path and PATH are adjusted before the
# plugins and scripts that depend on them are loaded.

# Imports
import vapoursynth as vs
import os
import sys
import ctypes
# Loading Support Files (the handle is kept in Dllref; it is not used again below)
Dllref = ctypes.windll.LoadLibrary("i:/Hybrid/64bit/vsfilters/Support/libfftw3f-3.dll")
# getting Vapoursynth core
core = vs.core
# Import scripts folder (put first on sys.path so 'import havsfunc' below resolves from it)
scriptPath = 'i:/Hybrid/64bit/vsscripts'
sys.path.insert(0, os.path.abspath(scriptPath))
import site
# Adding torch dependencies to PATH
path = site.getsitepackages()[0]+'/torch_dependencies/bin/'
# Register the folder with the Windows DLL search path as well
ctypes.windll.kernel32.SetDllDirectoryW(path)
# Use forward slashes only for the value that is prepended to PATH
path = path.replace('\\', '/')
os.environ["PATH"] = path + os.pathsep + os.environ["PATH"]
# Loading Plugins
core.std.LoadPlugin(path="i:/Hybrid/64bit/vsfilters/GrainFilter/AdaptiveGrain/adaptivegrain_rs.dll")
core.std.LoadPlugin(path="i:/Hybrid/64bit/vsfilters/GrainFilter/AddGrain/AddGrain.dll")
core.std.LoadPlugin(path="i:/Hybrid/64bit/vsfilters/Support/fmtconv.dll")
core.std.LoadPlugin(path="i:/Hybrid/64bit/vsfilters/GrainFilter/RemoveGrain/RemoveGrainVS.dll")
core.std.LoadPlugin(path="i:/Hybrid/64bit/vsfilters/SharpenFilter/CAS/CAS.dll")
core.std.LoadPlugin(path="i:/Hybrid/64bit/vsfilters/DenoiseFilter/DFTTest/DFTTest.dll")
core.std.LoadPlugin(path="i:/Hybrid/64bit/vsfilters/SourceFilter/DGDecNV/DGDecodeNV.dll")
# Import scripts
import havsfunc
# source: 'C:\Users\Selur\Desktop\VideoTest_9s.mkv'
# current color space: YUV420P8, bit depth: 8, resolution: 720x306, fps: 23.976, color matrix: 709, yuv luminance scale: limited, scanorder: progressive
# Loading C:\Users\Selur\Desktop\VideoTest_9s.mkv using DGSource
clip = core.dgdecodenv.DGSource("J:/tmp/2023-01-30@19_43_10_8210.dgi")# 23.976 fps, scanorder: progressive
# Setting detected color matrix (709).
clip = core.std.SetFrameProps(clip, _Matrix=1)
# Setting color transfer info, when it is not set
# NOTE(review): the condition tests the result of core.text.FrameProps(...); if that
# is a clip rather than a boolean, this may not reflect whether '_Transfer' is
# actually set - confirm against the VapourSynth documentation.
clip = clip if not core.text.FrameProps(clip,'_Transfer') else core.std.SetFrameProps(clip, _Transfer=1)
# Setting color primaries info, when it is not set
# NOTE(review): same caveat as for '_Transfer' above - confirm.
clip = clip if not core.text.FrameProps(clip,'_Primaries') else core.std.SetFrameProps(clip, _Primaries=1)
# Setting color range to TV (limited) range.
clip = core.std.SetFrameProp(clip=clip, prop="_ColorRange", intval=1)
# making sure frame rate is set to 23.976
clip = core.std.AssumeFPS(clip=clip, fpsnum=24000, fpsden=1001)
# Marking the clip as progressive (_FieldBased=0)
clip = core.std.SetFrameProp(clip=clip, prop="_FieldBased", intval=0)
# Sharpening using LSFmod
clip = havsfunc.LSFmod(input=clip)
# Using FastLineDarkenMOD for line darkening
clip = havsfunc.FastLineDarkenMOD(c=clip)
from vsrealesrgan import RealESRGAN
clip = core.std.AddBorders(clip=clip, left=0, right=0, top=0, bottom=2) # add borders to achieve mod 4 (VsRealESRGAN) - 720x308
# adjusting color space from YUV420P8 to RGBH for VsRealESRGAN
clip = core.resize.Bicubic(clip=clip, format=vs.RGBH, matrix_in_s="709", range_s="limited")
# resizing using RealESRGAN (TensorRT enabled, cache path set to J:\tmp)
clip = RealESRGAN(clip=clip, model=5, device_index=0, trt=True, trt_cache_path=r"J:\tmp") # 2880x1232
# resizing 2880x1232 to 720x306
# The 2 padded rows added before upscaling correspond to 8 rows at the upscaled size
# (720x308 -> 2880x1232 per the notes above), so 8 rows are cropped here.
clip = core.std.CropRel(clip=clip, left=0, right=0, top=0, bottom=8) # removing borders (VsRealESRGAN) -  2880x1224
# adjusting resizing (converting to RGBS before fmtc.resample)
clip = core.resize.Bicubic(clip=clip, format=vs.RGBS, range_s="limited")
clip = core.fmtc.resample(clip=clip, w=720, h=306, kernel="lanczos", interlaced=False, interlacedd=False)
# contrast sharpening using CAS
clip = core.cas.CAS(clip=clip)
# adjusting color space from RGBS to YUV444P16 for vsAddGrain
clip = core.resize.Bicubic(clip=clip, format=vs.YUV444P16, matrix_s="709", range_s="limited", dither_type="error_diffusion")
# adding Grain using AddGrain and adaptive luma mask
# PlaneStats is applied before adg.Mask - presumably the mask reads the frame
# statistics it attaches; verify against the adaptivegrain documentation.
clip = core.std.PlaneStats(clipa=clip)
clipmask = core.adg.Mask(clip=clip, luma_scaling=12)
clipgrained = core.grain.Add(clip=clip, var=8.00)
# Merge the grained clip into the original through the mask
clip = core.std.MaskedMerge(clip, clipgrained, clipmask)
# adjusting output color from: YUV444P16 to YUV420P10 for x265Model
clip = core.resize.Bicubic(clip=clip, dither_type="error_diffusion", format=vs.YUV420P10, range_s="limited")
# set output frame rate to 23.976fps
clip = core.std.AssumeFPS(clip=clip, fpsnum=24000, fpsden=1001)
# Output
clip.set_output()

And calling: i:\Hybrid\64bit\Vapoursynth\VSPipe.exe "J:\tmp\encodingTempSynthSkript_2023-01-30@19_43_10_8210_0.vpy" - -c y4m | i:\Hybrid\64bit\x265.exe --input - --output-depth 10 --y4m --profile main10 --limit-modes --no-early-skip --no-open-gop --opt-ref-list-length-pps --crf 18.00 --opt-qp-pps --cbqpoffs -2 --crqpoffs -2 --limit-refs 0 --ssim-rd --psy-rd 2.50 --rdoq-level 2 --psy-rdoq 10.00 --aq-mode 0 --deblock=-1:-1 --limit-sao --no-repeat-headers --range limited --colormatrix bt709 --output "J:\tmp\2023-01-30@19_43_10_8210_03.265"

I get:

Warning: i:\Hybrid\64bit\Vapoursynth\Lib\site-packages\torch_tensorrt\fx\tracer\acc_tracer\acc_tracer.py:584: UserWarning: acc_tracer does not support currently support models for training. Calling eval on model before tracing.
  warnings.warn(

Information: == Log pass <function fuse_permute_matmul at 0x0000029B26F88700> before/after graph to C:\Users\Selur\AppData\Local\Temp\tmpjsmylxpt, before/after are the same = True
Information: == Log pass <function fuse_permute_linear at 0x0000029B26F884C0> before/after graph to C:\Users\Selur\AppData\Local\Temp\tmp5zwqs8fw, before/after are the same = True
Information: Now lowering submodule _run_on_acc_0
Information: split_name=_run_on_acc_0, input_specs=[InputTensorSpec(shape=torch.Size([1, 3, 308, 720]), dtype=torch.float16, device=device(type='cuda', index=0), shape_ranges=[], has_batch_dim=True)]
Information: Timing cache is used!
x265 [error]: unable to open input file <->
Information: TRT INetwork construction elapsed time: 0:00:00.387301
Information: Build TRT engine elapsed time: 0:01:01.767990
Information: Lowering submodule _run_on_acc_0 elapsed time 0:01:05.018931
Information: Now lowering submodule _run_on_acc_2
Information: split_name=_run_on_acc_2, input_specs=[InputTensorSpec(shape=torch.Size([1, 3, 1232, 2880]), dtype=torch.float16, device=device(type='cuda', index=0), shape_ranges=[], has_batch_dim=True), InputTensorSpec(shape=torch.Size([1, 3, 1232, 2880]), dtype=torch.float16, device=device(type='cuda', index=0), shape_ranges=[], has_batch_dim=True)]
Information: Timing cache is used!
Information: TRT INetwork construction elapsed time: 0:00:00
Information: Build TRT engine elapsed time: 0:00:00.224972
Information: Lowering submodule _run_on_acc_2 elapsed time 0:00:00.229702
Error: fwrite() call failed when writing frame: 0, plane: 0, errno: 22
Output 33 frames in 0.80 seconds (41.09 fps)

With trt=False the script works fine. Also, I set trt_cache_path=r"J:\tmp" and the error complains about something in C:\Users\Selur\AppData\Local\Temp\tmpjsmylxpt :/

The funny thing is that the preview works fine when using vsViewer/vsedit.

Any idea what could be the issue?

The 'torch_dependencies/bin/' folder contains the files from CUDA-11.7_cuDNN-8.6.0_TensorRT-8.5.2.2_win64.7z. I'm currently using NVIDIA Studio driver 528.24 on Windows 11 Pro 64-bit.

Selur commented 1 year ago

Adding: os.environ["CUDA_MODULE_LOADING"] = "LAZY" to the script seems to fix the issue for me.