gemenerik / nanoflownet-cnns

MIT License

Inference using python and pre-trained tflite #1

Open ubergeekNZ opened 10 months ago

ubergeekNZ commented 10 months ago

Hi,

I was wondering whether I am using the tflite model correctly and visualizing the output with the flow_vis functions below. I can't seem to get images similar to the ones in the paper.

Paul

```python
import tensorflow as tf
import numpy as np
import cv2
import os

def make_colorwheel():
    """
    Generates a color wheel for optical flow visualization as presented in:
        Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007)
        URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf
    Code follows the original C++ source code of Daniel Scharstein.
    Code follows the Matlab source code of Deqing Sun.
    Returns:
        np.ndarray: Color wheel
    """

    RY = 15
    YG = 6
    GC = 4
    CB = 11
    BM = 13
    MR = 6

    ncols = RY + YG + GC + CB + BM + MR
    colorwheel = np.zeros((ncols, 3))
    col = 0

    # RY
    colorwheel[0:RY, 0] = 255
    colorwheel[0:RY, 1] = np.floor(255 * np.arange(0, RY) / RY)
    col = col + RY
    # YG
    colorwheel[col : col + YG, 0] = 255 - np.floor(255 * np.arange(0, YG) / YG)
    colorwheel[col : col + YG, 1] = 255
    col = col + YG
    # GC
    colorwheel[col : col + GC, 1] = 255
    colorwheel[col : col + GC, 2] = np.floor(255 * np.arange(0, GC) / GC)
    col = col + GC
    # CB
    colorwheel[col : col + CB, 1] = 255 - np.floor(255 * np.arange(0, CB) / CB)
    colorwheel[col : col + CB, 2] = 255
    col = col + CB
    # BM
    colorwheel[col : col + BM, 2] = 255
    colorwheel[col : col + BM, 0] = np.floor(255 * np.arange(0, BM) / BM)
    col = col + BM
    # MR
    colorwheel[col : col + MR, 2] = 255 - np.floor(255 * np.arange(0, MR) / MR)
    colorwheel[col : col + MR, 0] = 255
    return colorwheel

def flow_uv_to_colors(u, v, convert_to_bgr=False):
    """
    Applies the flow color wheel to (possibly clipped) flow components u and v.
    According to the C++ source code of Daniel Scharstein
    According to the Matlab source code of Deqing Sun
    Args:
        u (np.ndarray): Input horizontal flow of shape [H,W]
        v (np.ndarray): Input vertical flow of shape [H,W]
        convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.
    Returns:
        np.ndarray: Flow visualization image of shape [H,W,3]
    """
    flow_image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8)
    colorwheel = make_colorwheel()  # shape [55x3]
    ncols = colorwheel.shape[0]
    rad = np.sqrt(np.square(u) + np.square(v))
    a = np.arctan2(-v, -u) / np.pi
    fk = (a + 1) / 2 * (ncols - 1)
    k0 = np.floor(fk).astype(np.int32)
    k1 = k0 + 1
    k1[k1 == ncols] = 0
    f = fk - k0
    for i in range(colorwheel.shape[1]):
        tmp = colorwheel[:, i]
        col0 = tmp[k0] / 255.0
        col1 = tmp[k1] / 255.0
        col = (1 - f) * col0 + f * col1
        idx = rad <= 1
        col[idx] = 1 - rad[idx] * (1 - col[idx])
        col[~idx] = col[~idx] * 0.75  # out of range
        # Note the 2-i => BGR instead of RGB
        ch_idx = 2 - i if convert_to_bgr else i
        flow_image[:, :, ch_idx] = np.floor(255 * col)
    return flow_image

def flow_to_color(flow_uv, clip_flow=None, convert_to_bgr=False, flow_norm=None):
    """
    Expects a flow field of shape [H,W,2].
    Args:
        flow_uv (np.ndarray): Flow UV image of shape [H,W,2]
        clip_flow (float, optional): Clip maximum of flow values. Defaults to None.
        convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.
        flow_norm (float, optional): Value used to normalize the flow. If None, the maximum flow magnitude is used.
    Returns:
        np.ndarray: Flow visualization image of shape [H,W,3]
    """
    assert flow_uv.ndim == 3, "input flow must have three dimensions"
    assert flow_uv.shape[2] == 2, "input flow must have shape [H,W,2]"
    if clip_flow is not None:
        flow_uv = np.clip(flow_uv, 0, clip_flow)
    u = flow_uv[:, :, 0]
    v = flow_uv[:, :, 1]

    # --> normalization modification by Kedar Tatwawadi
    if flow_norm is not None:
        assert flow_norm > 0
    else:
        rad = np.sqrt(np.square(u) + np.square(v))
        rad_max = np.max(rad)
        epsilon = 1e-5
        flow_norm = rad_max + epsilon
    u = u / flow_norm
    v = v / flow_norm
    # <--
    return flow_uv_to_colors(u, v, convert_to_bgr)

video_file = "data/test.mp4"

# Load TFLite model and allocate tensors
tflite_model_path = 'nanoflownet-cnns/pretrained_models/nanoflownet/nanoflownet_unquantized.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

# Get input and output tensors details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Prepare input data
# Note: Replace this with actual preprocessed frame data from your video
input_shape = input_details[0]['shape']
input_data = np.random.random_sample(input_shape).astype(input_details[0]['dtype'])

# Set the tensor to point to the input data to be inferred
interpreter.set_tensor(input_details[0]['index'], input_data)

# Run inference
interpreter.invoke()

# Extract the output and postprocess if necessary
output_data = interpreter.get_tensor(output_details[0]['index'])

# Check and visualize the outputs
print("Output data shape:", output_data.shape)
print("Output data:", output_data)

# Open the video file
cap = cv2.VideoCapture(video_file)

# Make sure the folder for the per-frame images exists (cv2.imwrite fails silently otherwise)
os.makedirs('images', exist_ok=True)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output_flow.mp4', fourcc, 20.0, (40, 28))  # (w, h) = (40, 28)

count = 0
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Preprocess the frame
    input_shape = input_details[0]['shape']

    # Convert frame to grayscale
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Resize and preprocess the frame
    preprocessed_frame = cv2.resize(gray_frame, (input_shape[2], input_shape[1]))  # Resize
    preprocessed_frame = np.expand_dims(preprocessed_frame, axis=2)  # Add channel dimension
    preprocessed_frame = np.expand_dims(preprocessed_frame, axis=0)  # Add batch dimension
    preprocessed_frame = preprocessed_frame.astype(input_details[0]['dtype'])  # Type casting

    # Check shapes
    print("Expected input shape:", input_shape)
    print("Actual input shape:", preprocessed_frame.shape)

    # Run inference
    interpreter.set_tensor(input_details[0]['index'], preprocessed_frame)
    interpreter.invoke()

    output_data = interpreter.get_tensor(output_details[0]['index'])
    output_image = np.squeeze(output_data)  

    # Check shape
    print("Output image shape:", output_image.shape)

    # Visualizing optical flow using provided functions
    # flow_image = flow_to_color(output_image, clip_flow=None, convert_to_bgr=True)
    # Extract u (dx) and v (dy) components of the flow
    u = output_image[:, :, 0]  
    v = output_image[:, :, 1]  

    # Visualizing optical flow using flow_uv_to_colors
    flow_image = flow_uv_to_colors(u, v, convert_to_bgr=True)
    cv2.imwrite(f'images/output_flow_{count}.jpg', flow_image)

    # Blend the original frame and the flow visualization
    # Ensure the original frame is the same size as the flow visualization
    original_resized = cv2.resize(frame, (40, 28))
    alpha = 0.5
    beta = 1 - alpha
    gamma = 0
    blended = cv2.addWeighted(original_resized, alpha, flow_image, beta, gamma)

    # Save the blended frame to the video
    out.write(blended)

    count += 1

# Release everything
cap.release()
out.release()
cv2.destroyAllWindows()
```
gemenerik commented 10 months ago

It looks like you are feeding a single image into the CNN at a time. The CNN is designed to calculate the optical flow between a pair of frames, and must be fed both frames each time. Unlike the models used during training, the TensorFlow Lite model that you are using here actually has two separate inputs for the frames. You can easily visualize this with a tool like netron.
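For what it's worth, a rough sketch of a two-input inference loop could look like the following (untested; the input order, expected value range, and shapes are assumptions, so check `interpreter.get_input_details()` or inspect the model in netron as mentioned above):

```python
import cv2
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(
    model_path='nanoflownet-cnns/pretrained_models/nanoflownet/nanoflownet_unquantized.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()    # should list two inputs, one per frame
output_details = interpreter.get_output_details()

def preprocess(frame, detail):
    """Grayscale, resize to the model's spatial size, add batch and channel dims."""
    shape = detail['shape']                            # assumed [1, H, W, 1]
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, (shape[2], shape[1]))      # cv2.resize takes (W, H)
    # NOTE: whether the model expects raw [0, 255] values or normalized inputs is an
    # assumption here; check the training/preprocessing pipeline.
    return gray[np.newaxis, :, :, np.newaxis].astype(detail['dtype'])

cap = cv2.VideoCapture('data/test.mp4')
ret, prev_frame = cap.read()
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Feed the previous and the current frame into the two separate inputs.
    interpreter.set_tensor(input_details[0]['index'], preprocess(prev_frame, input_details[0]))
    interpreter.set_tensor(input_details[1]['index'], preprocess(frame, input_details[1]))
    interpreter.invoke()
    flow = np.squeeze(interpreter.get_tensor(output_details[0]['index']))  # [H, W, 2]
    # flow can then be visualized with flow_to_color / flow_uv_to_colors as above
    prev_frame = frame
cap.release()
```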

boomer319 commented 7 months ago

Based on the code @ubergeekNZ provided here, I made a Google Colab notebook that uses your webcam to test out nanoflownet.