Yatish54321 / HumanDetection-PoseEstimation

0 stars 0 forks source link

Human Recognition and Pose Estimation #1

Open Yatish54321 opened 3 months ago

Yatish54321 commented 3 months ago

Using machine learning, deep learning, and image processing, we built three programs that detect objects, estimate their distance from the camera, and extract the human skeleton. This technology can be useful in many fields, including AI–human interaction, virtual reality, robotics, and more.

Yatish54321 commented 3 months ago

requirements.txt

Yatish54321 commented 3 months ago

"""Pose estimation on a video file using MediaPipe Pose, scaled to the screen.

Reads frames from Videos/7.mp4, runs MediaPipe Pose on each frame, draws the
skeleton and landmark points, overlays the FPS, and displays the result until
the video ends or 'q' is pressed.  Windows-only: the screen resolution is
read through ctypes/user32.
"""
import ctypes
import time

import cv2
import mediapipe as mp

mpDraw = mp.solutions.drawing_utils
mpPose = mp.solutions.pose
pose = mpPose.Pose()

cap = cv2.VideoCapture('Videos/7.mp4')

# Get screen resolution (Windows-only user32 API).
user32 = ctypes.windll.user32
screen_width, screen_height = user32.GetSystemMetrics(0), user32.GetSystemMetrics(1)

# Get original video size.
original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Aspect ratio used to scale the video to the screen without distortion.
aspect_ratio = original_width / original_height

print(screen_width, screen_height)

# Calculate the new video size to fit the screen, preserving aspect ratio.
if aspect_ratio > 1:
    video_width = min(original_width, screen_width)
    video_height = int(video_width / aspect_ratio)
else:
    video_height = min(original_height, screen_height)
    video_width = int(video_height * aspect_ratio)

# NOTE(review): CAP_PROP_FRAME_WIDTH/HEIGHT are capture hints that video-file
# backends ignore, so setting them on `cap` had no effect.  Each frame is
# resized explicitly below instead.
pTime = 0
while True:
    success, img = cap.read()
    if not success:  # end of video or read failure
        break
    img = cv2.resize(img, (video_width, video_height))

    # MediaPipe expects RGB; OpenCV delivers BGR.
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pose.process(imgRGB)

    if results.pose_landmarks:
        mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)
        h, w = img.shape[:2]  # hoisted: frame size is constant within a frame
        for lm in results.pose_landmarks.landmark:
            # Landmarks are normalized [0, 1]; convert to pixel coordinates.
            cx, cy = int(lm.x * w), int(lm.y * h)
            cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)

    cTime = time.time()
    # Guard against division by zero when two frames share a timestamp.
    fps = 1 / (cTime - pTime) if cTime != pTime else 0.0
    pTime = cTime

    cv2.putText(img, f'FPS: {int(fps)}', (100, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 3)

    cv2.imshow("Image", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

Yatish54321 commented 3 months ago

from ultralytics import YOLO import cv2 import cvzone import math

# Function to calculate distance

def calculate_distance(known_width, focal_length, per_width):
    """Estimate camera-to-object distance with the pinhole-camera model.

    distance = (real-world width * focal length) / perceived width in pixels.
    """
    scaled_width = known_width * focal_length
    return scaled_width / per_width

# Function to get focal length based on object width and known distance

def get_focal_length(known_width, known_distance, per_width):
    """Calibrate the focal length from a reference object of known size.

    focal = (perceived width in pixels * known distance) / real-world width.
    """
    numerator = per_width * known_distance
    return numerator / known_width

# Webcam capture; the oversized requested resolution is clamped by the
# driver to the nearest supported mode.
cap = cv2.VideoCapture(0)
cap.set(3, 2500)   # CAP_PROP_FRAME_WIDTH
cap.set(4, 1200)   # CAP_PROP_FRAME_HEIGHT

model = YOLO("../Yolo_Weights/yolov8n.pt")
# COCO class names in YOLOv8 index order.
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus",
              "train", "truck", "boat", "traffic light", "fire hydrant",
              "stop sign", "parking meter", "bench", "bird", "cat", "dog",
              "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
              "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
              "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket",
              "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
              "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
              "hot dog", "pizza", "donut", "cake", "chair", "sofa",
              "pottedplant", "bed", "diningtable", "toilet", "tvmonitor",
              "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book",
              "clock", "vase", "scissors", "teddy bear", "hair drier",
              "toothbrush"]

# Set known width and known distance (calibrate based on your camera/setup).
known_width = 20.0     # width of the reference object, inches
known_distance = 24.0  # camera-to-reference-object distance, inches
focal_length = -1      # sentinel: calibrated from the first valid detection

while True:
    success, img = cap.read()
    if not success:  # camera unplugged or frame grab failed
        break
    results = model(img, stream=True)

    for r in results:
        for box in r.boxes:
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

            w, h = x2 - x1, y2 - y1
            cvzone.cornerRect(img, (x1, y1, w, h))

            conf = math.ceil(box.conf[0] * 100) / 100  # confidence, 2 decimals
            cls = int(box.cls[0])

            cvzone.putTextRect(img, f'{classNames[cls]} {conf}',
                               (max(0, x1), max(35, y1)), scale=0.7, thickness=1)

            # A degenerate (zero-width) box would divide by zero below.
            if w <= 0:
                continue

            # Calibrate focal length from the first valid detection.
            if focal_length == -1:
                focal_length = get_focal_length(known_width, known_distance, w)

            distance = calculate_distance(known_width, focal_length, w)
            cv2.putText(img, f'Distance: {distance:.2f} inches', (x1, y1 - 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

    cv2.imshow("Image", img)
    # Allow a clean exit with 'q' instead of looping forever.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()