from ultralytics import YOLO
import cv2
import os
# Directory containing this script; both weight files are looked up next to it.
current_dir = os.path.split(os.path.abspath(__file__))[0]

# det_model locates boxes in a frame and cls_model labels the cropped box
# (see infer()). They are loaded in this order: pose weights, then cls weights.
det_model, cls_model = (
    YOLO(os.path.join(current_dir, weights_file))
    for weights_file in ("yolov8n-pose.pt", "yolov8n-cls.pt")
)
# Alternative: pass bare weight file names instead of script-relative paths.
# det_model = YOLO('yolov8n-pose.pt')
# cls_model = YOLO('yolov8n-cls.pt')

# Labels indexed by the classifier's top-1 class index in infer(); the order
# therefore has to match the class indices the classifier produces.
class_name = [
    'normal',
    'raise_hand',
    'speak',
    'stand',
    'turn_head',
    'use_phone',
]

def infer(image) -> str:
    """Classify the single detection found in ``image``.

    The detector runs first. The classifier is only applied when exactly
    one box is detected; it is fed the region of the frame inside that box.

    Args:
        image: A frame as returned by ``cv2.VideoCapture.read()``, or any
            other source the detector accepts (for example an image path).
            The crop is taken from the frame the detector processed, so
            non-array sources work as well.

    Returns:
        ``'leave'`` when nothing is detected (or the detected box has no
        area inside the frame), ``'many_humans'`` when the number of boxes
        is not exactly one, otherwise the ``class_name`` entry selected by
        the classifier's top-1 class index.
    """
    det_results = det_model(image, conf=0.5, iou=0.25)
    for r in det_results:
        if len(r) == 0:
            return 'leave'
        boxes = r.boxes.xyxy
        if len(boxes) != 1:
            return 'many_humans'

        # Crop from the array the detector actually ran on rather than from
        # the raw argument, which may be a path or another non-array source.
        frame = r.orig_img
        height, width = frame.shape[:2]

        # xyxy layout: (left, top, right, bottom). Truncate to pixel indices
        # and clamp to the frame so a negative value cannot wrap around.
        x1, y1, x2, y2 = (int(v) for v in boxes[0].tolist())
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, width), min(y2, height)
        if x2 <= x1 or y2 <= y1:
            # Degenerate box: there are no pixels to hand to the classifier.
            return 'leave'

        cls_results = cls_model(frame[y1:y2, x1:x2])
        return class_name[cls_results[0].probs.top1]

    # The detector produced no result objects at all; never return None,
    # because callers pass the label straight to cv2.putText.
    return 'leave'

# image_path = os.path.join(current_dir, "frames/video1_1.png")
# result = infer(image_path)
# print(result)

# Live demo: classify frames from camera 0 and overlay the predicted label.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print('Failed to open camera 0')
else:
    # Overlay settings are loop-invariant, so they are defined once up front.
    org = (50, 50)  # Bottom-left corner of the text
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    color = (255, 0, 0)  # Text colour, BGR order
    thickness = 2  # Stroke thickness of the text

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # A failed read used to make the loop spin forever without
                # any delay; stop instead so the camera gets released.
                break

            result = infer(frame)
            print(result)

            # Draw the label onto the frame and show it.
            cv2.putText(frame, result, org, font, font_scale, color, thickness, cv2.LINE_AA)
            cv2.imshow('test.png', frame)

            # waitKey pumps the GUI event loop and limits the loop to about
            # one frame per second; pressing 'q' or Esc ends the demo.
            if cv2.waitKey(1000) & 0xFF in (ord('q'), 27):
                break
    finally:
        # Always free the camera and close the preview window, including
        # when the loop is interrupted by an exception or Ctrl+C.
        cap.release()
        cv2.destroyAllWindows()