pngInfo/code/inference copy.py


								from ultralytics import YOLO

								import numpy as np

								import cv2

								from paddleocr import PaddleOCR

								import re

								import os

								# Input files (image, YOLO weights, OCR models) are looked up next to this script.
								current_dir = os.path.dirname(os.path.abspath(__file__))
								img_path = os.path.join(current_dir, "test1.jpg")

								img = cv2.imread(img_path)
								# cv2.imread reports failure by returning None instead of raising; stop
								# here with a clear message rather than failing later when the image is
								# sliced.
								if img is None:
								    raise FileNotFoundError(f"Could not read image: {img_path}")

								# Detection runs on the image path (not the decoded array), on CPU.
								model = YOLO(os.path.join(current_dir, "best.pt"))
								results = model.predict(img_path, device='cpu')

								# CPU-only OCR engine using the detection/recognition models shipped
								# under ocr/simple next to this script.
								ocr = PaddleOCR(
								    use_gpu=False,
								    use_angle_cls=True,
								    det_model_dir=os.path.join(current_dir, "ocr/simple/ch_PP-OCRv4_det_infer"),
								    rec_model_dir=os.path.join(current_dir, "ocr/simple/ch_PP-OCRv4_rec_infer"),
								)


								# Split the detected boxes of each result into one list per class id
								# (0-3). The lists are rebuilt for every result, so after the loop
								# target_0..target_3 describe the last result processed.
								for r in results:
								    detections = r.boxes
								    class_ids = np.array(detections.cls).astype(int)
								    corners = np.array(detections.xyxy).astype(int)

								    # One bucket per expected class id; any other id is ignored.
								    buckets = {0: [], 1: [], 2: [], 3: []}
								    for class_id, box in zip(class_ids, corners):
								        bucket = buckets.get(int(class_id))
								        if bucket is not None:
								            bucket.append(box)

								    target_0 = buckets[0]
								    target_1 = buckets[1]
								    target_2 = buckets[2]
								    target_3 = buckets[3]


								# Result dictionary: one (initially empty) list of output lines per class,
								# keyed 'target_0' .. 'target_3'.
								results_summary = {f'target_{class_id}': [] for class_id in range(4)}


								def _crop(image, box, pad=0):
								    """Return the region of *image* covered by *box*, grown by *pad* pixels.

								    *box* is indexed as (x1, y1, x2, y2). The start indices are clamped to
								    zero: a negative start would make NumPy slice from the far edge and
								    give an empty or wrong region. End indices past the image need no
								    clamping because slicing truncates them.
								    """
								    top = max(int(box[1]) - pad, 0)
								    left = max(int(box[0]) - pad, 0)
								    return image[top:int(box[3]) + pad, left:int(box[2]) + pad]


								def _ocr_texts(ocr_result, with_boxes=True):
								    """Collect the recognised strings from an ``ocr.ocr(...)`` return value.

								    Entries are read as ``line[1][0]`` when detection was run
								    (``with_boxes=True``) and as ``line[0]`` for recognition-only calls.
								    A ``None`` result or ``None`` per-image entry yields no strings
								    instead of raising.
								    """
								    texts = []
								    for lines in ocr_result or []:
								        for line in lines or []:
								            texts.append(line[1][0] if with_boxes else line[0])
								    return texts


								def _with_decimal_point(reading):
								    """Drop any recognised dots and insert one before the last two characters."""
								    reading = reading.replace('.', '')
								    return reading[:-2] + '.' + reading[-2:]


								# Continue only when each class was detected the expected number of times.
								if (len(target_0) == 2 and len(target_1) == 1
								        and len(target_2) in (1, 2) and len(target_3) == 2):

								    # Class 0: two boxes, taken left to right.
								    target_0 = sorted(target_0, key=lambda x: x[0])
								    left_point = target_0[0]
								    right_point = target_0[1]
								    for target, name in zip([left_point, right_point], ['地址', '姓名']):
								        target_img = _crop(img, target)
								        cv2.imwrite(f'{name}.jpg', target_img)
								        texts = _ocr_texts(ocr.ocr(target_img))
								        out = ''.join(texts) if texts else '未识别到文字'
								        results_summary['target_0'].append(f"{name}: {out}")

								    # Class 1: crop with a 5-pixel margin and run recognition only
								    # (det=False); the last recognised string is used.
								    for target in target_1:
								        target_img = _crop(img, target, pad=5)
								        cv2.imwrite('当前有功.jpg', target_img)
								        texts = _ocr_texts(ocr.ocr(target_img, det=False), with_boxes=False)
								        # Without this guard an empty result would reuse a stale value.
								        out = _with_decimal_point(texts[-1]) if texts else '未识别到文字'
								        results_summary['target_1'].append(f"当前有功: {out}")

								    # Class 2: use the topmost box (smallest y1) and keep the longest
								    # recognised line.
								    top_target = min(target_2, key=lambda x: x[1])
								    target_img = _crop(img, top_target)
								    cv2.imwrite('电表资产号.jpg', target_img)
								    longest_line = max(_ocr_texts(ocr.ocr(target_img)), key=len, default="")
								    results_summary['target_2'].append(f"电表资产号: {longest_line}")

								    # Class 3: two boxes, taken left to right.
								    target_3 = sorted(target_3, key=lambda x: x[0])
								    left_point = target_3[0]
								    right_point = target_3[1]
								    for target, name in zip([left_point, right_point], ['封印1', '封印2']):
								        target_img = _crop(img, target)
								        height, width = target_img.shape[:2]
								        if width <= height:
								            # Crops that are not wider than tall are turned a quarter
								            # turn: transpose followed by a horizontal flip.
								            target_img = cv2.transpose(target_img)
								            target_img = cv2.flip(target_img, flipCode=1)
								        cv2.imwrite(f'{name}.jpg', target_img)
								        out = ''.join(_ocr_texts(ocr.ocr(target_img)))
								        results_summary['target_3'].append(f"{name}: {out}")

								    # Print every collected line, class by class.
								    for result_list in results_summary.values():
								        for result in result_list:
								            print(result)
								else:
								    print("图像不清晰或要素不全请重新拍摄或人工记录")