root
/
python_files


			
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
# -*- coding: utf-8 -*-
# @Time : 2024/3/6 19:02
# @Author : Clown
# @File : demo_ocr.py
# @Software : PyCharm
import cv2
import pytesseract
import numpy as np
from PIL import Image
import pandas as pd

path_in = r'C:\Users\ClownHe\Desktop\1业务支持需求单\Screenshot_2024-03-06-12-10-09-94_e39d2c7de19156b0683cd93e8735f348(1).jpg'
# Load the image. Pillow decodes into RGB channel order; convert('RGB')
# normalizes palette / grayscale / RGBA inputs to exactly three channels, and
# the context manager releases the file handle once the pixels have been
# copied into the NumPy array.
with Image.open(path_in) as pil_image:
    image = np.array(pil_image.convert('RGB'))

# Preprocessing: grayscale. The array is RGB (not OpenCV's usual BGR), so the
# RGB conversion code is required for the luma weights to land on the right
# channels.
gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

# Text recognition.
#
# Tesseract page segmentation modes (--psm) relevant here:
#   3  fully automatic page segmentation, no OSD (the default)
#   6  assume a single uniform block of text
#   7  treat the image as a single text line
#   8  treat the image as a single word
boxes = pytesseract.image_to_boxes(
    gray_image,
    lang='chi_sim+eng',
    config='--psm 6',
    output_type=pytesseract.Output.STRING,
)
# `boxes` is box-file text, one recognized character per line:
#   <char> <left> <bottom> <right> <top> <page>
# Coordinates are pixels measured from the BOTTOM-left corner of the image, so
# y values must be flipped (image height - y) before drawing with OpenCV.

datas = pytesseract.image_to_data(
    gray_image,
    lang='chi_sim+eng',
    config='--psm 6',
    output_type=pytesseract.Output.DICT,
)
# `datas` maps each TSV column name to a list with one entry per layout element:
#   level      hierarchy level (1 page, 2 block, 3 paragraph, 4 line, 5 word)
#   page_num   page index of the element
#   block_num  block index within the page
#   par_num    paragraph index within the block
#   line_num   line index within the paragraph
#   word_num   word index within the line
#   left, top  top-left corner of the bounding box, in pixels from the
#              top-left corner of the image
#   width      bounding-box width in pixels
#   height     bounding-box height in pixels
#   conf       recognition confidence on a 0-100 scale; -1 for rows that are
#              not words
#   text       recognized text (empty for rows that are not words)
# Turn the column-oriented OCR result into one dict per detected element.
df_out = pd.DataFrame(datas).to_dict('records')
for row in df_out:
    # Confidence can arrive as an int, a float, or a numeric string depending
    # on the Tesseract / pytesseract versions. Membership in range() would
    # only match exact integers, so compare numerically instead.
    try:
        confidence = float(row['conf'])
    except (TypeError, ValueError):
        continue

    # NOTE(review): the filter keeps rows whose text is exactly one space;
    # confirm this is intended rather than "non-empty text".
    if 88 <= confidence < 100 and row['text'] == ' ':
        print(row)
        x, y, w, h = (int(row[key]) for key in ('left', 'top', 'width', 'height'))
        # left/top/width/height describe the box from its top-left corner, so
        # the opposite corner is (x + w, y + h).
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Character boxes: drawing is currently disabled, so every box-file line is
# only echoed together with its Python type.
# To draw them instead, split the line on whitespace, read fields 1-4 as the
# box coordinates, and flip the y values with `image.shape[0] - y` before
# calling cv2.rectangle.
for box_line in boxes.splitlines():
    print(box_line, type(box_line))


# Downscale the annotated image for display.
scale_percent = 50  # scale factor, in percent
# Clamp to at least one pixel so very small images do not produce a zero-sized
# target, which cv2.resize rejects.
width = max(1, int(image.shape[1] * scale_percent / 100))
height = max(1, int(image.shape[0] * scale_percent / 100))
dim = (width, height)
resized_image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)


# Show the result. cv2.imshow interprets 3-channel data as BGR, while `image`
# was loaded through Pillow in RGB order, so swap the channels for display
# only; `resized_image` itself stays in RGB.
cv2.imshow('Result', cv2.cvtColor(resized_image, cv2.COLOR_RGB2BGR))
cv2.waitKey(0)
cv2.destroyAllWindows()