Python 实现图片提取文字

最新推荐文章于 2025-05-11 21:41:06 发布

穿梭的编织者

最新推荐文章于 2025-05-11 21:41:06 发布

阅读量825

点赞数 4

CC 4.0 BY-SA版权

分类专栏：Python脚本
文章标签：python、开发语言

本文链接：https://blog.csdn.net/kouweizhu/article/details/145979681

Python脚本专栏收录该内容

21 篇文章

订阅专栏

文章目录

一、效果图

使用的图片：

返回文字：

二、库安装

pip install easyocr opencv-python numpy

三、使用示例

# Build the processor with its default language list, then run OCR on
# test.png: the annotated copy is written to output.png and only detections
# whose confidence is above 0.6 are kept in `results`.
ocr = EasyOCRProcessor()
results = ocr.extract_text(
    "test.png",
    "output.png",
    confidence_threshold=0.6
)

四、完整代码

import easyocr
import cv2
import numpy as np


class EasyOCRProcessor:
    """Extract text from an image file with EasyOCR.

    The image is contrast/brightness adjusted and sharpened before being
    handed to the reader; detections above a confidence threshold are
    returned and, optionally, drawn onto a copy of the image saved to disk.
    """

    # Smallest baseline y (in pixels) used for a label so that text detected
    # near the top edge does not get its label drawn off-canvas.
    _MIN_LABEL_Y = 15

    def __init__(self, languages=None):
        """
        Create the underlying ``easyocr.Reader``.

        Args:
            languages: Language codes passed to ``easyocr.Reader``. When
                omitted, ``['ch_sim', 'en']`` is used. A fresh list is built
                per instance so the default is never shared between calls.
        """
        if languages is None:
            languages = ['ch_sim', 'en']
        self.reader = easyocr.Reader(languages)

    def enhance_image(self, image):
        """
        Boost contrast/brightness and sharpen an image.

        Args:
            image: OpenCV image (as returned by ``cv2.imread``).

        Returns:
            The adjusted and sharpened image.
        """
        # Linear contrast (alpha) and brightness (beta) adjustment.
        alpha = 1.2
        beta = 10
        adjusted = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)

        # 3x3 sharpening kernel: centre weight 9, neighbours -1 (sums to 1).
        kernel = np.array([[-1, -1, -1],
                           [-1, 9, -1],
                           [-1, -1, -1]])
        return cv2.filter2D(adjusted, -1, kernel)

    @staticmethod
    def _label_origin(bbox):
        """Return the (x, y) text origin for a box label, kept on-canvas."""
        x = int(bbox[0][0])
        y = int(bbox[0][1]) - 10
        # Clamp so a box touching the top edge still gets a visible label.
        return x, max(y, EasyOCRProcessor._MIN_LABEL_Y)

    def _annotate(self, image, bbox, text, confidence):
        """Draw the detection polygon and its ``text (confidence)`` label."""
        points = np.array(bbox, np.int32)
        cv2.polylines(image, [points], True, (0, 255, 0), 2)
        # NOTE: OpenCV's Hershey fonts only cover ASCII; per the OpenCV docs,
        # unsupported characters (e.g. Chinese) are rendered as '?'.
        cv2.putText(image, f"{text} ({confidence:.2f})",
                    self._label_origin(bbox),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    def extract_text(self, image_path, output_path=None, confidence_threshold=0.5):
        """
        Run OCR on an image file.

        Args:
            image_path: Path of the image to read.
            output_path: Optional path; when given, the original image with
                the accepted detections drawn on it is written there.
            confidence_threshold: Detections are kept only when their
                confidence is strictly greater than this value.

        Returns:
            A list of dicts with keys ``'text'``, ``'confidence'`` and
            ``'position'`` (the bounding box as returned by the reader), or
            ``None`` if any step raised; the error is printed in that case.
        """
        try:
            image = cv2.imread(image_path)
            if image is None:
                raise ValueError("无法读取图片")

            # OCR runs on the enhanced copy; annotations go on the original.
            enhanced = self.enhance_image(image)
            results = self.reader.readtext(enhanced)

            text_results = []
            for bbox, text, confidence in results:
                if not confidence > confidence_threshold:
                    continue
                text_results.append({
                    'text': text,
                    'confidence': confidence,
                    'position': bbox
                })
                if output_path:
                    self._annotate(image, bbox, text, confidence)

            # cv2.imwrite signals failure by returning False; report it
            # instead of silently dropping the annotated image, but keep the
            # OCR results since recognition itself succeeded.
            if output_path and not cv2.imwrite(output_path, image):
                print(f"警告: 无法保存图片: {output_path}")

            return text_results

        except Exception as e:
            # Best-effort contract: report the problem and return None.
            print(f"错误: {str(e)}")
            return None


# Usage example
def main():
    """Run OCR on ``test.png``, save ``output.png`` and print each detection."""
    ocr = EasyOCRProcessor()
    results = ocr.extract_text(
        "test.png",
        "output.png",
        confidence_threshold=0.6
    )

    # extract_text returns None on failure and an empty list when nothing
    # passes the threshold; in both cases there is nothing to print.
    if results:
        for result in results:
            print(f"文字: {result['text']}")
            print(f"置信度: {result['confidence']}")
            print(f"位置: {result['position']}")
            print("---")


# Guard the demo so importing this module does not load the OCR model or
# touch the filesystem.
if __name__ == "__main__":
    main()