Build a word counter for images with the help of a PaddleHub model - [camera support]

说明

接着上篇，实现拍摄统计功能。

https://www.cnblogs.com/lightsong/p/14592798.html

功能明细：

实时展示摄像头内容
实时统计
统计结果在展示视频中显示

技术依赖

上篇是基于linux环境，由于需要添加实时展示功能，需要切换到windows。

OCR模块依赖

https://paddlehub.readthedocs.io/zh_CN/develop/quick_experience/cmd_quick_run.html

需要安装 shapely 和 pyclipper 库。

# 下载待测试的图片
$ wget https://paddlehub.bj.bcebos.com/model/image/ocr/test_ocr.jpg

# 该Module依赖于第三方库shapely和pyclipper，需提前安装
$ pip install shapely
$ pip install pyclipper

# 通过命令行方式实现文字识别任务
$ hub run chinese_ocr_db_crnn_mobile --input_path test_ocr.jpg --visualization=True --output_dir='ocr_result'

pyclipper库是 clipper工具的一个封装，需要在windows上安装clipper。

http://www.angusj.com/delphi/clipper.php

pyclipper提供的“From Source”安装方法，可以安装这个依赖。

https://github.com/fonttools/pyclipper

From source

Cython required.

Clone the repository:
git clone git@github.com:fonttools/pyclipper.git
Install:
python setup.py install
After every modification of .pyx files compile with Cython:
python setup.py build_ext --inplace

`cv2.VideoCapture捕捉和显示控制`

注意：cv2.VideoCapture 的参数为 0 时，表示从摄像头捕捉画面；参数为文件路径时，表示读取视频文件。

https://blog.csdn.net/j18423532754/article/details/106520257

import time
import cv2

# Play a video file in a window, overlaying the measured frame rate.
# Passing 0 instead of a path makes VideoCapture read from the camera.
cap = cv2.VideoCapture("D:\\jc\\Myself\\video\\Hacker_glasses_07_Videvo.mov")  # read from file
start_time = time.time()
counter = 0
# Video width
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
# Video height
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)  # average frame rate of the video
# Some backends report 0 for the frame rate; skip pacing instead of
# dividing by zero in that case.
frame_delay = 1 / fps if fps > 0 else 0

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of stream or read failure: frame is None, so stop before
            # handing it to any drawing call.
            break

        # Space pauses until the next key press, q quits.
        key = cv2.waitKey(1) & 0xff
        if key == ord(" "):
            cv2.waitKey(0)
        if key == ord("q"):
            break

        counter += 1  # frames since the last measurement
        elapsed = time.time() - start_time
        if elapsed > 0:  # show the live frame rate
            current_fps = counter / elapsed
            cv2.putText(frame, "FPS {0:.1f}".format(current_fps), (500, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255),
                        3)
            # Display at half the source resolution.
            src = cv2.resize(frame, (frame_width // 2, frame_height // 2),
                             interpolation=cv2.INTER_CUBIC)
            cv2.imshow('frame', src)
            print("FPS: ", current_fps)
            counter = 0
            start_time = time.time()

        if frame_delay:
            time.sleep(frame_delay)  # play back at the source frame rate
finally:
    # Always free the capture device and close the window, even on error.
    cap.release()
    cv2.destroyAllWindows()

Code

https://github.com/fanqingsong/writing_words_counter

# !pip install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple #由于PaddleHub升级比较快，建议大家直接升级到最新版本的PaddleHub，无需指定版本升级
# !pip install shapely -i https://pypi.tuna.tsinghua.edu.cn/simple #该Module依赖于第三方库shapely，使用该Module之前，请先安装shapely
# !pip install pyclipper -i https://pypi.tuna.tsinghua.edu.cn/simple #该Module依赖于第三方库pyclipper，使用该Module之前，请先安装pyclipper
#
import os
import time

import paddlehub as hub
import cv2 as cv
import shutil


class WordsCounter:
    """Count the characters that OCR recognizes in an image or in camera frames.

    Text recognition is delegated to the PaddleHub module
    ``chinese_ocr_db_crnn_server``. The count is the number of characters in
    the recognized text, not the number of whitespace-separated words.
    """

    def __init__(self):
        """Load the OCR module and set the workspace/snapshot directories."""
        self._ocr = hub.Module(name="chinese_ocr_db_crnn_server")
        self._workspace_path = "./workspace"
        self._snapshot_path = f"{self._workspace_path}/snapshot"

    def _get_image_data(self, image_path):
        """Read the image at *image_path* with OpenCV.

        Returns the decoded image, or ``None`` when the file cannot be read.
        """
        image_data = cv.imread(image_path)

        # cv.imread signals failure by returning None instead of raising, so
        # check before touching attributes such as ``.shape``.
        if image_data is None:
            print(f"failed to read image: {image_path}")
            return None

        print(type(image_data))
        print(f"image_data.shape={image_data.shape}")

        return image_data

    def _get_ocr_results_from_image_data(self, image_data):
        """Run OCR on one image and return the raw result list.

        Returns an empty list when *image_data* is ``None``.
        """
        if image_data is None:
            print("image_data is none")
            return []

        ocr_results = self._ocr.recognize_text(images=[image_data])
        print(ocr_results)

        return ocr_results

    def _get_text_from_ocr_results(self, ocr_results):
        """Join the recognized text fragments into one string.

        Each entry of *ocr_results* is read as ``entry["data"]``, a list of
        items carrying a ``"text"`` key. Fragments are separated by
        ``"\\r\\n"``, and an empty line follows the text of every image.
        """
        lines = []

        for one_result in ocr_results:
            lines.extend(one_info["text"] for one_info in one_result["data"])
            # add empty line before storing next image text
            lines.append("")

        all_text = "\r\n".join(lines)

        print("----- all text --------")
        print(all_text)

        return all_text

    def _count_words_in_text(self, text: str) -> int:
        """Return the number of characters in *text*, ignoring line separators."""
        pure_text = text.replace("\r\n", "")

        return len(pure_text)

    def count_words_for_one_image(self, image_path):
        """Return the number of characters OCR recognizes in one image file.

        An unreadable image yields 0, because the OCR step returns no results
        for a missing image.
        """
        image_data = self._get_image_data(image_path)

        ocr_results = self._get_ocr_results_from_image_data(image_data)

        text = self._get_text_from_ocr_results(ocr_results)

        num = self._count_words_in_text(text)

        print(f"num = {num}")

        return num

    def _prepare_for_watch(self):
        """Make sure the snapshot directory exists."""
        # makedirs also creates the parent workspace directory; os.mkdir would
        # raise FileNotFoundError when "./workspace" is missing.
        os.makedirs(self._snapshot_path, exist_ok=True)

    def watch_camera(self):
        """Show the camera feed with a live character count until ``q`` is pressed.

        Every frame is passed through OCR, annotated with the count, displayed
        in a window and saved as ``<index>.jpg`` in the snapshot directory.
        Space pauses until the next key press.
        """
        self._prepare_for_watch()

        cap = cv.VideoCapture(0)

        fps = cap.get(cv.CAP_PROP_FPS)  # average frame rate
        print(f"fps = {fps}")
        # Cameras frequently report 0 for the frame rate; skip pacing instead
        # of dividing by zero in that case.
        frame_delay = 1 / fps if fps > 0 else 0

        index = 0
        try:
            while True:
                ret, frame = cap.read()

                if not ret:
                    print(f"capture failed with ret={ret} frame={frame}")
                    break

                ocr_results = self._get_ocr_results_from_image_data(frame)

                text = self._get_text_from_ocr_results(ocr_results)

                num = self._count_words_in_text(text)

                cv.putText(frame, f"Words total = {num}", (50, 50),
                           cv.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255),
                           2)

                cv.imshow('Video', frame)

                image_path = f'{self._snapshot_path}/{index}.jpg'
                cv.imwrite(image_path, frame)

                index += 1

                # Space pauses until the next key press, q quits.
                key = cv.waitKey(1) & 0xff
                if key == ord(" "):
                    cv.waitKey(0)
                if key == ord("q"):
                    break

                if frame_delay:
                    time.sleep(frame_delay)  # pace at the reported frame rate
        finally:
            # Release the camera and close the window even if OCR or drawing
            # raised inside the loop.
            cap.release()
            cv.destroyAllWindows()

        print('capture finish, get %d frame' % index)




if __name__ == "__main__":
    # Script entry point: constructing the counter loads the OCR module
    # (see WordsCounter.__init__).
    words_counter = WordsCounter()

    # Real-time counting from the camera; returns once the capture loop exits.
    words_counter.watch_camera()

    # Single-image mode, kept for manual testing; the call below is disabled.
    one_writing_path = './workspace/one_student_writing.jpeg'
    # words_counter.count_words_for_one_image(one_writing_path)

Effect

除了实时显示外，在workspace/snapshot目录下，可以查看帧图片，图片上有字数统计。

output -- log

[{'save_path': '', 'data': [{'text': '测', 'confidence': 0.988754391670227, 'text_box_position': [[411, 60], [487, 68], [481, 130], [405, 122]]}, {'text': '式数据', 'confidence': 0.7919352054595947, 'text_box_position': [[380, 188], [457, 202], [414, 446], [337, 432]]}]}]
----- all text --------
测
式数据

output -- image

posted @ 2021-03-30 14:45 lightsong 阅读(173) 评论(0) 收藏举报

刷新页面返回顶部

Stay Hungry,Stay Foolish!

lightsong

{Web: [React, Vue, NodeJS, HTTP], DevOps: [Jenkins, Docker, K8S], Languages: [Python, JS, C, Lua, Shell, Groovy], AI: [LLM, LangChain, LangGraph]}