"""
封装百度Paddle detection 和 OCR
"""
# -*- coding: utf-8 -*-
import sys
import os
import cv2
# detection相关包(windows路径格式,注意调整)
from my_detect import MyYoLov5
sys.path.append("baidu_pp_detection\\python")
# from infer import Config,Detector
#from visualize import visualize_box_mask
import numpy as np
# OCR相关包(windows路径格式,注意调整)
sys.path.append("baidu_pp_ocr\\tools\\infer")
sys.path.append("baidu_pp_ocr\\")
import utility as utility
from predict_system import TextSystem
from ppocr.utils.logging import get_logger
logger = get_logger()
class YoLo_Detection:
    """Thin wrapper around a YOLOv5 model for object detection.

    Loads the yolov5x weights once on construction and runs a few dummy
    inferences so the first real request is not slowed down by lazy
    framework initialization (CUDA context, kernel compilation, ...).
    """

    def __init__(self):
        self.model = MyYoLov5(weights="yolov5x.pt", img_size=640,
                              conf_thres=0.25, iou_thres=0.45)
        # COCO class names (English), index-aligned with model class ids.
        self.labels_en = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
                          "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
                          "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
                          "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
                          "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
                          "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
                          "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
                          "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
                          "hair drier", "toothbrush"]
        # Chinese translations, index-aligned with labels_en.
        self.labels_zh = ['人', '自行车', '汽车', '摩托车', '飞机', '公交车', '火车', '卡车', '船', '交通灯', '消防栓', '停止标志', '停车收费器',
                          '长椅', '鸟', '猫', '狗', '马', '羊', '牛', '大象', '熊', '斑马', '长颈鹿', '背包', '雨伞', '手提包', '领带', '手提箱',
                          '飞盘', '滑雪板', '滑雪板', '球类', '风筝', '棒球棍', '棒球手套', '滑板', '冲浪板', '网球拍', '瓶子', '酒杯', '杯子',
                          '餐叉', '刀', '勺子', '碗', '香蕉', '苹果', '三明治', '橙子', '西兰花', '胡萝卜', '热狗', '披萨', '甜甜圈', '蛋糕',
                          '椅子', '沙发', '盆栽', '床', '餐桌', '马桶', '电视', '笔记本电脑', '鼠标', '遥控器', '键盘', '手机', '微波炉', '烤箱',
                          '烤面包机', '水槽', '冰箱', '书', '时钟', '花瓶', '剪刀', '泰迪熊', '吹风机', '牙刷']
        # Warm-up: discard the outputs, we only want the side effect of
        # initializing the inference pipeline.
        print('Warm up detection model')
        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
        for _ in range(10):
            self.model.detect(img)

    def detect_img(self, img):
        """Run detection on one image.

        Args:
            img: image array as expected by MyYoLov5.detect (BGR ndarray).

        Returns:
            (im, results): annotated image and raw detections, exactly as
            produced by MyYoLov5.detect.
        """
        im, results = self.model.detect(img)
        return im, results
# OCR
class Baidu_PP_OCR:
    """Wrapper around PaddleOCR's TextSystem (text detection + recognition).

    Loads the server-grade det/rec models once on construction and warms
    the pipeline up with random images so the first real OCR call runs at
    steady-state speed.
    """

    def __init__(self):
        args = utility.parse_args()
        args.det_model_dir = "baidu_pp_ocr/models/ch_ppocr_server_v2.0_det_infer/"
        args.rec_model_dir = "baidu_pp_ocr/models/ch_ppocr_server_v2.0_rec_infer/"
        args.rec_char_dict_path = "baidu_pp_ocr/ppocr/utils/ppocr_keys_v1.txt"
        args.use_angle_cls = False
        args.use_gpu = True
        self.text_sys = TextSystem(args)
        # Warm-up: outputs are intentionally discarded.
        print('Warm up ocr model')
        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
        for _ in range(10):
            self.text_sys(img)

    def ocr_image(self, img):
        """OCR one image.

        Args:
            img: BGR ndarray.

        Returns:
            (src_im, text_list): the image with detected text boxes drawn
            on it, and the list of recognized strings in detection order.

        NOTE(review): src_im aliases the input array, so the polylines are
        drawn onto the caller's image in place — pass a copy if the
        original must stay untouched.
        """
        dt_boxes, rec_res = self.text_sys(img)
        # rec_res items are (text, confidence); scores are dropped here.
        text_list = [text for text, score in rec_res]
        src_im = img
        for box in dt_boxes:
            box = np.array(box).astype(np.int32).reshape(-1, 2)
            cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
        return src_im, text_list

    def test_ocr(self):
        """Smoke test: OCR ./fapiao.png and write the result to ./output.jpg."""
        image_dir = "./fapiao.png"
        img = cv2.imread(image_dir)
        src_im, text_list = self.ocr_image(img)
        print(text_list)
        cv2.imwrite('./output.jpg', src_im)
import numpy as np
import torch
import torch.nn as nn
import sys
from models.common import Conv, DWConv
from utils.google_utils import attempt_download
class CrossConv(nn.Module):
    """Cross Convolution Downsample: a (1,k) conv followed by a (k,1) conv,
    with an optional residual connection when input/output channels match."""

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        # c1/c2: in/out channels; k: kernel size; s: stride; g: groups;
        # e: hidden-channel expansion ratio; shortcut: residual add if c1 == c2.
        super(CrossConv, self).__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, (1, k), (1, s))
        self.cv2 = Conv(hidden, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        if self.add:
            out = x + out
        return out
class Sum(nn.Module):
    """Weighted sum of 2 or more layers (https://arxiv.org/abs/1911.09070).

    With weight=False the inputs are summed directly; with weight=True each
    input beyond the first is scaled by a learned sigmoid-bounded weight.
    """

    def __init__(self, n, weight=False):
        # n: number of input tensors; weight: learn per-input scale factors.
        super(Sum, self).__init__()
        self.weight = weight  # whether learned weights are applied
        self.iter = range(n - 1)  # indices of the inputs after the first
        if weight:
            # Initial values give sigmoid(w)*2 slightly below 1 per input.
            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)

    def forward(self, x):
        total = x[0]
        if self.weight:
            scale = torch.sigmoid(self.w) * 2  # bounded in (0, 2)
            for idx in self.iter:
                total = total + x[idx + 1] * scale[idx]
        else:
            for idx in self.iter:
                total = total + x[idx + 1]
        return total
class GhostConv(nn.Module):
    """Ghost Convolution (https://github.com/huawei-noah/ghostnet).

    Half of the output channels come from a regular conv; the other half
    are produced cheaply by a depthwise 5x5 conv over that first half.
    """

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
        # c1/c2: in/out channels; k: kernel size; s: stride; g: groups.
        super(GhostConv, self).__init__()
        half = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, half, k, s, None, g, act)
        self.cv2 = Conv(half, half, 5, 1, None, half, act)  # depthwise (groups=half)

    def forward(self, x):
        primary = self.cv1(x)
        cheap = self.cv2(primary)
        return torch.cat([primary, cheap], 1)
class GhostBottleneck(nn.Module):
    """Ghost Bottleneck (https://github.com/huawei-noah/ghostnet):
    pointwise GhostConv -> optional depthwise downsample -> linear GhostConv,
    plus a matching shortcut path."""

    def __init__(self, c1, c2, k=3, s=1):
        # c1/c2: in/out channels; k: depthwise kernel; s: stride (2 = downsample).
        super(GhostBottleneck, self).__init__()
        mid = c2 // 2
        layers = [GhostConv(c1, mid, 1, 1)]  # pointwise
        if s == 2:
            layers.append(DWConv(mid, mid, k, s, act=False))  # depthwise downsample
        else:
            layers.append(nn.Identity())
        layers.append(GhostConv(mid, c2, 1, 1, act=False))  # pointwise-linear
        self.conv = nn.Sequential(*layers)
        # Shortcut must downsample/reshape too when stride is 2.
        if s == 2:
            self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                          Conv(c1, c2, 1, 1, act=False))
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)
class MixConv2d(nn.Module):
# Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
super(MixConv2d, self).__init__()
groups = len(k)
if equal_ch: # equal c_ per group
i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
else: # equal weight.numel() per group
b = [c2] + [0] * groups
a = np.eye(groups + 1, groups, k=-1)
a -= np.roll(a, 1, axis=1)
a *= np.array(k) ** 2
a[0] = 1
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
self.bn = nn.BatchNorm2d(c2)
self.act = nn.LeakyReLU(0.1, inplace=True)
def forward(self, x):
return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
class Ensemble(nn.ModuleList):
    """A list of models run jointly: their per-box outputs are concatenated
    so a single NMS pass can filter candidates from all members."""

    def __init__(self):
        super(Ensemble, self).__init__()

    def forward(self, x, augment=False):
        outputs = [module(x, augment)[0] for module in self]
        # Alternative reductions, kept for reference:
        # y = torch.stack(outputs).max(0)[0]  # max ensemble
        # y = torch.stack(outputs).mean(0)    # mean ensemble
        y = torch.cat(outputs, 1)  # NMS ensemble
        return y, None  # (inference output, train output)
def attempt_load(weights, map_location=None):
    """Load a single model or an ensemble from checkpoint path(s).

    Args:
        weights: one checkpoint path, or a list of paths [a, b, c].
        map_location: forwarded to torch.load (e.g. 'cpu').

    Returns:
        The bare model when one path is given; an Ensemble exposing the
        last model's `names` and `stride` when several are given.
    """
    model = Ensemble()
    weight_list = weights if isinstance(weights, list) else [weights]
    for w in weight_list:
        attempt_download(w)  # fetch the file if it is not present locally
        ckpt = torch.load(w, map_location=map_location)
        # Prefer EMA weights when the checkpoint carries them.
        key = 'ema' if ckpt.get('ema') else 'model'
        model.append(ckpt[key].float().fuse().eval())  # FP32, fused, eval mode

    # Compatibility patches for models saved with older torch versions.
    for m in model.modules():
        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
            m.inplace = True  # pytorch 1.7.0 compatibility
        elif type(m) is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # single model: unwrap
    print('Ensemble created with %s\n' % weights)
    for k in ['names', 'stride']:
        setattr(model, k, getattr(model[-1], k))
    return model  # ensemble
# --- blog-page footer captured with the scraped source; commented out so the file parses ---
# posted on 2025-04-29 09:27 by po3a — 阅读(5) 评论() 收藏 举报