从PaddleSharp学习到的Recognize模块
项目背景
使用 PaddleOcrV5-mobile-rec 模型对数字和单位进行识别
需要部署在设备上使用，于是参考了 PaddleSharp
因为只用到识别模块，所以并未引用 NuGet 包
其中结果模型类 PaddleOcrRecognizerResult 和 RecognizedChar 未做任何改动
整体代码
代码都是从 PaddleSharp 源码中扒下来的，大佬的代码写得很好，只需很小的改动就能使用
/// <summary>
/// Text-line recognizer that feeds OpenCV images to a recognition model through an
/// ONNX Runtime <see cref="InferenceSession"/> and decodes the output into text.
/// </summary>
/// <remarks>
/// Every image is converted to 3 channels, resized to a fixed height, padded (or squeezed)
/// to a fixed width, normalized to the range [-1, 1] and packed into one NCHW float tensor.
/// </remarks>
public class OnnxPaddleRec : IDisposable
{
    /// <summary>
    /// Name of the model input the tensor is bound to.
    /// </summary>
    private const string InputName = "x";

    private readonly InferenceSession _session;

    /// <summary>
    /// Image height fed to the model (height used at training time).
    /// </summary>
    private readonly int _height = 48;

    /// <summary>
    /// Image width fed to the model (width used at training time).
    /// </summary>
    private readonly int _width = 320;

    /// <summary>
    /// Character dictionary, one label per entry, in model output order (without the blank).
    /// </summary>
    private readonly IReadOnlyList<string> _dict;

    private bool _disposed;

    /// <summary>
    /// Loads the character dictionary and creates the inference session.
    /// </summary>
    /// <param name="modelPath">Path of the ONNX model file.</param>
    /// <param name="dictPath">Path of the dictionary text file (one label per line).</param>
    public OnnxPaddleRec(string modelPath, string dictPath)
    {
        // Read the dictionary first: if it fails, no native session has been created yet,
        // so nothing is leaked by the constructor throwing.
        _dict = [.. LoadDicts(dictPath)];

        // The options object owns a native handle of its own; it is only needed while the
        // session is being constructed.
        using var opts = new SessionOptions
        {
            GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL,
            InterOpNumThreads = 1,
            IntraOpNumThreads = Math.Min(4, Environment.ProcessorCount),
        };
        _session = new InferenceSession(modelPath, opts);
    }

    /// <summary>
    /// Releases the inference session. Calling it more than once is harmless.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _session.Dispose();
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Recognizes the text in each of the given images.
    /// </summary>
    /// <param name="srcs">Images to recognize; each must be non-empty with 1, 3 or 4 channels.</param>
    /// <returns>One recognition result per input image, in input order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="srcs"/> is null.</exception>
    /// <exception cref="ArgumentException">An element of <paramref name="srcs"/> is null or empty.</exception>
    /// <exception cref="ObjectDisposedException">The recognizer has already been disposed.</exception>
    public PaddleOcrRecognizerResult[] Run(Mat[] srcs)
    {
        if (srcs is null)
        {
            throw new ArgumentNullException(nameof(srcs));
        }
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(OnnxPaddleRec));
        }
        if (srcs.Length == 0)
        {
            return [];
        }
        for (int i = 0; i < srcs.Length; ++i)
        {
            if (srcs[i] is null || srcs[i].Empty())
            {
                throw new ArgumentException($"src[{i}] size should not be 0, wrong input picture provided?");
            }
        }

        // Filled one image at a time so that, if preprocessing fails half-way, the images
        // already produced are still disposed by the finally block below.
        List<Mat> normalizeds = new(srcs.Length);
        try
        {
            foreach (Mat src in srcs)
            {
                normalizeds.Add(Preprocess(src, _height, _width));
            }

            // Build the NCHW input tensor and run the model.
            int channel = normalizeds[0].Channels();
            float[] inputData = ExtractMat(normalizeds, channel, _height, _width);
            var inputTensor = new DenseTensor<float>(inputData, [normalizeds.Count, channel, _height, _width]);
            var input = new[] { NamedOnnxValue.CreateFromTensor(InputName, inputTensor) };
            using var results = _session.Run(input);

            var outTensor = results.First().AsTensor<float>();
            int[] dims = outTensor.Dimensions.ToArray();
            if (dims.Length != 3)
            {
                throw new InvalidOperationException($"Unexpected model output rank {dims.Length}, expected 3.");
            }

            // Per the original author's note the output is laid out as [batch, W/8, classes].
            float[] data = outTensor.ToArray();
            return Decode(data, dims[0], dims[1], dims[2], _dict);
        }
        finally
        {
            foreach (Mat normalized in normalizeds)
            {
                normalized.Dispose();
            }
        }
    }

    /// <summary>
    /// Converts one source image into the normalized 3-channel float image the model expects.
    /// </summary>
    /// <param name="src">Source image with 1, 3 or 4 channels.</param>
    /// <param name="height">Target height.</param>
    /// <param name="width">Target width.</param>
    /// <returns>A new normalized image owned by the caller.</returns>
    /// <exception cref="NotSupportedException">The image has an unsupported channel count.</exception>
    private static Mat Preprocess(Mat src, int height, int width)
    {
        // NOTE(review): the conversion codes are kept exactly as in the original code;
        // confirm RGBA2BGR matches the channel order of the 4-channel inputs actually used.
        using Mat channel3 = src.Channels() switch
        {
            4 => src.CvtColor(ColorConversionCodes.RGBA2BGR),
            1 => src.CvtColor(ColorConversionCodes.GRAY2RGB),
            3 => src.Clone(),
            var x => throw new NotSupportedException($"Unexpect src channel: {x}, allow: (1/3/4)")
        };
        using Mat resized = ResizePadding(channel3, height, width);
        return Normalize(resized);
    }

    /// <summary>
    /// Decodes the raw model output: for every time step the highest-scoring label is taken,
    /// label 0 (blank) is skipped and consecutive repeats of the same label are collapsed.
    /// </summary>
    /// <param name="data">Flattened output, <paramref name="batch"/> x <paramref name="charCount"/> x <paramref name="labelCount"/>.</param>
    /// <param name="batch">Number of images in the batch.</param>
    /// <param name="charCount">Number of time steps per image.</param>
    /// <param name="labelCount">Number of scores per time step.</param>
    /// <param name="labels">Character dictionary.</param>
    /// <returns>One result per image; the score is the mean of the kept labels' scores, or 0 when none was kept.</returns>
    private static PaddleOcrRecognizerResult[] Decode(float[] data, int batch, int charCount, int labelCount, IReadOnlyList<string> labels)
    {
        PaddleOcrRecognizerResult[] decoded = new PaddleOcrRecognizerResult[batch];

        // Pinned so OpenCV can read the managed array in place. Allocated outside the try
        // so a failed allocation is not followed by Free() on an unallocated handle.
        GCHandle dataHandle = GCHandle.Alloc(data, GCHandleType.Pinned);
        try
        {
            IntPtr dataPtr = dataHandle.AddrOfPinnedObject();
            int[] maxIdx = new int[2];
            for (int i = 0; i < batch; ++i)
            {
                StringBuilder sb = new();
                List<RecognizedChar> chars = new();
                float score = 0;
                int lastIndex = 0;
                for (int n = 0; n < charCount; ++n)
                {
                    // 64-bit arithmetic: the element offset can exceed int range for big batches.
                    long rowStart = ((long)i * charCount + n) * labelCount;
                    using Mat mat = Mat.FromPixelData(1, labelCount, MatType.CV_32FC1, OffsetBy(dataPtr, rowStart));
                    mat.MinMaxIdx(out double _, out double maxVal, [], maxIdx);
                    int label = maxIdx[1];
                    if (label > 0 && !(n > 0 && label == lastIndex))
                    {
                        score += (float)maxVal;
                        string character = GetLabelByIndex(label, labels);
                        sb.Append(character);
                        chars.Add(new RecognizedChar(character, (float)maxVal, chars.Count));
                    }
                    lastIndex = label;
                }

                // Average over the number of kept labels; dividing by the string length gave
                // NaN for empty results and a skewed mean for labels longer than one char.
                float meanScore = chars.Count > 0 ? score / chars.Count : 0f;
                decoded[i] = new PaddleOcrRecognizerResult(sb.ToString(), meanScore, chars);
            }
            return decoded;
        }
        finally
        {
            dataHandle.Free();
        }
    }

    /// <summary>
    /// Returns the address of the float at <paramref name="elementIndex"/> counted from <paramref name="basePtr"/>.
    /// </summary>
    private static IntPtr OffsetBy(IntPtr basePtr, long elementIndex)
    {
        return new IntPtr(basePtr.ToInt64() + elementIndex * sizeof(float));
    }

    /// <summary>
    /// Maps a model output index to its label. Index 0 is the blank; indices
    /// 1..labels.Count map to the dictionary entries; labels.Count + 1 maps to a space.
    /// </summary>
    /// <param name="i">Model output index.</param>
    /// <param name="labels">Character dictionary.</param>
    /// <returns>The label text for the index.</returns>
    /// <exception cref="InvalidOperationException">The index has no label (including index 0).</exception>
    private static string GetLabelByIndex(int i, IReadOnlyList<string> labels)
    {
        if (i > 0 && i <= labels.Count)
        {
            // The dictionary does not contain the blank, hence the shift by one.
            return labels[i - 1];
        }
        if (i == labels.Count + 1)
        {
            return " ";
        }
        throw new InvalidOperationException($"Unable to GetLabelByIndex: index {i} out of range {labels.Count}, OCR model or labels not matched?");
    }

    /// <summary>
    /// Reads the dictionary file line by line; every line is one label.
    /// </summary>
    /// <param name="dictName">Path of the dictionary file.</param>
    /// <returns>The lines of the file, in file order.</returns>
    private static IEnumerable<string> LoadDicts(string dictName)
    {
        return File.ReadLines(dictName);
    }

    /// <summary>
    /// Normalizes an 8-bit 3-channel image to floats: (pixel / 255 - 0.5) * 2 per channel.
    /// </summary>
    /// <param name="src">3-channel source image.</param>
    /// <returns>A new CV_32FC3 image owned by the caller.</returns>
    private static Mat Normalize(Mat src)
    {
        using Mat normalized = new();
        src.ConvertTo(normalized, MatType.CV_32FC3, 1.0 / 255);
        Mat[] bgr = normalized.Split();
        try
        {
            float[] scales = [2.0f, 2.0f, 2.0f];
            float[] means = [0.5f, 0.5f, 0.5f];
            for (int i = 0; i < bgr.Length; ++i)
            {
                bgr[i].ConvertTo(bgr[i], MatType.CV_32FC1, 1.0 * scales[i], (0.0 - means[i]) * scales[i]);
            }
            Mat dest = new();
            Cv2.Merge(bgr, dest);
            return dest;
        }
        finally
        {
            // Runs on failure as well, so the split channels are never leaked.
            foreach (Mat channel in bgr)
            {
                channel.Dispose();
            }
        }
    }

    /// <summary>
    /// Copies the images into one contiguous float array in planar (NCHW) order so it can
    /// back the input tensor.
    /// </summary>
    /// <param name="srcs">Normalized float images, all of the same size and channel count.</param>
    /// <param name="channel">Expected channel count.</param>
    /// <param name="height">Expected image height.</param>
    /// <param name="width">Expected image width.</param>
    /// <returns>Array of srcs.Count * channel * height * width floats.</returns>
    /// <exception cref="ArgumentException">An image has a different channel count or size.</exception>
    private static float[] ExtractMat(IReadOnlyList<Mat> srcs, int channel, int height, int width)
    {
        float[] result = new float[srcs.Count * channel * width * height];
        GCHandle resultHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
        try
        {
            IntPtr resultPtr = resultHandle.AddrOfPinnedObject();
            for (int i = 0; i < srcs.Count; ++i)
            {
                Mat src = srcs[i];
                if (src.Channels() != channel)
                {
                    throw new ArgumentException($"src[{i}] channel={src.Channels()}, expected {channel}");
                }
                // A size mismatch would make OpenCV reallocate the destination instead of
                // writing into the pinned array, silently leaving zeros in the tensor.
                if (src.Rows != height || src.Cols != width)
                {
                    throw new ArgumentException($"src[{i}] size={src.Cols}x{src.Rows}, expected {width}x{height}");
                }
                for (int c = 0; c < channel; ++c)
                {
                    long planeStart = ((long)i * channel + c) * height * width;
                    using Mat dest = Mat.FromPixelData(height, width, MatType.CV_32FC1, OffsetBy(resultPtr, planeStart));
                    Cv2.ExtractChannel(src, dest, c);
                }
            }
            return result;
        }
        finally
        {
            resultHandle.Free();
        }
    }

    /// <summary>
    /// Resizes the image to the given height keeping its aspect ratio, then pads the right
    /// side with gray up to <paramref name="targetWidth"/>. Images that would come out wider
    /// than <paramref name="targetWidth"/> are resized to exactly that width instead.
    /// </summary>
    /// <param name="src">Source image.</param>
    /// <param name="height">Target height.</param>
    /// <param name="targetWidth">Target width.</param>
    /// <returns>A new image of size targetWidth x height owned by the caller.</returns>
    private static Mat ResizePadding(Mat src, int height, int targetWidth)
    {
        Size size = src.Size();
        float whRatio = 1.0f * size.Width / size.Height;

        // Clamp before casting: without the upper bound the padding below would be negative
        // for long text lines and CopyMakeBorder would fail.
        double scaledWidth = Math.Ceiling(height * whRatio);
        int width = (int)Math.Max(1, Math.Min(targetWidth, scaledWidth));
        if (width == targetWidth)
        {
            return src.Resize(new Size(width, height));
        }

        using Mat resized = src.Resize(new Size(width, height));
        return resized.CopyMakeBorder(0, 0, 0, targetWidth - width, BorderTypes.Constant, Scalar.Gray);
    }
}

浙公网安备 33010602011771号