从PaddleSharp学习到的Recognize模块

项目背景

使用 PP-OCRv5 mobile rec(识别)模型对数字和单位进行识别。
由于需要部署在设备上使用,于是参考了 PaddleSharp 的实现。
因为只用到识别模块,所以没有引用 PaddleSharp 的 NuGet 包。
其中结果模型类 PaddleOcrRecognizerResult 和 RecognizedChar 未做任何改动。

整体代码

下面的代码基本都是从 PaddleSharp 源码中摘出来的;原作者的代码质量很高,只需很小的改动就能使用。

/// <summary>
/// Text recognizer that runs a PaddleOCR recognition model through ONNX Runtime
/// and decodes the model output into text with a character dictionary.
/// </summary>
public class OnnxPaddleRec : IDisposable
{
    private readonly InferenceSession _session;

    /// <summary>
    /// Image height used when the model was trained; every input is resized to this height.
    /// </summary>
    private readonly int _height = 48;

    /// <summary>
    /// Image width used when the model was trained; every input is resized/padded to this width.
    /// </summary>
    private readonly int _width = 320;

    /// <summary>
    /// Character dictionary, one entry per line of the dictionary file.
    /// </summary>
    private readonly IReadOnlyList<string> _dict;


    /// <summary>
    /// Creates the inference session and loads the character dictionary.
    /// </summary>
    /// <param name="modelPath">Path of the ONNX model file.</param>
    /// <param name="dictPath">Path of the dictionary file (one entry per line).</param>
    public OnnxPaddleRec(string modelPath, string dictPath)
    {
        // The options are only needed while the session is being created.
        using var opts = new SessionOptions
        {
            GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL,
            InterOpNumThreads = 1,
            IntraOpNumThreads = Math.Min(4, Environment.ProcessorCount),
        };

        InferenceSession session = new(modelPath, opts);
        try
        {
            _dict = [.. LoadDicts(dictPath)];
        }
        catch
        {
            // Do not leak the native session when the dictionary cannot be read.
            session.Dispose();
            throw;
        }

        _session = session;
    }


    /// <summary>
    /// Releases the underlying inference session.
    /// </summary>
    public void Dispose()
    {
        _session.Dispose();
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Recognizes the text of every image in one batch.
    /// </summary>
    /// <param name="srcs">Images to recognize; each must be non-empty and have 1, 3 or 4 channels.</param>
    /// <returns>One recognition result per input image, in input order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="srcs"/> is null.</exception>
    /// <exception cref="ArgumentException">An element of <paramref name="srcs"/> is null or empty.</exception>
    public PaddleOcrRecognizerResult[] Run(Mat[] srcs)
    {
        // Input validation.
        ArgumentNullException.ThrowIfNull(srcs);
        if (srcs.Length == 0)
        {
            return [];
        }
        for (int i = 0; i < srcs.Length; ++i)
        {
            Mat src = srcs[i];
            if (src is null)
            {
                throw new ArgumentException($"src[{i}] should not be null.");
            }
            if (src.Empty())
            {
                throw new ArgumentException($"src[{i}] size should not be 0, wrong input picture provided?");
            }
        }

        // Filled incrementally so that the finally block can release whatever was
        // already produced if preprocessing fails half way through.
        List<Mat> normalizeds = new(srcs.Length);

        try
        {
            // Preprocessing: channel conversion, resize/pad, normalization.
            foreach (Mat src in srcs)
            {
                using Mat channel3 = src.Channels() switch
                {
                    4 => src.CvtColor(ColorConversionCodes.RGBA2BGR),
                    1 => src.CvtColor(ColorConversionCodes.GRAY2RGB),
                    3 => src.Clone(),
                    var x => throw new NotSupportedException($"Unexpect src channel: {x}, allow: (1/3/4)")
                };
                using Mat resized = ResizePadding(channel3, _height, _width);
                normalizeds.Add(Normalize(resized));
            }

            // Build the input tensor (batch, channel, height, width) and run inference.
            int channel = normalizeds[0].Channels();
            float[] inputData = ExtractMat(normalizeds, channel, _height, _width);
            var inputTensor = new DenseTensor<float>(inputData, [normalizeds.Count, channel, _height, _width]);
            // "x" is the input name this code feeds the model with.
            var input = new[] { NamedOnnxValue.CreateFromTensor("x", inputTensor) };
            using var results = _session.Run(input);

            // Output layout: [batch, time steps, labels].
            var outTensor = results.First().AsTensor<float>();
            int[] dims = outTensor.Dimensions.ToArray();
            if (dims.Length != 3)
            {
                throw new InvalidOperationException($"Unexpected output rank {dims.Length}, expected 3, OCR model not matched?");
            }
            float[] data = outTensor.ToArray();

            // Post-processing.
            return Decode(data, dims[0], dims[1], dims[2]);
        }
        finally
        {
            foreach (Mat normalized in normalizeds)
            {
                normalized.Dispose();
            }
        }
    }

    /// <summary>
    /// Greedy decoding of the model output: for every time step take the label with the
    /// highest value, drop index 0 and drop immediate repeats of the previous step's label.
    /// </summary>
    /// <param name="data">Flattened output of shape [batchSize, charCount, labelCount].</param>
    /// <param name="batchSize">Number of images in the batch.</param>
    /// <param name="charCount">Number of time steps per image.</param>
    /// <param name="labelCount">Number of labels per time step.</param>
    /// <returns>One result per image.</returns>
    private PaddleOcrRecognizerResult[] Decode(float[] data, int batchSize, int charCount, int labelCount)
    {
        // Pinned before the try so that Free() is only reached for an allocated handle.
        GCHandle dataHandle = GCHandle.Alloc(data, GCHandleType.Pinned);
        try
        {
            IntPtr dataPtr = dataHandle.AddrOfPinnedObject();

            // One index per Mat dimension (row, column). Both buffers are fully sized:
            // an empty array is not guaranteed to reach native code as a null pointer.
            int[] minIdx = new int[2];
            int[] maxIdx = new int[2];

            var decoded = new PaddleOcrRecognizerResult[batchSize];
            for (int i = 0; i < batchSize; ++i)
            {
                StringBuilder sb = new();
                List<RecognizedChar> chars = new();
                float score = 0;
                int lastIndex = 0;

                for (int n = 0; n < charCount; ++n)
                {
                    // View over the label values of time step n of image i (no copy).
                    using Mat mat = Mat.FromPixelData(1, labelCount, MatType.CV_32FC1, dataPtr + (n + i * charCount) * labelCount * sizeof(float));
                    mat.MinMaxIdx(out double _, out double maxVal, minIdx, maxIdx);
                    int index = maxIdx[1];

                    if (index > 0 && !(n > 0 && index == lastIndex))
                    {
                        score += (float)maxVal;
                        string character = GetLabelByIndex(index, _dict);
                        sb.Append(character);
                        chars.Add(new RecognizedChar(character, (float)maxVal, chars.Count));
                    }
                    lastIndex = index;
                }

                // Average over recognized characters; 0 instead of NaN when nothing was recognized.
                float average = chars.Count > 0 ? score / chars.Count : 0f;
                decoded[i] = new PaddleOcrRecognizerResult(sb.ToString(), average, chars);
            }

            return decoded;
        }
        finally
        {
            dataHandle.Free();
        }
    }

    /// <summary>
    /// Maps a label index to its text. Index 0 is the blank, 1..labels.Count map to the
    /// dictionary entries and labels.Count + 1 maps to a space.
    /// </summary>
    /// <param name="i">Label index produced by the model.</param>
    /// <param name="labels">Character dictionary.</param>
    /// <returns>The text for the given index.</returns>
    /// <exception cref="InvalidOperationException">The index is outside the supported range.</exception>
    private static string GetLabelByIndex(int i, IReadOnlyList<string> labels)
    {
        return i switch
        {
            // The dictionary does not contain the blank entry, hence the -1.
            var x when x > 0 && x <= labels.Count => labels[x - 1],
            var x when x == labels.Count + 1 => " ",
            _ => throw new InvalidOperationException($"Unable to GetLabelByIndex: index {i} out of range {labels.Count}, OCR model or labels not matched?"),
        };
    }



    /// <summary>
    /// Lazily reads the dictionary file line by line.
    /// </summary>
    /// <param name="dictName">Path of the dictionary file.</param>
    /// <returns>The lines of the file, in file order.</returns>
    private static IEnumerable<string> LoadDicts(string dictName)
    {
        using StreamReader reader = new(dictName);
        while (reader.ReadLine() is { } line)
        {
            yield return line;
        }
    }

    /// <summary>
    /// Converts the image to 32-bit float and normalizes every channel to
    /// (value / 255 - 0.5) * 2.
    /// </summary>
    /// <param name="src">3-channel image.</param>
    /// <returns>A new normalized Mat; the caller owns and must dispose it.</returns>
    private static Mat Normalize(Mat src)
    {
        float[] scales = [2.0f, 2.0f, 2.0f];
        float[] means = [0.5f, 0.5f, 0.5f];

        using Mat normalized = new();
        src.ConvertTo(normalized, MatType.CV_32FC3, 1.0 / 255);

        Mat[] bgr = normalized.Split();
        try
        {
            for (int i = 0; i < bgr.Length; ++i)
            {
                bgr[i].ConvertTo(bgr[i], MatType.CV_32FC1, 1.0 * scales[i], (0.0 - means[i]) * scales[i]);
            }

            Mat dest = new();
            try
            {
                Cv2.Merge(bgr, dest);
                return dest;
            }
            catch
            {
                dest.Dispose();
                throw;
            }
        }
        finally
        {
            // The split channels are temporaries and are released on every path.
            foreach (Mat channel in bgr)
            {
                channel.Dispose();
            }
        }
    }



    /// <summary>
    /// Copies the images into one contiguous float buffer, one plane per channel per image
    /// (image-major, then channel), ready to be wrapped in the input tensor.
    /// </summary>
    /// <param name="srcs">Normalized images of size height x width.</param>
    /// <param name="channel">Expected channel count of every image.</param>
    /// <param name="height">Image height.</param>
    /// <param name="width">Image width.</param>
    /// <returns>Buffer of length srcs.Count * channel * height * width.</returns>
    /// <exception cref="InvalidOperationException">An image has a different channel count.</exception>
    private static float[] ExtractMat(IReadOnlyList<Mat> srcs, int channel, int height, int width)
    {
        float[] result = new float[srcs.Count * channel * width * height];
        GCHandle resultHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
        try
        {
            IntPtr resultPtr = resultHandle.AddrOfPinnedObject();
            for (int i = 0; i < srcs.Count; ++i)
            {
                Mat src = srcs[i];
                if (src.Channels() != channel)
                {
                    throw new InvalidOperationException($"src[{i}] channel={src.Channels()}, expected {channel}");
                }
                for (int c = 0; c < channel; ++c)
                {
                    // View over the destination plane inside the result buffer (no copy).
                    using Mat dest = Mat.FromPixelData(height, width, MatType.CV_32FC1, resultPtr + (c + i * channel) * height * width * sizeof(float));
                    Cv2.ExtractChannel(src, dest, c);
                }
            }
            return result;
        }
        finally
        {
            resultHandle.Free();
        }
    }

    /// <summary>
    /// Resizes the image to the given height keeping the aspect ratio, then pads the right
    /// side with gray up to <paramref name="targetWidth"/>. Images whose scaled width would
    /// exceed <paramref name="targetWidth"/> are squeezed to exactly that width.
    /// </summary>
    /// <param name="src">Source image.</param>
    /// <param name="height">Target height.</param>
    /// <param name="targetWidth">Target width.</param>
    /// <returns>A new Mat of size targetWidth x height; the caller owns and must dispose it.</returns>
    private static Mat ResizePadding(Mat src, int height, int targetWidth)
    {
        Size size = src.Size();
        float whRatio = 1.0f * size.Width / size.Height;
        // Clamp: without it a very wide image yields a negative border width below.
        int width = Math.Min(targetWidth, (int)Math.Ceiling(height * whRatio));

        if (width == targetWidth)
        {
            return src.Resize(new Size(width, height));
        }

        using Mat resized = src.Resize(new Size(width, height));
        return resized.CopyMakeBorder(0, 0, 0, targetWidth - width, BorderTypes.Constant, Scalar.Gray);
    }
}

posted @ 2026-02-28 14:34  心血来潮改个名~  阅读(2)  评论(0)    收藏  举报