程序项目代做,有需求私信(小程序、网站、爬虫、电路板设计、驱动、应用程序开发、毕设疑难问题处理等)

NLM去噪

一、环境搭建

1.1 安装依赖

1.1.1 VS Code
1.1.2 安装opencv

参考《Rockchip RK3588 - Mali-G610 GPU驱动4.6.3.1节。

1.1.3 安装依赖
sudo apt install libopencv-dev opencl-headers ocl-icd-opencl-dev

1.2 CPU实现

1.2.1 NlmCPU.h
// NlmCPU.h
#ifndef NLM_CPU_H
#define NLM_CPU_H

#include <opencv2/opencv.hpp>
#include <string>
#include <vector>
#include <memory>

class NlmCPUProcessor {
public:
    // 构造函数
    NlmCPUProcessor(int rows = 1600, int cols = 1200, double maxVal = 1023.0);
    
    // 主处理函数 - 改为小写开头
    cv::Mat denoiseRaw9Ch(const std::string& path);
    
    // 获取中间结果
    cv::Mat getNoisyImage() const { return m_noisyImage; }  // 成员变量加m_前缀
    cv::Mat getMeanImage() const { return m_meanImage; }
    cv::Mat getStructureClean() const { return m_structureClean; }
    std::vector<cv::Mat> getStack() const { return m_stack; }
    
    // 获取统计数据
    double getSigmaEst() const { return m_sigmaEst; }
    double getHFinal() const { return m_hFinal; }
    std::vector<double> getTimings() const { return m_timings; }
    
    // 设置参数
    void setRows(int rows) { m_rows = rows; }
    void setCols(int cols) { m_cols = cols; }
    void setMaxVal(double maxVal) { m_maxVal = maxVal; }
    void setPatchSize(int patchSize) { m_patchSize = patchSize; }
    void setSigmaEstimationPatchSize(int patchSize) { m_sigmaEstPatchSize = patchSize; }
    void setHFactor(double hFactor) { m_hFactor = hFactor; }
    void setDiffSigma(double diffSigma) { m_diffSigma = diffSigma; }
    
    // 噪声估计函数
    double noiseEstimation(const cv::Mat& noisyImage, int patchSize = -1);
    
    // 保存中间结果
    void saveIntermediateResults(const std::string& prefix = "cpu") const;
    
    // 打印统计信息
    void printStatistics() const;
    
private:
    // 核心处理函数
    void readRawData(const std::string& path);
    void splitChannels();
    void computeMean();
    void estimateNoise();
    void applyNlmDenoising();
    cv::Mat reconstructImage();
    
    // 工具函数
    void clearResults();
    void updateTiming(double time);
    
private:
    // 输入参数
    int m_rows;
    int m_cols;
    double m_maxVal;
    int m_patchSize;
    int m_sigmaEstPatchSize;
    double m_hFactor;
    double m_diffSigma;
    
    // 中间结果
    cv::Mat m_rawBuffer;
    cv::Mat m_noisyImage;
    std::vector<cv::Mat> m_stack;
    cv::Mat m_meanImage;
    cv::Mat m_meanImage8u;
    cv::Mat m_structureClean;
    
    // 统计信息
    double m_sigmaEst;
    double m_hFinal;
    std::vector<double> m_timings;  // 各个阶段的时间消耗
    
    // 临时变量
    double m_currentTimer;
    
    // 常量
    static const int DEFAULT_ROWS = 1600;
    static const int DEFAULT_COLS = 1200;
    static const double DEFAULT_MAX_VAL;
    static const int DEFAULT_PATCH_SIZE = 7;
    static const int DEFAULT_SIGMA_PATCH_SIZE = 8;
    static const double DEFAULT_H_FACTOR;
    static const double DEFAULT_DIFF_SIGMA;
};

#endif // NLM_CPU_H
1.2.2 NlmCPU.cpp
// NlmCPU.cpp
#include "NlmCPU.h"
#include <iostream>
#include <fstream>
#include <iomanip>
#include <chrono>

const double NlmCPUProcessor::DEFAULT_MAX_VAL = 1023.0;
const double NlmCPUProcessor::DEFAULT_H_FACTOR = 0.8;
const double NlmCPUProcessor::DEFAULT_DIFF_SIGMA = 2.5;

// 构造函数
NlmCPUProcessor::NlmCPUProcessor(int rows, int cols, double maxVal)
    : m_rows(rows), m_cols(cols), m_maxVal(maxVal),
      m_patchSize(DEFAULT_PATCH_SIZE),
      m_sigmaEstPatchSize(DEFAULT_SIGMA_PATCH_SIZE),
      m_hFactor(DEFAULT_H_FACTOR),
      m_diffSigma(DEFAULT_DIFF_SIGMA),
      m_sigmaEst(0.0), m_hFinal(0.0), m_currentTimer(0.0) {
    m_timings.resize(6, 0.0); 
}

// 清空结果
void NlmCPUProcessor::clearResults() {
    m_rawBuffer.release();
    m_noisyImage.release();
    m_stack.clear();
    m_meanImage.release();
    m_meanImage8u.release();
    m_structureClean.release();
    
    m_sigmaEst = 0.0;
    m_hFinal = 0.0;
    std::fill(m_timings.begin(), m_timings.end(), 0.0);
}

// 更新时间统计
void NlmCPUProcessor::updateTiming(double time) {
    m_timings.push_back(time);
}

// 主处理函数
cv::Mat NlmCPUProcessor::denoiseRaw9Ch(const std::string& path) {
    clearResults();
    double totalTimeStart = (double)cv::getTickCount();
    
    // 1. 读取数据
    double stageTimeStart = (double)cv::getTickCount();
    readRawData(path);
    double readTime = ((double)cv::getTickCount() - stageTimeStart) / cv::getTickFrequency();
    m_timings[0] = readTime;
    
    // 2. 通道拆分
    stageTimeStart = (double)cv::getTickCount();
    splitChannels();
    double splitTime = ((double)cv::getTickCount() - stageTimeStart) / cv::getTickFrequency();
    m_timings[1] = splitTime;
    
    // 3. 计算均值
    stageTimeStart = (double)cv::getTickCount();
    computeMean();
    double meanTime = ((double)cv::getTickCount() - stageTimeStart) / cv::getTickFrequency();
    m_timings[2] = meanTime;
    
    // 4. 噪声估计
    stageTimeStart = (double)cv::getTickCount();
    estimateNoise();
    double noiseTime = ((double)cv::getTickCount() - stageTimeStart) / cv::getTickFrequency();
    m_timings[3] = noiseTime;
    
    // 5. NLM去噪
    stageTimeStart = (double)cv::getTickCount();
    applyNlmDenoising();
    double nlmTime = ((double)cv::getTickCount() - stageTimeStart) / cv::getTickFrequency();
    m_timings[4] = nlmTime;

    // 总时间
    double totalTime = ((double)cv::getTickCount() - totalTimeStart) / cv::getTickFrequency();
    m_timings[5] = totalTime;

    return m_structureClean;
}

// 读取RAW数据
void NlmCPUProcessor::readRawData(const std::string& path) {
    std::ifstream fileStream(path, std::ios::binary);
    if (!fileStream) {
        throw std::runtime_error("Cannot open file: " + path);
    }
    
    // 读取原始数据
    std::vector<uint16_t> buffer(m_rows * m_cols);
    fileStream.read((char*)buffer.data(), m_rows * m_cols * sizeof(uint16_t));
    fileStream.close();
    
    // 存储原始缓冲区(用于调试)
    m_rawBuffer = cv::Mat(m_rows * m_cols, 1, CV_16UC1, buffer.data()).clone();
    
    // 修正读取映射逻辑
    m_noisyImage = cv::Mat::zeros(m_rows, m_cols, CV_64F);
    for (int col = 0; col < m_cols; col++) {
        for (int row = 0; row < m_rows; row++) {
            m_noisyImage.at<double>(row, col) = (double)buffer[col * m_rows + row];
        }
    }
}

// 通道拆分
void NlmCPUProcessor::splitChannels() {
    int subHeight = m_rows / 3;
    int subWidth = m_cols / 3;
    
    m_stack.resize(9);
    int index = 0;
    
    for (int rowGroup = 0; rowGroup < 3; rowGroup++) {
        for (int colGroup = 0; colGroup < 3; colGroup++) {
            cv::Mat subImage(subHeight, subWidth, CV_64F);
            for (int i = 0; i < subHeight; i++) {
                for (int j = 0; j < subWidth; j++) {
                    subImage.at<double>(i, j) = 
                        m_noisyImage.at<double>(rowGroup + i * 3, colGroup + j * 3);
                }
            }
            m_stack[index++] = subImage;
        }
    }
}

// 计算均值
void NlmCPUProcessor::computeMean() {
    int subHeight = m_rows / 3;
    int subWidth = m_cols / 3;
    
    m_meanImage = cv::Mat::zeros(subHeight, subWidth, CV_64F);
    for (int i = 0; i < 9; i++) {
        m_meanImage += m_stack[i];
    }
    m_meanImage /= 9.0;
}

// 噪声估计
void NlmCPUProcessor::estimateNoise() {
    m_sigmaEst = noiseEstimation(m_meanImage, m_sigmaEstPatchSize);
    m_hFinal = m_hFactor * m_sigmaEst * (m_maxVal / 1023.0);
}

// NLM去噪
void NlmCPUProcessor::applyNlmDenoising() {
    double scale = 255.0 / m_maxVal;
    m_meanImage.convertTo(m_meanImage8u, CV_8U, scale);
    
    cv::Mat clean8u;
    cv::fastNlMeansDenoising(m_meanImage8u, clean8u, 
                             (float)(m_hFinal * scale), 
                             m_patchSize, m_patchSize);
    
    clean8u.convertTo(m_structureClean, CV_64F, 1.0 / scale);
}

// 噪声估计算法
double NlmCPUProcessor::noiseEstimation(const cv::Mat& noisyImage, int patchSize) {
    if (patchSize <= 0) {
        patchSize = m_sigmaEstPatchSize;
    }
    
    int height = noisyImage.rows;
    int width = noisyImage.cols;

    // 中心区域裁剪
    int centerHeight = cvRound(height / 2.0);
    int centerWidth = cvRound(width / 2.0);
    int cropRadius = std::min(200, cvRound(std::min(height, width) / 4.0));

    int rowStart = std::max(0, centerHeight - cropRadius - 1);
    int rowEnd = std::min(height - 1, centerHeight + cropRadius - 1);
    int colStart = std::max(0, centerWidth - cropRadius - 1);
    int colEnd = std::min(width - 1, centerWidth + cropRadius - 1);
    
    cv::Mat croppedImage = noisyImage(cv::Range(rowStart, rowEnd + 1), 
                                     cv::Range(colStart, colEnd + 1));

    // 模拟 im2col sliding
    cv::Mat patches;
    for (int col = 0; col <= croppedImage.cols - patchSize; col++) {
        for (int row = 0; row <= croppedImage.rows - patchSize; row++) {
            cv::Mat patch = croppedImage(cv::Rect(col, row, patchSize, patchSize)).clone();
            patches.push_back(patch.reshape(1, 1)); // 展平为行向量
        }
    }
    patches = patches.t(); // [PatchSize*PatchSize, NumPatches]

    // 降采样 1:5:end
    cv::Mat subSampledPatches;
    for (int col = 0; col < patches.cols; col += 5) {
        subSampledPatches.push_back(patches.col(col).t());
    }
    subSampledPatches = subSampledPatches.t();

    // 去均值
    cv::Mat columnMean;
    cv::reduce(subSampledPatches, columnMean, 1, cv::REDUCE_AVG);
    for (int col = 0; col < subSampledPatches.cols; col++) {
        subSampledPatches.col(col) -= columnMean;
    }

    // 协方差与特征值
    cv::Mat covariance = (subSampledPatches * subSampledPatches.t()) / 
                        (double)(subSampledPatches.cols - 1);
    cv::Mat eigenvalues;
    cv::eigen(covariance, eigenvalues); // OpenCV eigen 返回降序排列的特征值

    // 取最小的 10%
    int numEigenvalues = eigenvalues.rows;
    int count = std::max(1, cvRound(0.1 * numEigenvalues));
    double sumMinEigenvalues = 0;
    for (int i = numEigenvalues - count; i < numEigenvalues; i++) {
        sumMinEigenvalues += eigenvalues.at<double>(i);
    }

    return sqrt(std::max(sumMinEigenvalues / count, 0.0));
}

// 重建图像
cv::Mat NlmCPUProcessor::reconstructImage() {
    cv::Mat denoisedImage = m_noisyImage.clone();
    int subHeight = m_rows / 3;
    int subWidth = m_cols / 3;
    
    int index = 0;
    for (int rowGroup = 0; rowGroup < 3; rowGroup++) {
        for (int colGroup = 0; colGroup < 3; colGroup++) {
            cv::Mat diffMap = m_stack[index] - m_meanImage;
            cv::Mat cleanDiffMap;
            cv::GaussianBlur(diffMap, cleanDiffMap, cv::Size(0, 0), m_diffSigma);
            
            cv::Mat recovered = m_structureClean + cleanDiffMap;
            
            for (int i = 0; i < subHeight; i++) {
                for (int j = 0; j < subWidth; j++) {
                    denoisedImage.at<double>(rowGroup + i * 3, colGroup + j * 3) = 
                        recovered.at<double>(i, j);
                }
            }
            index++;
        }
    }

    cv::threshold(denoisedImage, denoisedImage, m_maxVal, m_maxVal, cv::THRESH_TRUNC);
    cv::threshold(denoisedImage, denoisedImage, 0, 0, cv::THRESH_TOZERO);

    return denoisedImage;
}

// 保存中间结果
void NlmCPUProcessor::saveIntermediateResults(const std::string& prefix) const {
    std::cout << "Saving intermediate results with prefix: " << prefix << std::endl;
    
    if (!m_noisyImage.empty()) {
        cv::Mat noisyNormalized;
        cv::normalize(m_noisyImage, noisyNormalized, 0, 255, cv::NORM_MINMAX, CV_8U);
        cv::imwrite("./images/" + prefix + "_noisy.png", noisyNormalized);
    }
    
    if (!m_meanImage.empty()) {
        cv::Mat meanNormalized;
        cv::normalize(m_meanImage, meanNormalized, 0, 255, cv::NORM_MINMAX, CV_8U);
        cv::imwrite("./images/" + prefix + "_mean.png", meanNormalized);
    }
    
    if (!m_meanImage8u.empty()) {
        cv::imwrite("./images/" + prefix + "_mean_8u.png", m_meanImage8u);
    }
    
    if (!m_structureClean.empty()) {
        cv::Mat structureNormalized;
        cv::normalize(m_structureClean, structureNormalized, 0, 255, cv::NORM_MINMAX, CV_8U);
        cv::imwrite("./images/" + prefix + "_structure_clean.png", structureNormalized);
    }
    
    // 保存堆栈中的每个通道
    for (size_t i = 0; i < m_stack.size(); i++) {
        if (!m_stack[i].empty()) {
            cv::Mat channelNormalized;
            cv::normalize(m_stack[i], channelNormalized, 0, 255, cv::NORM_MINMAX, CV_8U);
            cv::imwrite("./images/" + prefix + "_stack_" + std::to_string(i) + ".png", 
                       channelNormalized);
        }
    }
    
    // 保存统计数据
    std::ofstream statsFile(prefix + "_stats.txt");
    if (statsFile.is_open()) {
        statsFile << "CPU Processing Statistics\n";
        statsFile << "=======================\n\n";
        statsFile << "Parameters:\n";
        statsFile << "  Rows: " << m_rows << "\n";
        statsFile << "  Cols: " << m_cols << "\n";
        statsFile << "  MaxVal: " << m_maxVal << "\n";
        statsFile << "  PatchSize: " << m_patchSize << "\n";
        statsFile << "  SigmaEstPatchSize: " << m_sigmaEstPatchSize << "\n";
        statsFile << "  HFactor: " << m_hFactor << "\n";
        statsFile << "  DiffSigma: " << m_diffSigma << "\n\n";
        
        statsFile << "Results:\n";
        statsFile << "  SigmaEst: " << m_sigmaEst << "\n";
        statsFile << "  HFinal: " << m_hFinal << "\n\n";
        
        statsFile << "Timings (seconds):\n";
        const char* stageNames[] = {
            "Read Data", "Split Channels", "Compute Mean", "Noise Estimation",
            "NLM Denoising", "Reconstruction", "Truncation", "Total"
        };
        
        for (size_t i = 0; i < std::min(m_timings.size(), 
               sizeof(stageNames)/sizeof(stageNames[0])); i++) {
            statsFile << "  " << stageNames[i] << ": " 
                     << std::fixed << std::setprecision(6) << m_timings[i] << "\n";
        }
        statsFile.close();
    }
}

// 打印统计信息
void NlmCPUProcessor::printStatistics() const {
    std::cout << "\n=== CPU Processing Statistics ===" << std::endl;
    std::cout << "Parameters:" << std::endl;
    std::cout << "  Rows: " << m_rows << std::endl;
    std::cout << "  Cols: " << m_cols << std::endl;
    std::cout << "  MaxVal: " << m_maxVal << std::endl;
    std::cout << "  PatchSize: " << m_patchSize << std::endl;
    std::cout << "  SigmaEstPatchSize: " << m_sigmaEstPatchSize << std::endl;
    std::cout << "  HFactor: " << m_hFactor << std::endl;
    std::cout << "  DiffSigma: " << m_diffSigma << std::endl;
    
    std::cout << "\nResults:" << std::endl;
    std::cout << "  SigmaEst: " << m_sigmaEst << std::endl;
    std::cout << "  HFinal: " << m_hFinal << std::endl;
    
    std::cout << "\nTimings (seconds):" << std::endl;
    const char* stageNames[] = {
        "1. Read Data", "2. Split Channels", "3. Compute Mean", "4. Noise Estimation",
        "5. NLM Denoising", "6. Total"
    };
    
    double sumStageTime = 0;
    for (size_t i = 0; i < m_timings.size() - 1; i++) {
        std::cout << "  " << stageNames[i] << ": " 
                 << std::fixed << std::setprecision(6) << m_timings[i] << std::endl;
        if (i < m_timings.size() - 1) {
            sumStageTime += m_timings[i];
        }
    }
    
    std::cout << "  " << stageNames[m_timings.size() - 1] << ": " 
             << std::fixed << std::setprecision(6) << m_timings.back() << std::endl;
    
    if (m_timings.size() > 1) {
        std::cout << "  Sum of stages: " << sumStageTime << std::endl;
    }
    std::cout << "=====================================================" << std::endl;
}

1.3 GPU实现

1.3.1 NlmGPU.h
// NlmGPU.h
#ifndef NLM_GPU_H
#define NLM_GPU_H

#include <opencv2/opencv.hpp>
#include <opencv2/core/ocl.hpp>
#include <string>
#include <vector>

class NlmGPUProcessor {
public:
    // 构造函数
    NlmGPUProcessor(int rows = 1600, int cols = 1200, float maxVal = 1023.0f);
    
    // 主处理函数 - 改为小写开头
    cv::UMat denoiseRaw9Ch(const std::string& path);
    
    // 获取中间结果 - 方法名修改
    cv::UMat getNoisyImageGpu() const { return m_noisyImageGpu; }
    cv::UMat getMeanImageGpu() const { return m_meanImageGpu; }
    cv::UMat getStructureCleanGpu() const { return m_structureCleanGpu; }
    std::vector<cv::UMat> getStackGpu() const { return m_stackGpu; }
    
    // 获取统计数据
    float getSigmaEst() const { return m_sigmaEst; }
    float getHFinal() const { return m_hFinal; }
    bool isOpenCLEnabled() const { return m_openclEnabled; }
    
    // 设置参数
    void setRows(int rows) { m_rows = rows; }
    void setCols(int cols) { m_cols = cols; }
    void setMaxVal(float maxVal) { m_maxVal = maxVal; }
    void setPatchSize(int patchSize) { m_patchSize = patchSize; }
    void setSigmaEstimationPatchSize(int patchSize) { m_sigmaEstPatchSize = patchSize; }
    void setHFactor(double hFactor) { m_hFactor = hFactor; }
    void setDiffSigma(double diffSigma) { m_diffSigma = diffSigma; }
    
    // 噪声估计函数
    double noiseEstimation(const cv::Mat& noisyImage, int patchSize = -1);
    
    // 保存中间结果
    void saveIntermediateResults(const std::string& prefix = "gpu") const;
    
    // 打印统计信息
    void printStatistics() const;
    
private:
    // 核心处理函数
    void readRawData(const std::string& path);
    void splitChannelsGpu();  // 函数名修改
    void computeMeanGpu();    // 函数名修改
    void estimateNoise();
    void applyNlmDenoisingGpu();  // 函数名修改
    
    // 工具函数
    void clearResults();
    void updateTiming(double time);
    
private:
    // 输入参数
    int m_rows;
    int m_cols;
    float m_maxVal;
    int m_patchSize;
    int m_sigmaEstPatchSize;
    double m_hFactor;
    double m_diffSigma;
    bool m_openclEnabled;
    
    // GPU中间结果
    cv::UMat m_noisyImageGpu;
    std::vector<cv::UMat> m_stackGpu;
    cv::UMat m_meanImageGpu;
    cv::UMat m_meanImage8uGpu;
    cv::UMat m_structureCleanGpu;
    
    // 统计信息
    double m_sigmaEst;
    double m_hFinal;
    std::vector<double> m_timings;  // 各个阶段的时间消耗
    
    // 临时变量
    double m_currentTimer;
    
    // 常量
    static const int DEFAULT_ROWS = 1600;
    static const int DEFAULT_COLS = 1200;
    static const double DEFAULT_MAX_VAL;
    static const int DEFAULT_PATCH_SIZE = 7;
    static const int DEFAULT_SIGMA_PATCH_SIZE = 8;
    static const double DEFAULT_H_FACTOR;
    static const double DEFAULT_DIFF_SIGMA;
};

#endif // NLM_GPU_H
1.3.2 NlmGPU.cpp
// NlmGPU.cpp
#include "NlmGPU.h"
#include <fstream>
#include <iostream>

const double NlmGPUProcessor::DEFAULT_MAX_VAL = 1023.0;
const double NlmGPUProcessor::DEFAULT_H_FACTOR = 0.8;
const double NlmGPUProcessor::DEFAULT_DIFF_SIGMA = 2.5;

// 构造函数
NlmGPUProcessor::NlmGPUProcessor(int rows, int cols, float maxVal)
    : m_rows(rows), m_cols(cols), m_maxVal(maxVal),
      m_patchSize(DEFAULT_PATCH_SIZE),
      m_sigmaEstPatchSize(DEFAULT_SIGMA_PATCH_SIZE),
      m_hFactor(DEFAULT_H_FACTOR),
      m_diffSigma(DEFAULT_DIFF_SIGMA),
      m_sigmaEst(0.0f), m_hFinal(0.0f), 
      m_currentTimer(0.0), m_openclEnabled(false) {
    
    cv::ocl::setUseOpenCL(true);
    m_openclEnabled = cv::ocl::useOpenCL();
    m_timings.resize(6, 0.0);  
}

// 清空结果
void NlmGPUProcessor::clearResults() {
    m_noisyImageGpu.release();    
    m_stackGpu.clear();
    m_meanImageGpu.release();
    m_meanImage8uGpu.release();
    m_structureCleanGpu.release();
    
    m_sigmaEst = 0.0;
    m_hFinal = 0.0;
    std::fill(m_timings.begin(), m_timings.end(), 0.0);
}

// 更新时间统计
void NlmGPUProcessor::updateTiming(double time) {
    m_timings.push_back(time);
}

// 主处理函数
cv::UMat NlmGPUProcessor::denoiseRaw9Ch(const std::string& path) {
    clearResults();
    double totalTimeStart = (double)cv::getTickCount();

    if (!m_openclEnabled) {
        std::cerr << "Error: GPU version returned empty result" << std::endl;  
        return cv::UMat();        
    }
    
    // 1. 读取数据
    double stageTimeStart = (double)cv::getTickCount();
    readRawData(path);
    double readTime = ((double)cv::getTickCount() - stageTimeStart) / cv::getTickFrequency();
    m_timings[0] = readTime;

    // 2. 通道拆分
    stageTimeStart = (double)cv::getTickCount();
    splitChannelsGpu();
    double splitTime = ((double)cv::getTickCount() - stageTimeStart) / cv::getTickFrequency();
    m_timings[1] = splitTime;
    
    // 3. 计算均值
    stageTimeStart = (double)cv::getTickCount();
    computeMeanGpu();
    double meanTime = ((double)cv::getTickCount() - stageTimeStart) / cv::getTickFrequency();
    m_timings[2] = meanTime;
    
    // 4. 噪声估计
    stageTimeStart = (double)cv::getTickCount();
    estimateNoise();
    double noiseTime = ((double)cv::getTickCount() - stageTimeStart) / cv::getTickFrequency();
    m_timings[3] = noiseTime;

    // 5. NLM去噪
    stageTimeStart = (double)cv::getTickCount();
    applyNlmDenoisingGpu();
    double nlmTime = ((double)cv::getTickCount() - stageTimeStart) / cv::getTickFrequency();
    m_timings[4] = nlmTime;
    
    // 总时间
    double totalTime = ((double)cv::getTickCount() - totalTimeStart) / cv::getTickFrequency();
    m_timings[5] = totalTime;

    return m_structureCleanGpu;
}

// 读取RAW数据
void NlmGPUProcessor::readRawData(const std::string& path) {
    std::ifstream fileStream(path, std::ios::binary);
    if (!fileStream) {
        throw std::runtime_error("Cannot open file: " + path);
    }
    
    std::vector<uint16_t> buffer(m_rows * m_cols);
    fileStream.read(reinterpret_cast<char*>(buffer.data()), 
                    m_rows * m_cols * sizeof(uint16_t));
    fileStream.close();
    
    cv::Mat tempMat(m_cols, m_rows, CV_16UC1, buffer.data());
    cv::UMat tempGpu, transposedGpu;
    tempMat.copyTo(tempGpu);
    
    cv::transpose(tempGpu, transposedGpu);
    transposedGpu.convertTo(m_noisyImageGpu, CV_32F);
}

// 通道拆分
void NlmGPUProcessor::splitChannelsGpu()
{
    CV_Assert(m_noisyImageGpu.type() == CV_32F);

    const int subHeight = m_rows / 3;
    const int subWidth = m_cols / 3;

    // 创建输出
    m_stackGpu.resize(9);
    for (int i = 0; i < 9; ++i) {
        m_stackGpu[i].create(subHeight, subWidth, CV_32F);
    }

    m_meanImageGpu.create(subHeight, subWidth, CV_32F);

    // OpenCL kernel
    static const char* kernelSource = R"(
    #define WG_X 8
    #define WG_Y 8

    __kernel void split_and_mean_tile(
        __global const float* src,
        int src_step,
        __global float* dst0,
        __global float* dst1,
        __global float* dst2,
        __global float* dst3,
        __global float* dst4,
        __global float* dst5,
        __global float* dst6,
        __global float* dst7,
        __global float* dst8,
        int dst_step,
        __global float* mean,
        int mean_step,
        int rows,
        int cols
    )
    {
        const int localX = get_local_id(1);
        const int localY = get_local_id(0);
        const int groupX = get_group_id(1) * WG_X;
        const int groupY = get_group_id(0) * WG_Y;

        __local float tile[WG_Y*3][WG_X*3];

        // 加载 tile
        for (int dy = localY; dy < WG_Y*3; dy += WG_Y) {
            int iy = groupY*3 + dy;
            if (iy < rows) {
                __global const float* srcRow =
                    (__global const float*)((__global const char*)src + iy * src_step);

                for (int dx = localX; dx < WG_X*3; dx += WG_X) {
                    int ix = groupX*3 + dx;
                    if (ix < cols)
                        tile[dy][dx] = srcRow[ix];
                }
            }
        }

        barrier(CLK_LOCAL_MEM_FENCE);

        // 存储结果
        int y = groupY + localY;
        int x = groupX + localX;

        if (localY < WG_Y && localX < WG_X &&
            (y*3 + 2) < rows && (x*3 + 2) < cols)
        {
            float v0 = tile[localY*3+0][localX*3+0];
            float v1 = tile[localY*3+0][localX*3+1];
            float v2 = tile[localY*3+0][localX*3+2];
            float v3 = tile[localY*3+1][localX*3+0];
            float v4 = tile[localY*3+1][localX*3+1];
            float v5 = tile[localY*3+1][localX*3+2];
            float v6 = tile[localY*3+2][localX*3+0];
            float v7 = tile[localY*3+2][localX*3+1];
            float v8 = tile[localY*3+2][localX*3+2];

            __global float* d0 = (__global float*)((__global char*)dst0 + y * dst_step);
            __global float* d1 = (__global float*)((__global char*)dst1 + y * dst_step);
            __global float* d2 = (__global float*)((__global char*)dst2 + y * dst_step);
            __global float* d3 = (__global float*)((__global char*)dst3 + y * dst_step);
            __global float* d4 = (__global float*)((__global char*)dst4 + y * dst_step);
            __global float* d5 = (__global float*)((__global char*)dst5 + y * dst_step);
            __global float* d6 = (__global float*)((__global char*)dst6 + y * dst_step);
            __global float* d7 = (__global float*)((__global char*)dst7 + y * dst_step);
            __global float* d8 = (__global float*)((__global char*)dst8 + y * dst_step);

            d0[x] = v0; d1[x] = v1; d2[x] = v2;
            d3[x] = v3; d4[x] = v4; d5[x] = v5;
            d6[x] = v6; d7[x] = v7; d8[x] = v8;

            __global float* m =
                (__global float*)((__global char*)mean + y * mean_step);
            m[x] = (v0+v1+v2+v3+v4+v5+v6+v7+v8) * (1.0f/9.0f);
        }
    }
    )";

    // 构建程序
    cv::ocl::ProgramSource programSource(kernelSource);
    cv::String buildError;
    cv::ocl::Program program =
        cv::ocl::Context::getDefault().getProg(programSource, "", buildError);

    CV_Assert(program.ptr());

    cv::ocl::Kernel kernel("split_and_mean_tile", program);

    kernel.args(
        cv::ocl::KernelArg::PtrReadOnly(m_noisyImageGpu),
        (int)m_noisyImageGpu.step,
        cv::ocl::KernelArg::PtrWriteOnly(m_stackGpu[0]),
        cv::ocl::KernelArg::PtrWriteOnly(m_stackGpu[1]),
        cv::ocl::KernelArg::PtrWriteOnly(m_stackGpu[2]),
        cv::ocl::KernelArg::PtrWriteOnly(m_stackGpu[3]),
        cv::ocl::KernelArg::PtrWriteOnly(m_stackGpu[4]),
        cv::ocl::KernelArg::PtrWriteOnly(m_stackGpu[5]),
        cv::ocl::KernelArg::PtrWriteOnly(m_stackGpu[6]),
        cv::ocl::KernelArg::PtrWriteOnly(m_stackGpu[7]),
        cv::ocl::KernelArg::PtrWriteOnly(m_stackGpu[8]),
        (int)m_stackGpu[0].step,
        cv::ocl::KernelArg::PtrWriteOnly(m_meanImageGpu),
        (int)m_meanImageGpu.step,
        m_rows,
        m_cols
    );

    // 启动 kernel
    size_t localSize[2] = { 8, 8 };
    size_t globalSize[2] = {
        (size_t)((subHeight + 7) / 8) * 8,
        (size_t)((subWidth + 7) / 8) * 8
    };

    kernel.run(2, globalSize, localSize, false);
}

// 计算均值
void NlmGPUProcessor::computeMeanGpu() {
    cv::UMat sum;
    m_stackGpu[0].copyTo(sum);
    
    for (int i = 1; i < 9; i++) {
        cv::add(sum, m_stackGpu[i], sum);
    }
    
    cv::multiply(sum, 1.0f/9.0f, m_meanImageGpu);
}

// 噪声估计
void NlmGPUProcessor::estimateNoise() {
    cv::Mat meanCpu;
    m_meanImageGpu.copyTo(meanCpu);

    m_sigmaEst = noiseEstimation(meanCpu, m_sigmaEstPatchSize);
    m_hFinal = m_hFactor * m_sigmaEst * (m_maxVal / 1023.0f);
}

// NLM去噪
void NlmGPUProcessor::applyNlmDenoisingGpu() {
    float scale = 255.0f / m_maxVal;
    m_meanImageGpu.convertTo(m_meanImage8uGpu, CV_8U, scale);
    
    cv::UMat clean8uGpu;
    cv::fastNlMeansDenoising(m_meanImage8uGpu, clean8uGpu, 
                             (float)(m_hFinal * scale), 
                             m_patchSize, m_patchSize);
    
    clean8uGpu.convertTo(m_structureCleanGpu, CV_32F, 1.0f / scale);
}

// 噪声估计算法
double NlmGPUProcessor::noiseEstimation(const cv::Mat& noisyImage, int patchSize) {
    if (patchSize <= 0) {
        patchSize = m_sigmaEstPatchSize;
    }
    
    int height = noisyImage.rows;
    int width = noisyImage.cols;

    // 中心区域裁剪
    int centerHeight = cvRound(height / 2.0);
    int centerWidth = cvRound(width / 2.0);
    int cropRadius = std::min(200, cvRound(std::min(height, width) / 4.0));

    int rowStart = std::max(0, centerHeight - cropRadius - 1);
    int rowEnd = std::min(height - 1, centerHeight + cropRadius - 1);
    int colStart = std::max(0, centerWidth - cropRadius - 1);
    int colEnd = std::min(width - 1, centerWidth + cropRadius - 1);
    
    cv::Mat croppedImage = noisyImage(cv::Range(rowStart, rowEnd + 1), 
                                     cv::Range(colStart, colEnd + 1));

    // 模拟 im2col sliding
    cv::Mat patches;
    for (int col = 0; col <= croppedImage.cols - patchSize; col++) {
        for (int row = 0; row <= croppedImage.rows - patchSize; row++) {
            cv::Mat patch = croppedImage(cv::Rect(col, row, patchSize, patchSize)).clone();
            patches.push_back(patch.reshape(1, 1)); // 展平为行向量
        }
    }
    patches = patches.t(); // [PatchSize*PatchSize, NumPatches]

    // 降采样 1:5:end
    cv::Mat subSampledPatches;
    for (int col = 0; col < patches.cols; col += 5) {
        subSampledPatches.push_back(patches.col(col).t());
    }
    subSampledPatches = subSampledPatches.t();

    // 去均值
    cv::Mat columnMean;
    cv::reduce(subSampledPatches, columnMean, 1, cv::REDUCE_AVG);
    for (int col = 0; col < subSampledPatches.cols; col++) {
        subSampledPatches.col(col) -= columnMean;
    }

    // 协方差与特征值
    cv::Mat covariance = (subSampledPatches * subSampledPatches.t()) / 
                        (double)(subSampledPatches.cols - 1);
    cv::Mat eigenvalues;
    cv::eigen(covariance, eigenvalues); // OpenCV eigen 返回降序排列的特征值

    // 取最小的 10%
    int numEigenvalues = eigenvalues.rows;
    int count = std::max(1, cvRound(0.1 * numEigenvalues));
    double sumMinEigenvalues = 0;
    for (int i = numEigenvalues - count; i < numEigenvalues; i++) {
        sumMinEigenvalues += eigenvalues.at<double>(i);
    }

    return sqrt(std::max(sumMinEigenvalues / count, 0.0));
}

// 保存中间结果
void NlmGPUProcessor::saveIntermediateResults(const std::string& prefix) const {
    std::cout << "Saving intermediate results with prefix: " << prefix << std::endl;
    
    if (!m_noisyImageGpu.empty()) {
        cv::Mat noisyNormalized;
        cv::normalize(m_noisyImageGpu, noisyNormalized, 0, 255, cv::NORM_MINMAX, CV_8U);
        cv::imwrite("./images/" + prefix + "_noisy.png", noisyNormalized);
    }
    
    if (!m_meanImageGpu.empty()) {
        cv::Mat meanNormalized;
        cv::normalize(m_meanImageGpu, meanNormalized, 0, 255, cv::NORM_MINMAX, CV_8U);
        cv::imwrite("./images/" + prefix + "_mean.png", meanNormalized);
    }
    
    if (!m_meanImage8uGpu.empty()) {
        cv::imwrite("./images/" + prefix + "_mean_8u.png", m_meanImage8uGpu);
    }
    
    if (!m_structureCleanGpu.empty()) {
        cv::Mat structureNormalized;
        cv::normalize(m_structureCleanGpu, structureNormalized, 0, 255, cv::NORM_MINMAX, CV_8U);
        cv::imwrite("./images/" + prefix + "_structure_clean.png", structureNormalized);
    }
    
    // 保存堆栈中的每个通道
    for (size_t i = 0; i < m_stackGpu.size(); i++) {
        if (!m_stackGpu[i].empty()) {
            cv::Mat channelNormalized;
            cv::normalize(m_stackGpu[i], channelNormalized, 0, 255, cv::NORM_MINMAX, CV_8U);
            cv::imwrite("./images/" + prefix + "_stack_" + std::to_string(i) + ".png", 
                       channelNormalized);
        }
    }
    
    // 保存统计数据
    std::ofstream statsFile(prefix + "_stats_gpu.txt");
    if (statsFile.is_open()) {
        statsFile << "GPU Processing Statistics\n";
        statsFile << "=======================\n\n";
        statsFile << "Parameters:\n";
        statsFile << "  Rows: " << m_rows << "\n";
        statsFile << "  Cols: " << m_cols << "\n";
        statsFile << "  MaxVal: " << m_maxVal << "\n";
        statsFile << "  PatchSize: " << m_patchSize << "\n";
        statsFile << "  SigmaEstPatchSize: " << m_sigmaEstPatchSize << "\n";
        statsFile << "  HFactor: " << m_hFactor << "\n";
        statsFile << "  DiffSigma: " << m_diffSigma << "\n\n";
        
        statsFile << "Results:\n";
        statsFile << "  SigmaEst: " << m_sigmaEst << "\n";
        statsFile << "  HFinal: " << m_hFinal << "\n\n";
        
        statsFile << "Timings (seconds):\n";
        const char* stageNames[] = {
            "Read Data", "Split Channels", "Compute Mean", "Noise Estimation",
            "NLM Denoising", "Total"
        };
        
        for (size_t i = 0; i < std::min(m_timings.size(), 
               sizeof(stageNames)/sizeof(stageNames[0])); i++) {
            statsFile << "  " << stageNames[i] << ": " 
                     << std::fixed << std::setprecision(6) << m_timings[i] << "\n";
        }
        statsFile.close();
    }
}

// 打印统计信息
void NlmGPUProcessor::printStatistics() const {
    std::cout << "\n=== GPU Processing Statistics ===" << std::endl;
    std::cout << "Parameters:" << std::endl;
    std::cout << "  Rows: " << m_rows << std::endl;
    std::cout << "  Cols: " << m_cols << std::endl;
    std::cout << "  MaxVal: " << m_maxVal << std::endl;
    std::cout << "  PatchSize: " << m_patchSize << std::endl;
    std::cout << "  SigmaEstPatchSize: " << m_sigmaEstPatchSize << std::endl;
    std::cout << "  HFactor: " << m_hFactor << std::endl;
    std::cout << "  DiffSigma: " << m_diffSigma << std::endl;
    
    std::cout << "\nResults:" << std::endl;
    std::cout << "  SigmaEst: " << m_sigmaEst << std::endl;
    std::cout << "  HFinal: " << m_hFinal << std::endl;
    
    std::cout << "\nTimings (seconds):" << std::endl;
    const char* stageNames[] = {
        "1. Read Data", "2. Split Channels", "3. Compute Mean", "4. Noise Estimation",
        "5. NLM Denoising", "6. Total"
    };
    
    double sumStageTime = 0;
    for (size_t i = 0; i < m_timings.size() - 1; i++) {
        std::cout << "  " << stageNames[i] << ": " 
                 << std::fixed << std::setprecision(6) << m_timings[i] << std::endl;
        if (i < m_timings.size() - 1) {
            sumStageTime += m_timings[i];
        }
    }
    
    std::cout << "  " << stageNames[m_timings.size() - 1] << ": " 
             << std::fixed << std::setprecision(6) << m_timings.back() << std::endl;
    
    if (m_timings.size() > 1) {
        std::cout << "  Sum of stages: " << sumStageTime << std::endl;
    }
    std::cout << "=====================================================" << std::endl;
}

1.4 Compare

1.4.1 Compare.h
// Compare.h
#ifndef COMPARE_H
#define COMPARE_H

#include <opencv2/opencv.hpp>
#include <string>
#include <cmath>

/**
 * @brief 比较CPU和GPU处理结果的函数
 * 
 * @param cpuMat CPU处理结果(cv::Mat)
 * @param gpuUmat GPU处理结果(cv::UMat)
 * @param name 比较对象的名称(用于输出标识)
 * @param tolerance 容差值,默认1e-5
 */
void compareCPUvsGPU(const cv::Mat& cpuMat, const cv::UMat& gpuUmat, 
                     const std::string& name, float tolerance = 1e-5f);

#endif // COMPARE_H
1.4.2 Compare.cpp
// Compare.cpp
#include "Compare.h"
#include <iostream>
#include <iomanip>

// 增强版的比较函数,自动处理类型转换
void compareCPUvsGPU(const cv::Mat& cpuMat, const cv::UMat& gpuUmat, 
                     const std::string& name, float tolerance) {
    std::cout << "\n=== Comparing CPU vs GPU: " << name << " ===" << std::endl;
    
    // 下载GPU数据
    cv::Mat gpuMat;
    gpuUmat.copyTo(gpuMat);
    
    // 检查尺寸
    if (cpuMat.size() != gpuMat.size()) {
        std::cout << "ERROR: Size mismatch!" << std::endl;
        std::cout << "  CPU: " << cpuMat.rows << "x" << cpuMat.cols << std::endl;
        std::cout << "  GPU: " << gpuMat.rows << "x" << gpuMat.cols << std::endl;
        return;
    }
    
    // 输出类型信息
    std::cout << "CPU type: " << cpuMat.type() 
              << " (CV_64F=" << CV_64F << ", CV_32F=" << CV_32F 
              << ", CV_8U=" << CV_8U << ")" << std::endl;
    std::cout << "GPU type: " << gpuMat.type() << std::endl;
    
    // 处理类型不匹配:统一转换为float进行比较
    cv::Mat cpuFloatMat, gpuFloatMat;
    
    if (cpuMat.type() != gpuMat.type()) {
        std::cout << "INFO: Type mismatch, converting both to CV_32F for comparison" << std::endl;
        
        // 转换CPU数据
        if (cpuMat.depth() == CV_64F) {
            cpuMat.convertTo(cpuFloatMat, CV_32F);
        } else if (cpuMat.depth() == CV_8U) {
            cpuMat.convertTo(cpuFloatMat, CV_32F);
        } else {
            cpuFloatMat = cpuMat.clone();
        }
        
        // 转换GPU数据
        if (gpuMat.depth() == CV_64F) {
            gpuMat.convertTo(gpuFloatMat, CV_32F);
        } else if (gpuMat.depth() == CV_8U) {
            gpuMat.convertTo(gpuFloatMat, CV_32F);
        } else {
            gpuFloatMat = gpuMat.clone();
        }
        
        // 再次检查
        if (cpuFloatMat.type() != gpuFloatMat.type()) {
            std::cout << "ERROR: Failed to convert to same type!" << std::endl;
            return;
        }
    } else {
        // 类型相同,直接使用
        if (cpuMat.type() == CV_64F) {
            cpuMat.convertTo(cpuFloatMat, CV_32F);
            gpuMat.convertTo(gpuFloatMat, CV_32F);
        } else {
            cpuFloatMat = cpuMat.clone();
            gpuFloatMat = gpuMat.clone();
        }
    }
    
    std::cout << "After conversion: CPU type=" << cpuFloatMat.type() 
              << ", GPU type=" << gpuFloatMat.type() << std::endl;
    
    // 逐元素比较
    int mismatchCount = 0;
    float maxDiff = 0.0f;
    int maxDiffRow = -1, maxDiffCol = -1;
    float cpuMaxDiffVal = 0.0f, gpuMaxDiffVal = 0.0f;
    
    for (int row = 0; row < cpuFloatMat.rows; row++) {
        const float* cpuRow = cpuFloatMat.ptr<float>(row);
        const float* gpuRow = gpuFloatMat.ptr<float>(row);
        
        for (int col = 0; col < cpuFloatMat.cols; col++) {
            float diff = std::abs(cpuRow[col] - gpuRow[col]);
            
            if (diff > maxDiff) {
                maxDiff = diff;
                maxDiffRow = row;
                maxDiffCol = col;
                cpuMaxDiffVal = cpuRow[col];
                gpuMaxDiffVal = gpuRow[col];
            }
            
            if (diff > tolerance) {
                mismatchCount++;
                if (mismatchCount <= 5) {  // 只打印前5个不匹配
                    std::cout << "  Mismatch at [" << row << "," << col << "]: "
                              << "CPU=" << cpuRow[col] << ", GPU=" << gpuRow[col]
                              << ", diff=" << diff << std::endl;
                }
            }
        }
    }
    
    // 输出统计信息
    std::cout << "\nComparison Summary:" << std::endl;
    std::cout << "  Total elements: " << cpuFloatMat.total() << std::endl;
    std::cout << "  Mismatch count: " << mismatchCount << std::endl;
    std::cout << "  Mismatch percentage: " 
              << std::fixed << std::setprecision(4)
              << (100.0f * mismatchCount / cpuFloatMat.total()) << "%" << std::endl;
    std::cout << "  Max difference: " << maxDiff << std::endl;
    std::cout << "    at [" << maxDiffRow << "," << maxDiffCol << "]" << std::endl;
    std::cout << "    CPU: " << cpuMaxDiffVal << std::endl;
    std::cout << "    GPU: " << gpuMaxDiffVal << std::endl;
    
    // 计算整体差异
    cv::Mat diffMat;
    cv::absdiff(cpuFloatMat, gpuFloatMat, diffMat);
    double avgDiff = cv::mean(diffMat)[0];
    
    std::cout << "  Average difference: " << avgDiff << std::endl;
    
    if (mismatchCount == 0) {
        std::cout << "✓ CPU and GPU results match within tolerance!" << std::endl;
    } else if (mismatchCount < cpuFloatMat.total() * 0.01) {
        std::cout << "⚠ Minor differences (<1%) between CPU and GPU" << std::endl;
    } else {
        std::cout << "✗ Significant differences between CPU and GPU!" << std::endl;
    }
    std::cout << "===================================\n" << std::endl;
}

1.5 main.cpp

// main.cpp
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/core/ocl.hpp>
#include "NlmCPU.h"
#include "NlmGPU.h"
#include "Compare.h"

#define DEBUG 0

// --- 主要处理函数 ---
int main() {
    std::vector<cv::ocl::PlatformInfo> platforms;
    cv::ocl::getPlatfomsInfo(platforms);  // 注意:原代码拼写错误应为getPlatformsInfo
    
    if (platforms.empty()) {
        std::cerr << "No OpenCL platforms found!" << std::endl;
        return -1;
    }
    
    const cv::ocl::PlatformInfo* platform = &platforms[0];
    std::cout << "Platform Name: " << platform->name().c_str() << std::endl;

    cv::ocl::Device device;
    platform->getDevice(device, 0);
    std::cout << "Device name: " << device.name().c_str() << std::endl;
    
    cv::ocl::setUseOpenCL(true);
    std::cout << "Use OpenCL Device? " << cv::ocl::useOpenCL() << std::endl;

    cv::UMat displayFrame;
    
    // 创建处理器实例
    NlmGPUProcessor gpuProcessor;  // 改为驼峰命名
    gpuProcessor.setRows(1600);
    gpuProcessor.setCols(1200);

    NlmCPUProcessor cpuProcessor;  // 改为驼峰命名
    cpuProcessor.setRows(1600);
    cpuProcessor.setCols(1200);

    std::string path = "bai2.raw";
     while (true) {
        // CPU处理
        cv::Mat denoisedCpu = cpuProcessor.denoiseRaw9Ch(path);  // 函数名改为小写开头
        cpuProcessor.saveIntermediateResults();
        cpuProcessor.printStatistics();

        // GPU处理
        cv::UMat denoisedGpu = gpuProcessor.denoiseRaw9Ch(path);  // 函数名改为小写开头
        gpuProcessor.saveIntermediateResults();
        gpuProcessor.printStatistics();  

#if DEBUG
        compareCPUvsGPU(cpuProcessor.getNoisyImage(),gpuProcessor.getNoisyImageGPU(), "读取数据");
        compareCPUvsGPU(cpuProcessor.getStack().at(0),gpuProcessor.getStackGPU().at(0), "通道0");
        compareCPUvsGPU(cpuProcessor.getStack().at(1),gpuProcessor.getStackGPU().at(1), "通道1");
        compareCPUvsGPU(cpuProcessor.getStack().at(2),gpuProcessor.getStackGPU().at(2), "通道2");
        compareCPUvsGPU(cpuProcessor.getMeanImage(),gpuProcessor.getMeanImageGPU(), "均值");        
        compareCPUvsGPU(cpuProcessor.getStructureClean(),gpuProcessor.getStructureCleanGPU(), "NLM");
#endif

        if (denoisedGpu.empty()) {
            std::cerr << "无法读取文件或处理失败。" << std::endl;
        } else {
            // 转换回8U用于显示
            denoisedGpu.convertTo(displayFrame, CV_8U);
            
            // 显示结果
            cv::imshow("Denoised Frame", displayFrame);
        }

        int key = cv::waitKey(30);
        if (key == 27 || key == 'q' || key == 'Q') {
            break;
        }
    }

    // 关闭所有窗口
    cv::destroyAllWindows();
    return 0;
}

1.6 编译运行

pi@NanoPC-T6:/opt/opencl-project/nlm_project$ ./build/nlm_project
arm_release_ver: g13p0-01eac0, rk_so_ver: 10
Platform Name: ARM Platform
Device name: Mali-G610 r0p0
Use OpenCL Device? 1
Saving intermediate results with prefix: cpu

=== CPU Processing Statistics ===
Parameters:
  Rows: 1600
  Cols: 1200
  MaxVal: 1023
  PatchSize: 7
  SigmaEstPatchSize: 8
  HFactor: 0.8
  DiffSigma: 2.5

Results:
  SigmaEst: 2.42909
  HFinal: 1.94327

Timings (seconds):
  1. Read Data: 0.053899
  2. Split Channels: 0.036228
  3. Compute Mean: 0.003276
  4. Noise Estimation: 0.106404
  5. NLM Denoising: 0.066914
  6. Total: 0.266724
  Sum of stages: 0.266722
=====================================================
Saving intermediate results with prefix: gpu

=== GPU Processing Statistics ===
Parameters:
  Rows: 1600
  Cols: 1200
  MaxVal: 1023.000000
  PatchSize: 7
  SigmaEstPatchSize: 8
  HFactor: 0.800000
  DiffSigma: 2.500000

Results:
  SigmaEst: 0.000000
  HFinal: 0.000000

Timings (seconds):
  1. Read Data: 0.011386
  2. Split Channels: 0.042756
  3. Compute Mean: 0.000962
  4. Noise Estimation: 0.101066
  5. NLM Denoising: 0.001585
  6. Total: 0.157755
  Sum of stages: 0.157754
=====================================================
Gtk-Message: 16:07:40.455: Failed to load module "canberra-gtk-module"
Saving intermediate results with prefix: cpu

=== CPU Processing Statistics ===
Parameters:
  Rows: 1600
  Cols: 1200
  MaxVal: 1023.000000
  PatchSize: 7
  SigmaEstPatchSize: 8
  HFactor: 0.800000
  DiffSigma: 2.500000

Results:
  SigmaEst: 2.429091
  HFinal: 1.943273

Timings (seconds):
  1. Read Data: 0.046851
  2. Split Channels: 0.028041
  3. Compute Mean: 0.004390
  4. Noise Estimation: 0.102197
  5. NLM Denoising: 0.045189
  6. Total: 0.226671
  Sum of stages: 0.226668
=====================================================
Saving intermediate results with prefix: gpu

=== GPU Processing Statistics ===
Parameters:
  Rows: 1600
  Cols: 1200
  MaxVal: 1023.000000
  PatchSize: 7
  SigmaEstPatchSize: 8
  HFactor: 0.800000
  DiffSigma: 2.500000

Results:
  SigmaEst: 0.000000
  HFinal: 0.000000

Timings (seconds):
  1. Read Data: 0.011834
  2. Split Channels: 0.000097
  3. Compute Mean: 0.000463
  4. Noise Estimation: 0.087953
  5. NLM Denoising: 0.000411
  6. Total: 0.100760
  Sum of stages: 0.100759

posted @ 2025-12-31 00:21  大奥特曼打小怪兽  阅读(0)  评论(0)    收藏  举报
如果有任何技术小问题,欢迎大家交流沟通,共同进步