TensorRT&Sample&Python[introductory_parser_samples]


本文基于TensorRT 5.0.2,对其自带的introductory_parser_samples例子进行分析和介绍。

1 引言

假设当前路径为:

TensorRT-5.0.2.6/samples

其对应当前例子文件目录树为:

# tree python

python/
├── common.py
├── introductory_parser_samples
│   ├── caffe_resnet50.py
│   ├── onnx_resnet50.py
│   ├── README.md
│   ├── requirements.txt
│   └── uff_resnet50.py

该例子展示如何使用TensorRT和包含的对应解析器(UFF,Caffe,ONNX解析器),基于在不同框架下训练的ResNet-50结构来进行inference。

  • caffe_resnet50: 该例子展示如何构建基于Caffe解析器去解析Caffe训练的模型,并构建引擎然后进行inference;
  • onnx_resnet50: 该例子展示如何基于开源的ONNX解析器解析ONNX模型,并构建引擎然后进行inference;
  • uff_resnet50: 该例子展示如何从一个UFF模型文件(从一个tf protobuf转换过来)构建引擎,然后inference。

2 caffe_resnet50

所需要的文件内容包含:

/TensorRT-5.0.2.6/python/data/resnet50/
├── binoculars-cc0.jpeg
├── binoculars.jpeg
├── canon-cc0.jpeg
├── class_labels.txt
├── mug-cc0.jpeg
├── reflex_camera.jpeg
├── ResNet50_fp32.caffemodel
├── resnet50-infer-5.uff
├── ResNet50_N2.prototxt
├── ResNet50.onnx
└── tabby_tiger_cat.jpg

先上完整代码,从main函数开始,逐个调用外部的函数完成整个流程,整个代码还是挺简单的:

# 该例子使用Caffe ResNet50 模型去创建一个TensorRT Inference Engine
import random
import argparse
from collections import namedtuple
from PIL import Image
import numpy as np

import pycuda.driver as cuda
import pycuda.autoinit # 该import会让pycuda自动管理CUDA上下文的创建和清理工作

import tensorrt as trt

import sys, os

# sys.path.insert(1, os.path.join(sys.path[0], ".."))
# import common
# 这里将common中的GiB和find_sample_data函数移动到该py文件中,保证自包含。
def GiB(val):
    '''Convert a size given in GiB into a number of bytes.

    Args:
        val: Size in GiB. Integer values are converted exactly; fractional
            values such as 0.5 are accepted and truncated to whole bytes.

    Returns:
        int: val multiplied by 2**30 (shifting by 10 bits would give KiB,
        by 20 bits MiB).
    '''
    # Multiply by an explicitly parenthesised 2**30 instead of shifting val:
    # this does not rely on operator precedence and also works for floats,
    # which cannot be bit-shifted.
    return int(val * (1 << 30))

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=None, data_root=None):
    '''Locate the sample data directory and resolve file names inside it.

    Args:
        description (str): Description of the sample. Accepted for
            compatibility with existing callers; not used by this function.
        subfolder (str): Subfolder of the data root containing the data for
            this sample. If it does not exist, a warning is printed and the
            data root itself is used instead.
        find_files (list of str): File names to look up. The list passed in
            is left untouched; a new list of absolute paths is returned.
        data_root (str): Directory to search. When omitted, the hard-coded
            TensorRT 5.0.2.6 ResNet-50 data directory is used.

    Returns:
        (data_path, absolute_paths) when find_files is non-empty, otherwise
        only data_path.

    Raises:
        FileNotFoundError: If the data directory or any requested file does
            not exist.
    '''
    if data_root is None:
        # For brevity the default location is hard-coded.
        data_root = "/TensorRT-5.0.2.6/python/data/resnet50/"
    data_root = os.path.abspath(data_root)

    data_path = os.path.join(data_root, subfolder)
    if not os.path.exists(data_path):
        print("WARNING: " + data_path + " does not exist. Using " + data_root + " instead.")
        data_path = data_root

    if not os.path.exists(data_path):
        raise FileNotFoundError(data_path + " does not exist.")

    # Build a fresh list instead of overwriting the caller's list in place.
    resolved = []
    for name in find_files or []:
        full_path = os.path.abspath(os.path.join(data_path, name))
        if not os.path.exists(full_path):
            raise FileNotFoundError(full_path + " does not exist. ")
        resolved.append(full_path)

    if resolved:
        return data_path, resolved
    return data_path
#-----------------

# Static description of the Caffe ResNet-50 model used by this sample.
_ModelData = namedtuple(
    '_ModelData',
    ['MODEL_PATH', 'DEPLOY_PATH', 'INPUT_SHAPE', 'OUTPUT_NAME', 'DTYPE'],
)
ModelData = _ModelData(
    MODEL_PATH="ResNet50_fp32.caffemodel",
    DEPLOY_PATH="ResNet50_N2.prototxt",
    INPUT_SHAPE=(3, 224, 224),
    OUTPUT_NAME="prob",
    # TensorRT data types can be converted to NumPy types with trt.nptype().
    DTYPE=trt.float32,
)
# Logger handed to the TensorRT builder, created with severity WARNING.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)


# Step 2 of main(): build a TensorRT engine.
# The Caffe path is used for Caffe2 models.
def build_engine_caffe(model_file, deploy_file):
    '''Parse a Caffe model and build a TensorRT engine from it.

    Args:
        model_file (str): Path of the .caffemodel weights file.
        deploy_file (str): Path of the deploy .prototxt file.

    Returns:
        The result of builder.build_cuda_engine(network).

    Raises:
        RuntimeError: If the parser returns no result, or the output blob
            named by ModelData.OUTPUT_NAME cannot be found.
    '''
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.CaffeParser() as parser:

        # Maximum amount of memory the builder may use while building the
        # engine.
        builder.max_workspace_size = GiB(1)

        # Parse the Caffe model and populate the TensorRT network. The
        # returned object lets tensors be looked up by name.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        if model_tensors is None:
            raise RuntimeError("Failed to parse Caffe model " + str(model_file) + " with deploy file " + str(deploy_file))

        # Caffe models do not declare their outputs, so the output tensor is
        # looked up by its known name and marked explicitly. Fail here with a
        # clear message rather than passing None on to mark_output().
        output_tensor = model_tensors.find(ModelData.OUTPUT_NAME)
        if output_tensor is None:
            raise RuntimeError("Output blob " + ModelData.OUTPUT_NAME + " not found in " + str(deploy_file))
        network.mark_output(output_tensor)

        return builder.build_cuda_engine(network)


# Step 3 of main(): allocate host and device buffers and create a stream.
def allocate_buffers(engine):
    '''Allocate the buffers and the CUDA stream used for inference.

    Binding 0 is treated as the input and binding 1 as the output. Both host
    buffers use the NumPy type corresponding to ModelData.DTYPE.

    Args:
        engine: Engine whose binding shapes determine the buffer sizes.

    Returns:
        tuple: (h_input, d_input, h_output, d_output, stream).
    '''
    host_dtype = trt.nptype(ModelData.DTYPE)

    # Page-locked host buffers (not swapped to disk), one flat array per
    # binding, sized by the volume of the binding shape.
    h_input, h_output = [
        cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(binding)), dtype=host_dtype)
        for binding in (0, 1)
    ]

    # Device buffers with the same byte size as their host counterparts.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)

    # Stream used for the input/output copies and for running inference.
    return h_input, d_input, h_output, d_output, cuda.Stream()


# Step 4 of main(): load a test image into the host input buffer.
def load_normalized_test_case(test_image, pagelocked_buffer):
    '''Load an image as a flat CHW array and copy it into a host buffer.

    Despite the name, this variant applies no mean/scale normalization: the
    image is only resized, transposed and cast to ModelData.DTYPE.

    Args:
        test_image (str): Path of the image file to load.
        pagelocked_buffer: Destination array that receives the flattened
            image data.

    Returns:
        The test_image argument, unchanged.
    '''
    def normalize_image(image):
        _, h, w = ModelData.INPUT_SHAPE
        # Image.LANCZOS is the same filter as the Image.ANTIALIAS alias,
        # which was removed in Pillow 10.
        resized = image.resize((w, h), Image.LANCZOS)
        # Reorder the axes to channel-first and flatten.
        # Assumes the image has a channel axis (e.g. RGB) - TODO confirm for
        # other inputs.
        return np.asarray(resized).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()

    # The with-block closes the image file once its data has been copied.
    with Image.open(test_image) as image:
        np.copyto(pagelocked_buffer, normalize_image(image))

    return test_image


# Step 5 of main(): run inference.
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    '''Run one inference on the given stream and wait for it to finish.

    The input copy, the execution and the output copy are all issued on
    stream; the function returns after the stream has been synchronized.

    Args:
        context: Execution context used to run the engine.
        h_input: Host buffer holding the input data.
        d_input: Device buffer for the input.
        h_output: Host buffer that receives the output.
        d_output: Device buffer for the output.
        stream: CUDA stream on which all operations are issued.
    '''
    # Host -> device copy of the input.
    cuda.memcpy_htod_async(d_input, h_input, stream)

    # The bindings are passed as integer device addresses, input first.
    device_addresses = [int(buffer) for buffer in (d_input, d_output)]
    context.execute_async(bindings=device_addresses, stream_handle=stream.handle)

    # Device -> host copy of the result, then wait for the stream.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    stream.synchronize()


def main():
    '''Classify one randomly chosen test image with the Caffe ResNet-50 engine and print the result. '''

    # 1 - Locate the test images, the model files and the label file.
    _, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg",
                    "reflex_camera.jpeg",
                    "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH,
                    ModelData.DEPLOY_PATH,
                    "class_labels.txt"])
    test_images = data_files[0:3]  # the three test images
    # Caffe weights file, deploy file and label file, in that order.
    caffe_model_file, caffe_deploy_file, labels_file = data_files[3:]
    # Read the labels inside a with-block so the file handle is closed.
    with open(labels_file, 'r') as labels_stream:
        labels = labels_stream.read().split('\n')

    # 2 - Build a TensorRT engine with build_engine_caffe.
    with build_engine_caffe(caffe_model_file, caffe_deploy_file) as engine:
        # From here on the flow is the same whichever parser built the engine.

        # 3 - Allocate the buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)

        # 4 - The execution context is what actually runs inference.
        with engine.create_execution_context() as context:

            # Pick a test image and load it into the page-locked host buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)

            # Run the engine. Per the original sample, the output is a
            # 1000-element vector of class probabilities.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Map the index of the highest score to its label.
            pred = labels[np.argmax(h_output)]

            # The prediction counts as correct when the label, with its
            # whitespace replaced by underscores, occurs in the image file
            # name (extension removed).
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)

if __name__ == '__main__':
    main()

3 onnx_resnet50

将下面的代码与上面例子的代码进行对比,可以发现流程基本一致,只是其中个别函数有所不同。

# 该例子使用ONNX ResNet50 模型去创建一个TensorRT Inference Engine
import random
from PIL import Image
from collections import namedtuple
import numpy as np

import pycuda.driver as cuda
import pycuda.autoinit # 该import会让pycuda自动管理CUDA上下文的创建和清理工作

import tensorrt as trt

import sys, os

# import common
# 这里将common中的GiB和find_sample_data函数移动到该py文件中,保证自包含。
def GiB(val):
    '''Convert a size given in GiB into a number of bytes.

    Args:
        val: Size in GiB. Integer values are converted exactly; fractional
            values such as 0.5 are accepted and truncated to whole bytes.

    Returns:
        int: val multiplied by 2**30 (shifting by 10 bits would give KiB,
        by 20 bits MiB).
    '''
    # Multiply by an explicitly parenthesised 2**30 instead of shifting val:
    # this does not rely on operator precedence and also works for floats,
    # which cannot be bit-shifted.
    return int(val * (1 << 30))

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=None, data_root=None):
    '''Locate the sample data directory and resolve file names inside it.

    Args:
        description (str): Description of the sample. Accepted for
            compatibility with existing callers; not used by this function.
        subfolder (str): Subfolder of the data root containing the data for
            this sample. If it does not exist, a warning is printed and the
            data root itself is used instead.
        find_files (list of str): File names to look up. The list passed in
            is left untouched; a new list of absolute paths is returned.
        data_root (str): Directory to search. When omitted, the hard-coded
            TensorRT 5.0.2.6 ResNet-50 data directory is used.

    Returns:
        (data_path, absolute_paths) when find_files is non-empty, otherwise
        only data_path.

    Raises:
        FileNotFoundError: If the data directory or any requested file does
            not exist.
    '''
    if data_root is None:
        # For brevity the default location is hard-coded.
        data_root = "/TensorRT-5.0.2.6/python/data/resnet50/"
    data_root = os.path.abspath(data_root)

    data_path = os.path.join(data_root, subfolder)
    if not os.path.exists(data_path):
        print("WARNING: " + data_path + " does not exist. Using " + data_root + " instead.")
        data_path = data_root

    if not os.path.exists(data_path):
        raise FileNotFoundError(data_path + " does not exist.")

    # Build a fresh list instead of overwriting the caller's list in place.
    resolved = []
    for name in find_files or []:
        full_path = os.path.abspath(os.path.join(data_path, name))
        if not os.path.exists(full_path):
            raise FileNotFoundError(full_path + " does not exist. ")
        resolved.append(full_path)

    if resolved:
        return data_path, resolved
    return data_path
#-----------------

# Static description of the ONNX ResNet-50 model used by this sample.
_ModelData = namedtuple(
    '_ModelData',
    ['MODEL_PATH', 'INPUT_SHAPE', 'DTYPE'],
)
ModelData = _ModelData(
    MODEL_PATH="ResNet50.onnx",
    INPUT_SHAPE=(3, 224, 224),
    # TensorRT data types can be converted to NumPy types with trt.nptype().
    DTYPE=trt.float32,
)

# Logger handed to the TensorRT builder and parser, created with severity
# WARNING.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)


# Step 2 of main(): build a TensorRT engine.
# The Onnx path is used for Onnx models.
def build_engine_onnx(model_file):
    '''Parse an ONNX model and build a TensorRT engine from it.

    Args:
        model_file (str): Path of the .onnx model file.

    Returns:
        The result of builder.build_cuda_engine(network).

    Raises:
        RuntimeError: If the ONNX parser reports that parsing failed. The
            message lists the errors recorded by the parser.
    '''
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:

        # Maximum amount of memory the builder may use while building the
        # engine.
        builder.max_workspace_size = GiB(1)

        # Load the ONNX model, parse it and populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parsed = parser.parse(model.read())

        # Stop on a failed parse instead of building an engine from an
        # incomplete network.
        if not parsed:
            errors = [str(parser.get_error(index)) for index in range(parser.num_errors)]
            raise RuntimeError("Failed to parse ONNX model " + str(model_file) + ": " + "; ".join(errors))

        return builder.build_cuda_engine(network)


# Step 3 of main(): allocate host and device buffers and create a stream.
def allocate_buffers(engine):
    '''Allocate the buffers and the CUDA stream used for inference.

    Binding 0 is treated as the input and binding 1 as the output. Both host
    buffers use the NumPy type corresponding to ModelData.DTYPE.

    Args:
        engine: Engine whose binding shapes determine the buffer sizes.

    Returns:
        tuple: (h_input, d_input, h_output, d_output, stream).
    '''
    host_dtype = trt.nptype(ModelData.DTYPE)

    # Page-locked host buffers (not swapped to disk), one flat array per
    # binding, sized by the volume of the binding shape.
    h_input, h_output = [
        cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(binding)), dtype=host_dtype)
        for binding in (0, 1)
    ]

    # Device buffers with the same byte size as their host counterparts.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)

    # Stream used for the input/output copies and for running inference.
    return h_input, d_input, h_output, d_output, cuda.Stream()


# Step 4 of main(): load a test image, normalize it and fill the host buffer.
def load_normalized_test_case(test_image, pagelocked_buffer):
    '''Load an image, normalize it and copy it into a host buffer.

    Args:
        test_image (str): Path of the image file to load.
        pagelocked_buffer: Destination array that receives the flattened,
            normalized image data.

    Returns:
        The test_image argument, unchanged.
    '''
    def normalize_image(image):
        # Unlike the Caffe variant, this one also rescales the pixel values.
        _, h, w = ModelData.INPUT_SHAPE
        # Image.LANCZOS is the same filter as the Image.ANTIALIAS alias,
        # which was removed in Pillow 10.
        resized = image.resize((w, h), Image.LANCZOS)
        # Reorder the axes to channel-first and flatten.
        # Assumes the image has a channel axis (e.g. RGB) - TODO confirm for
        # other inputs.
        image_arr = np.asarray(resized).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
        # This ResNet-50 needs preprocessing: divide by 255, then subtract
        # 0.45 and divide by 0.225 (mean normalization per the original
        # sample).
        return (image_arr / 255.0 - 0.45) / 0.225

    # The with-block closes the image file once its data has been copied.
    with Image.open(test_image) as image:
        np.copyto(pagelocked_buffer, normalize_image(image))

    return test_image


# Step 5 of main(): run inference.
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    '''Run one inference on the given stream and wait for it to finish.

    The input copy, the execution and the output copy are all issued on
    stream; the function returns after the stream has been synchronized.

    Args:
        context: Execution context used to run the engine.
        h_input: Host buffer holding the input data.
        d_input: Device buffer for the input.
        h_output: Host buffer that receives the output.
        d_output: Device buffer for the output.
        stream: CUDA stream on which all operations are issued.
    '''
    # Host -> device copy of the input.
    cuda.memcpy_htod_async(d_input, h_input, stream)

    # The bindings are passed as integer device addresses, input first.
    device_addresses = [int(buffer) for buffer in (d_input, d_output)]
    context.execute_async(bindings=device_addresses, stream_handle=stream.handle)

    # Device -> host copy of the result, then wait for the stream.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    stream.synchronize()


def main():
    '''Classify one randomly chosen test image with the ONNX ResNet-50 engine and print the result. '''

    # 1 - Locate the test images, the model file and the label file.
    _, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg",
                    "reflex_camera.jpeg",
                    "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH,
                    "class_labels.txt"])
    test_images = data_files[0:3]  # the three test images
    onnx_model_file, labels_file = data_files[3:]  # ONNX model file and label file
    # Read the labels inside a with-block so the file handle is closed.
    with open(labels_file, 'r') as labels_stream:
        labels = labels_stream.read().split('\n')

    # 2 - Build a TensorRT engine with build_engine_onnx.
    with build_engine_onnx(onnx_model_file) as engine:
        # From here on the flow is the same whichever parser built the
        # engine, since every variant uses the ResNet-50 architecture.

        # 3 - Allocate the buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)

        # 4 - The execution context is what actually runs inference.
        with engine.create_execution_context() as context:

            # Pick a test image, normalize it and load it into the
            # page-locked host buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)

            # Run the engine. Per the original sample, the output is a
            # 1000-element vector of class probabilities.
            do_inference(context, h_input, d_input, h_output, d_output, stream)

            # Map the index of the highest score to its label.
            pred = labels[np.argmax(h_output)]

            # The prediction counts as correct when the label, with its
            # whitespace replaced by underscores, occurs in the image file
            # name (extension removed).
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)

if __name__ == '__main__':
    main()

4 uff_resnet50

从下面的例子可以看出,这三个例子流程大致一致,只有个别区域有少许变化。
UFF是TensorRT内部使用的统一框架格式,用于表示优化前的网络结构图,可以将诸如pb等模型格式先转换成uff格式(参见NVIDIA博客文章 tensorrt-3-faster-tensorflow-inference)。

# 该例子使用UFF ResNet50 模型去创建一个TensorRT Inference Engine
import random
from PIL import Image
import numpy as np

import pycuda.driver as cuda
import pycuda.autoinit # 该import会让pycuda自动管理CUDA上下文的创建和清理工作

import tensorrt as trt

import sys, os

#sys.path.insert(1, os.path.join(sys.path[0], ".."))
# import common
# 这里将common中的GiB和find_sample_data函数移动到该py文件中,保证自包含。
def GiB(val):
    '''Convert a size given in GiB into a number of bytes.

    Args:
        val: Size in GiB. Integer values are converted exactly; fractional
            values such as 0.5 are accepted and truncated to whole bytes.

    Returns:
        int: val multiplied by 2**30 (shifting by 10 bits would give KiB,
        by 20 bits MiB).
    '''
    # Multiply by an explicitly parenthesised 2**30 instead of shifting val:
    # this does not rely on operator precedence and also works for floats,
    # which cannot be bit-shifted.
    return int(val * (1 << 30))

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=None, data_root=None):
    '''Locate the sample data directory and resolve file names inside it.

    Args:
        description (str): Description of the sample. Accepted for
            compatibility with existing callers; not used by this function.
        subfolder (str): Subfolder of the data root containing the data for
            this sample. If it does not exist, a warning is printed and the
            data root itself is used instead.
        find_files (list of str): File names to look up. The list passed in
            is left untouched; a new list of absolute paths is returned.
        data_root (str): Directory to search. When omitted, the hard-coded
            TensorRT 5.0.2.6 ResNet-50 data directory is used.

    Returns:
        (data_path, absolute_paths) when find_files is non-empty, otherwise
        only data_path.

    Raises:
        FileNotFoundError: If the data directory or any requested file does
            not exist.
    '''
    if data_root is None:
        # For brevity the default location is hard-coded.
        data_root = "/TensorRT-5.0.2.6/python/data/resnet50/"
    data_root = os.path.abspath(data_root)

    data_path = os.path.join(data_root, subfolder)
    if not os.path.exists(data_path):
        print("WARNING: " + data_path + " does not exist. Using " + data_root + " instead.")
        data_path = data_root

    if not os.path.exists(data_path):
        raise FileNotFoundError(data_path + " does not exist.")

    # Build a fresh list instead of overwriting the caller's list in place.
    resolved = []
    for name in find_files or []:
        full_path = os.path.abspath(os.path.join(data_path, name))
        if not os.path.exists(full_path):
            raise FileNotFoundError(full_path + " does not exist. ")
        resolved.append(full_path)

    if resolved:
        return data_path, resolved
    return data_path
#-----------------

# Static description of the UFF ResNet-50 model used by this sample.
# Only the namedtuple form is kept: a plain class with the same attributes
# would be rebound immediately by the assignment to ModelData below.
# namedtuple is imported here because this script's import section does not
# provide it.
from collections import namedtuple

_ModelData = namedtuple(
    '_ModelData',
    ['MODEL_PATH', 'INPUT_NAME', 'INPUT_SHAPE', 'OUTPUT_NAME', 'DTYPE'],
)
ModelData = _ModelData(
    MODEL_PATH="resnet50-infer-5.uff",
    INPUT_NAME="input",
    INPUT_SHAPE=(3, 224, 224),
    OUTPUT_NAME="GPU_0/tower_0/Softmax",
    # TensorRT data types can be converted to NumPy types with trt.nptype().
    DTYPE=trt.float32,
)

# Logger handed to the TensorRT builder, created with severity WARNING.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)


# Step 2 of main(): build a TensorRT engine.
# The UFF path is used for TensorFlow models. You can convert a frozen TensorFlow graph to UFF using the included convert-to-uff utility.
def build_engine_uff(model_file):
    '''Parse a UFF model and build a TensorRT engine from it.

    Args:
        model_file (str): Path of the .uff model file.

    Returns:
        The result of builder.build_cuda_engine(network).

    Raises:
        RuntimeError: If the UFF parser reports that parsing failed.
    '''
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:

        # Maximum amount of memory the builder may use while building the
        # engine.
        builder.max_workspace_size = GiB(1)

        # The input and output nodes have to be registered with the UFF
        # parser by hand.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)

        # Load the UFF model, parse it and populate the TensorRT network.
        # Stop on a failed parse instead of building an engine from an
        # incomplete network.
        if not parser.parse(model_file, network):
            raise RuntimeError("Failed to parse UFF model " + str(model_file))

        return builder.build_cuda_engine(network)


# Step 3 of main(): allocate host and device buffers and create a stream.
def allocate_buffers(engine):
    '''Allocate the buffers and the CUDA stream used for inference.

    Binding 0 is treated as the input and binding 1 as the output. Both host
    buffers use the NumPy type corresponding to ModelData.DTYPE.

    Args:
        engine: Engine whose binding shapes determine the buffer sizes.

    Returns:
        tuple: (h_input, d_input, h_output, d_output, stream).
    '''
    host_dtype = trt.nptype(ModelData.DTYPE)

    # Page-locked host buffers (not swapped to disk), one flat array per
    # binding, sized by the volume of the binding shape.
    h_input, h_output = [
        cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(binding)), dtype=host_dtype)
        for binding in (0, 1)
    ]

    # Device buffers with the same byte size as their host counterparts.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)

    # Stream used for the input/output copies and for running inference.
    return h_input, d_input, h_output, d_output, cuda.Stream()


# Step 4 of main(): load a test image into the host input buffer.
def load_normalized_test_case(test_image, pagelocked_buffer):
    '''Load an image as a flat CHW array and copy it into a host buffer.

    Despite the name, this variant applies no mean/scale normalization: the
    image is only resized, transposed and cast to ModelData.DTYPE.

    Args:
        test_image (str): Path of the image file to load.
        pagelocked_buffer: Destination array that receives the flattened
            image data.

    Returns:
        The test_image argument, unchanged.
    '''
    def normalize_image(image):
        _, h, w = ModelData.INPUT_SHAPE
        # Image.LANCZOS is the same filter as the Image.ANTIALIAS alias,
        # which was removed in Pillow 10.
        resized = image.resize((w, h), Image.LANCZOS)
        # Reorder the axes to channel-first and flatten.
        # Assumes the image has a channel axis (e.g. RGB) - TODO confirm for
        # other inputs.
        return np.asarray(resized).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()

    # The with-block closes the image file once its data has been copied.
    with Image.open(test_image) as image:
        np.copyto(pagelocked_buffer, normalize_image(image))

    return test_image


# Step 5 of main(): run inference.
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    '''Run one inference on the given stream and wait for it to finish.

    The input copy, the execution and the output copy are all issued on
    stream; the function returns after the stream has been synchronized.

    Args:
        context: Execution context used to run the engine.
        h_input: Host buffer holding the input data.
        d_input: Device buffer for the input.
        h_output: Host buffer that receives the output.
        d_output: Device buffer for the output.
        stream: CUDA stream on which all operations are issued.
    '''
    # Host -> device copy of the input.
    cuda.memcpy_htod_async(d_input, h_input, stream)

    # The bindings are passed as integer device addresses, input first.
    device_addresses = [int(buffer) for buffer in (d_input, d_output)]
    context.execute_async(bindings=device_addresses, stream_handle=stream.handle)

    # Device -> host copy of the result, then wait for the stream.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    stream.synchronize()


def main():
    '''Classify one randomly chosen test image with the UFF ResNet-50 engine and print the result. '''

    # 1 - Locate the test images, the model file and the label file.
    _, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg",
                    "reflex_camera.jpeg",
                    "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH,
                    "class_labels.txt"])
    test_images = data_files[0:3]  # the three test images
    uff_model_file, labels_file = data_files[3:]  # UFF model file and label file
    # Read the labels inside a with-block so the file handle is closed.
    with open(labels_file, 'r') as labels_stream:
        labels = labels_stream.read().split('\n')

    # 2 - Build a TensorRT engine with build_engine_uff.
    with build_engine_uff(uff_model_file) as engine:
        # From here on the flow is the same whichever parser built the
        # engine, since every variant uses the ResNet-50 architecture.

        # 3 - Allocate the buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)

        # 4 - The execution context is what actually runs inference.
        with engine.create_execution_context() as context:

            # Pick a test image and load it into the page-locked host buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)

            # Run the engine. Per the original sample, the output is a
            # 1000-element vector of class probabilities.
            do_inference(context, h_input, d_input, h_output, d_output, stream)

            # Map the index of the highest score to its label.
            pred = labels[np.argmax(h_output)]

            # The prediction counts as correct when the label, with its
            # whitespace replaced by underscores, occurs in the image file
            # name (extension removed).
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)

if __name__ == '__main__':
    main()

.

posted @ 2019-03-13 17:13  仙守  阅读(2333)  评论(1编辑  收藏  举报