jetson nano 部署yolov8(python)
八级玄仙 2024-10-08 11:05:03 阅读 100
前言
jetson nano 环境如下
sudo apt-cache show nvidia-jetpack
一、nano运行yolov8 pt模型
1、环境搭建
conda create -n yolo python=3.8
conda activate yolo
pip install ultralytics onnx lapx numpy==1.23.1 -i https://pypi.tuna.tsinghua.edu.cn/simple
# 安装Jetson的Pytorch GPU版本
pip install torch-*.whl torchvision-*.whl
# torch-1.11.0a0+gitbc2c6ed-cp38-cp38-linux_aarch64.whl
# torchvision-0.12.0a0+9b5a3fe-cp38-cp38-linux_aarch64.whl
安装后pip list查看
python -c "import torch;print(torch.cuda.is_available(), torch.__version__)"
2、推理测试
在终端运行, 同级目录需要有yolov8n.pt,bus.jpg文件
yolo task=detect mode=predict model=yolov8n.pt source=bus.jpg show=True
如果报错:OSError: libomp.so.5: cannot open shared object file: No such file or directory
执行sudo apt-get install libomp5可解决
结果
3、性能测试
内存/GPU占用
yolov8n.pt 1.71G
yolov8s.pt 1.77G
检测速度
yolov8n.pt FPS: 5.35
yolov8s.pt FPS: <3
m、l、x模型分别如下
通过yolov8直接运行.pt模型,GPU占用大,检测速度慢!
来自:https://i7y.org/en/yolov8-on-jetson-nano/
测试代码
import time
from ultralytics import YOLO
import cv2


def detect_objects(model_path, image_path, iterations=100, report_interval=20):
    """Benchmark YOLO inference on a single image and print the average FPS.

    The same image is run through ``model.predict`` ``iterations`` times.
    After every ``report_interval`` iterations the average time per
    iteration and the corresponding FPS for that interval are printed.
    Iterations left over after the last full interval are timed but not
    reported.

    Args:
        model_path: Path to the model weights passed to ``YOLO``.
        image_path: Path to the image read with ``cv2.imread``.
        iterations: Total number of predictions to run.
        report_interval: Number of iterations averaged per printed report.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    # Load the model
    model = YOLO(model_path)

    # Load the image. cv2.imread signals failure by returning None rather
    # than raising, so check explicitly before benchmarking.
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Cannot read image: {image_path}")

    # Time accumulated within the current reporting interval
    total_time = 0.0
    start_time = time.time()

    for i in range(iterations):
        # Perform the object detection; conf is the confidence threshold
        model.predict(source=img, conf=0.5)

        # Measure the time since the previous iteration finished.
        # NOTE(review): the first interval presumably includes one-time
        # warm-up cost, so later intervals are the more representative ones.
        end_time = time.time()
        elapsed_time = end_time - start_time
        start_time = end_time

        # Print the single iteration time
        # print(f"Iteration {i + 1}: Detection took {elapsed_time:.4f} seconds")
        total_time += elapsed_time

        # Print the results every `report_interval` iterations
        if (i + 1) % report_interval == 0:
            avg_time = total_time / report_interval
            fps = 1 / avg_time
            print(f"Iteration {i + 1}: Average Time: {avg_time:.4f} seconds, FPS: {fps:.2f}")
            total_time = 0.0  # Reset total time for next interval

    # Final print after all iterations
    print("Finished running all iterations.")


if __name__ == "__main__":
    # Define the paths to the model and the image
    model_path = "yolov8s.pt"
    image_path = "bus.jpg"

    # Call the detection function
    detect_objects(model_path, image_path, iterations=100, report_interval=20)
二、TensorRT Python Bindings
由于yolov8需要python3.8以上的版本,而jetson nano自带的python版tensorrt是绑定python3.6的,采用tensorrt加速yolov8模型时不兼容,需要安装python3.8版本的tensorrt。
参考:
Jetson NX实现TensorRT加速部署YOLOv8_yolov8模型部署nx-CSDN博客
Jetson/L4T/TRT Customized Example - eLinux.org
https://github.com/NVIDIA/TensorRT/tree/release/8.2
Index of /pool/main/p/python3.8
二、TensorRT Python Bindings
1. Building python3.9
$ sudo apt install zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev libsqlite3-dev libbz2-dev
$ wget https://www.python.org/ftp/python/3.9.1/Python-3.9.1.tar.xz
$ tar xvf Python-3.9.1.tar.xz Python-3.9.1/
$ mkdir build-python-3.9.1
$ cd build-python-3.9.1/
$ ../Python-3.9.1/configure --enable-optimizations
$ make -j $(nproc)
$ sudo -H make altinstall
$ cd ../
2. Build cmake 3.13.5
$ sudo apt-get install -y protobuf-compiler libprotobuf-dev openssl libssl-dev libcurl4-openssl-dev
$ wget https://github.com/Kitware/CMake/releases/download/v3.13.5/cmake-3.13.5.tar.gz
$ tar xvf cmake-3.13.5.tar.gz
$ rm cmake-3.13.5.tar.gz
$ cd cmake-3.13.5/
$ ./bootstrap --system-curl
$ make -j$(nproc)
$ echo 'export PATH='${PWD}'/bin/:$PATH' >> ~/.bashrc
$ source ~/.bashrc
$ cd ../
sudo apt install zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev libsqlite3-dev libbz2-dev
Installation
Download pybind11
Create a directory for external sources and download pybind11 into it.
export EXT_PATH=~/external
mkdir -p $EXT_PATH && cd $EXT_PATH
git clone https://github.com/pybind/pybind11.git
Download Python headers
Add Main Headers
Get the source code from the official python sources
下载 python3.8.19
Python Release Python 3.8.19 | Python.org
tar xvf Python-3.8.19.tar.xz Python-3.8.19
Building python3.9
$ sudo apt install zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev libsqlite3-dev libbz2-dev
$ wget https://www.python.org/ftp/python/3.9.1/Python-3.9.1.tar.xz
$ tar xvf Python-3.9.1.tar.xz Python-3.9.1/
$ mkdir build-python-3.9.1
$ cd build-python-3.9.1/
$ ../Python-3.9.1/configure --enable-optimizations
$ make -j $(nproc)
$ sudo -H make altinstall
$ cd ../
Add PyConfig.h
从官方获取python源代码 Python Source Releases | Python.org,下载对应的python版本。将python源码中Include路径下的内容拷贝到~/external/python3.8/include
中(python3.8/include 该目录需要自己新建
)。
下载 Python-3.8.19.tar.xz
tar xvf Python-3.8.19.tar.xz Python-3.8.19
mkdir -p ~/external/python3.8/include
cp -r Python-3.8.19/Include/* ~/external/python3.8/include/
将 libpython3.9-dev_3.9.2-1_arm64.deb 放到 ~/work/tool/,
下载地址:
http://ftp.us.debian.org/debian/pool/main/p/python3.9/
Index of /pool/main/p/python3.8
ar x libpython3.8-dev_3.8.2-1ubuntu1_arm64.deb
tar -xvf data.tar.xz
cp ./usr/include/aarch64-linux-gnu/python3.8/pyconfig.h ~/external/python3.8/include/
Build Python bindings
TRT_OSSPATH=${PWD}/.. EXT_PATH=${PWD}/../.. TARGET_ARCHITECTURE=aarch64 PYTHON_MINOR_VERSION=8 bash build.sh (注意build.sh读取的是TARGET_ARCHITECTURE而非TARGET;也可以改用下面的方法)
修改TensorRT/python/build.sh
中的内容。
build.sh
中找到以下内容:
#原内容
PYTHON_MAJOR_VERSION=${PYTHON_MAJOR_VERSION:-3}
PYTHON_MINOR_VERSION=${PYTHON_MINOR_VERSION:-8}
TARGET=${TARGET_ARCHITECTURE:-x86_64}
CUDA_ROOT=${CUDA_ROOT:-/usr/local/cuda}
ROOT_PATH=${TRT_OSSPATH:-/workspace/TensorRT}
EXT_PATH=${EXT_PATH:-/tmp/external}
WHEEL_OUTPUT_DIR=${ROOT_PATH}/python/build
将TARGET
的默认值修改为aarch64
。将ROOT_PATH
的默认值改为你TensorRT对应的绝对路径。将EXT_PATH
的默认值改为你创建的external
对应的绝对路径。
#修改后如下:
PYTHON_MAJOR_VERSION=${PYTHON_MAJOR_VERSION:-3}
PYTHON_MINOR_VERSION=${PYTHON_MINOR_VERSION:-8}
TARGET=${TARGET_ARCHITECTURE:-aarch64}
CUDA_ROOT=${CUDA_ROOT:-/usr/local/cuda}
ROOT_PATH=${TRT_OSSPATH:-/home/xxx/TensorRT}
EXT_PATH=${EXT_PATH:-/home/xxx/external}
WHEEL_OUTPUT_DIR=${ROOT_PATH}/python/build
最后运行build.sh
。运行前检查setuptools
是否为最新版本。
pip install -U pip setuptools
bash ./build.sh
Install the python wheel
pip install build/dist/tensorrt-8.2.3.0-cp38-none-linux_aarch64.whl
#-----------------------------------------------
$ git clone -b release/8.2 https://github.com/NVIDIA/TensorRT.git
$ cd TensorRT
$ git submodule update --init --recursive
$ mkdir -p build && cd build
$ cmake .. -DGPU_ARCHS="53" -DTRT_LIB_DIR=/usr/lib/aarch64-linux-gnu/ -DCMAKE_C_COMPILER=/usr/bin/gcc
$ make -j$(nproc)
编译tensorrt 生成trtexec
cd ~/external/TensorRT/build
cmake ..
使用:cmake -DCMAKE_CUDA_ARCHITECTURES=53 ..
(yolo8) xxx@miivii-tegra:~/external/TensorRT/build$ cmake -DCMAKE_CUDA_ARCHITECTURES=53 ..
Building for TensorRT version: 8.2.3, library version: 8
-- Targeting TRT Platform: aarch64
-- CUDA version set to 10.2.89
-- cuDNN version set to 8.2
-- Protobuf version set to 3.0.0
-- Setting up another Protobuf build for cross compilation targeting aarch64-Linux
-- Using libprotobuf /home/home58/suo58/external/TensorRT/build/third_party.protobuf_aarch64/lib/libprotobuf.a
-- ========================= Importing and creating target nvinfer ==========================
-- Looking for library nvinfer
-- Library that was found /usr/lib/aarch64-linux-gnu/libnvinfer.so
-- ==========================================================================================
-- ========================= Importing and creating target nvuffparser ==========================
-- Looking for library nvparsers
-- Library that was found /usr/lib/aarch64-linux-gnu/libnvparsers.so
-- ==========================================================================================
-- GPU_ARCHS is not defined. Generating CUDA code for default SMs: 53;60;61;70;75;72
-- Protobuf proto/trtcaffe.proto -> proto/trtcaffe.pb.cc proto/trtcaffe.pb.h
-- /home/home58/suo58/external/TensorRT/build/parsers/caffe
Generated: /home/xxx/external/TensorRT/build/parsers/onnx/third_party/onnx/onnx/onnx_onnx2trt_onnx-ml.proto
Generated: /home/home58/suo58/external/TensorRT/build/parsers/onnx/third_party/onnx/onnx/onnx-operators_onnx2trt_onnx-ml.proto
Generated: /home/home58/suo58/external/TensorRT/build/parsers/onnx/third_party/onnx/onnx/onnx-data_onnx2trt_onnx.proto
--
-- ******** Summary ********
-- CMake version : 3.20.4
-- CMake command : /home/xxx/miniforge3/envs/yolo8/lib/python3.8/site-packages/cmake/data/bin/cmake
-- System : Linux
-- C++ compiler : /usr/bin/g++
-- C++ compiler version : 7.5.0
-- CXX flags : -Wno-deprecated-declarations -DBUILD_SYSTEM=cmake_oss -Wall -Wno-deprecated-declarations -Wno-unused-function -Wnon-virtual-dtor
-- Build type : Release
-- Compile definitions : _PROTOBUF_INSTALL_DIR=/home/xxx/external/TensorRT/build/third_party.protobuf;SOURCE_LENGTH=37;ONNX_NAMESPACE=onnx2trt_onnx
-- CMAKE_PREFIX_PATH :
-- CMAKE_INSTALL_PREFIX : /home/xxx/external/TensorRT/build/..
-- CMAKE_MODULE_PATH :
--
-- ONNX version : 1.8.0
-- ONNX NAMESPACE : onnx2trt_onnx
-- ONNX_BUILD_TESTS : OFF
-- ONNX_BUILD_BENCHMARKS : OFF
-- ONNX_USE_LITE_PROTO : OFF
-- ONNXIFI_DUMMY_BACKEND : OFF
-- ONNXIFI_ENABLE_EXT : OFF
--
-- Protobuf compiler :
-- Protobuf includes :
-- Protobuf libraries :
-- BUILD_ONNX_PYTHON : OFF
-- Found CUDA headers at /usr/local/cuda-10.2/include
-- Found TensorRT headers at /home/xxx/external/TensorRT/include
-- Find TensorRT libs at /usr/lib/aarch64-linux-gnu/libnvinfer.so;/home/xxx/external/TensorRT/lib/libnvinfer_plugin.so
ONNX_INCLUDE_DIR
-- Adding new sample: sample_algorithm_selector
-- - Parsers Used: caffe
-- - InferPlugin Used: OFF
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_char_rnn
-- - Parsers Used: uff;caffe;onnx
-- - InferPlugin Used: OFF
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_dynamic_reshape
-- - Parsers Used: onnx
-- - InferPlugin Used: OFF
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_fasterRCNN
-- - Parsers Used: caffe
-- - InferPlugin Used: ON
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_googlenet
-- - Parsers Used: caffe
-- - InferPlugin Used: OFF
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_int8
-- - Parsers Used: caffe
-- - InferPlugin Used: ON
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_int8_api
-- - Parsers Used: onnx
-- - InferPlugin Used: OFF
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_mnist
-- - Parsers Used: caffe
-- - InferPlugin Used: OFF
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_mnist_api
-- - Parsers Used: caffe
-- - InferPlugin Used: OFF
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_nmt
-- - Parsers Used: none
-- - InferPlugin Used: OFF
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_onnx_mnist
-- - Parsers Used: onnx
-- - InferPlugin Used: OFF
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_io_formats
-- - Parsers Used: caffe
-- - InferPlugin Used: OFF
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_ssd
-- - Parsers Used: caffe
-- - InferPlugin Used: ON
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_uff_fasterRCNN
-- - Parsers Used: uff
-- - InferPlugin Used: ON
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_uff_maskRCNN
-- - Parsers Used: uff
-- - InferPlugin Used: ON
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_uff_mnist
-- - Parsers Used: uff
-- - InferPlugin Used: OFF
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_uff_plugin_v2_ext
-- - Parsers Used: uff
-- - InferPlugin Used: OFF
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_uff_ssd
-- - Parsers Used: uff
-- - InferPlugin Used: ON
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: sample_onnx_mnist_coord_conv_ac
-- - Parsers Used: onnx
-- - InferPlugin Used: ON
-- - Licensing: samples
ONNX_INCLUDE_DIR
-- Adding new sample: trtexec
-- - Parsers Used: caffe;uff;onnx
-- - InferPlugin Used: OFF
-- - Licensing: samples
-- Configuring done
-- Generating done
-- Build files have been written to: /home/xxx/external/TensorRT/build
make -j4
make install
三、YOLOv8 模型加速
参考:Jetson nano部署YOLOv8_jetson nano yolov8-CSDN博客
https://zhuanlan.zhihu.com/p/665546297
1、模型转换:采用infer框架trtexec工具进行模型转换
# 模型转换工具
git clone https://github.com/shouxieai/infer.git
# yolov8源码
git clone https://github.com/ultralytics/ultralytics.git
(1)将pt模型导出ONNX
编写exportOnnx.py放入ultralytics下(开发板上)
from ultralytics import YOLO

# Load the PyTorch weights; the path is relative to the directory the
# script is run from.
model = YOLO("../yolov8/yolov8n.pt")

# Export to ONNX with a 640 input size and batch size 1.
success = model.export(imgsz=640, format="onnx", batch=1)
运行 python exportOnnx.py后,在yolov8n.pt所在目录下生成 yolov8n.onnx
(2)将yolov8n.onnx模型优化生成yolov8n.transd.onnx
参考:Jetson nano部署YOLOv8_jetson nano yolov8-CSDN博客
进入infer/workspace/,执行 python v8trans.py yolov8n.onnx
v8trans.py代码如下:
import onnx
import onnx.helper as helper
import sys
import os


def main():
    """Append a Transpose to the model's last node and save a ``.transd`` copy.

    Reads the ONNX file given as the first command-line argument, renames
    the first output of the last graph node to ``pre_transpose``, swaps
    dimensions 1 and 2 in the declared shape of the matching graph output,
    and appends a ``Transpose`` node (``perm=[0, 2, 1]``) that produces the
    original output name. The result is written next to the input as
    ``<name>.transd<ext>``.

    Returns:
        0 on success; 1 if no path was given or the file does not exist.
    """
    if len(sys.argv) < 2:
        print("Usage:\n python v8trans.py yolov8n.onnx")
        return 1

    file = sys.argv[1]
    if not os.path.exists(file):
        print(f"Not exist path: {file}")
        return 1

    # yolov8n.onnx -> yolov8n.transd.onnx
    prefix, suffix = os.path.splitext(file)
    dst = prefix + ".transd" + suffix

    model = onnx.load(file)

    # NOTE(review): assumes the last node in the graph is the one that
    # produces the model output — confirm for models other than YOLOv8.
    node = model.graph.node[-1]

    # Redirect the last node into an intermediate tensor so the new
    # Transpose node can take over the original output name.
    old_output = node.output[0]
    node.output[0] = "pre_transpose"

    for specout in model.graph.output:
        if specout.name == old_output:
            shape0 = specout.type.tensor_type.shape.dim[0]
            shape1 = specout.type.tensor_type.shape.dim[1]
            shape2 = specout.type.tensor_type.shape.dim[2]

            # Build a rank-3 placeholder, then copy the original dims in
            # with axes 1 and 2 swapped to match the transposed output.
            new_out = helper.make_tensor_value_info(
                specout.name,
                specout.type.tensor_type.elem_type,
                [0, 0, 0]
            )
            new_out.type.tensor_type.shape.dim[0].CopyFrom(shape0)
            new_out.type.tensor_type.shape.dim[2].CopyFrom(shape1)
            new_out.type.tensor_type.shape.dim[1].CopyFrom(shape2)
            specout.CopyFrom(new_out)

    model.graph.node.append(
        helper.make_node("Transpose", ["pre_transpose"], [old_output], perm=[0, 2, 1])
    )

    print(f"Model save to {dst}")
    onnx.save(model, dst)
    return 0


if __name__ == "__main__":
    sys.exit(main())
生成 yolov8n.transd.onnx
(3) engine生成
执行 trtexec --onnx=yolov8n.transd.onnx --saveEngine=yolov8n.transd.engine
生成 yolov8n.transd.engine
直接转换:
#将pt模型转换为onnx模型
yolo export model=yolov8n.pt format=onnx opset=12
# 将onnx模型转换为engine模型
trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n.engine --fp16
(yolo8) xxx@miivii-tegra:~/work/yolov8$ trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n.engine --fp16
&&&& RUNNING TensorRT.trtexec # trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n.engine --fp16
[08/12/2024-09:51:36] [I] === Model Options ===
[08/12/2024-09:51:36] [I] Format: ONNX
[08/12/2024-09:51:36] [I] Model: yolov8n.onnx
[08/12/2024-09:51:36] [I] Output:
[08/12/2024-09:51:36] [I] === Build Options ===
[08/12/2024-09:51:36] [I] Max batch: 1
[08/12/2024-09:51:36] [I] Workspace: 16 MB
[08/12/2024-09:51:36] [I] minTiming: 1
[08/12/2024-09:51:36] [I] avgTiming: 8
[08/12/2024-09:51:36] [I] Precision: FP32+FP16
[08/12/2024-09:51:36] [I] Calibration:
[08/12/2024-09:51:36] [I] Safe mode: Disabled
[08/12/2024-09:51:36] [I] Save engine: yolov8n.engine
[08/12/2024-09:51:36] [I] Load engine:
[08/12/2024-09:51:36] [I] Builder Cache: Enabled
[08/12/2024-09:51:36] [I] NVTX verbosity: 0
[08/12/2024-09:51:36] [I] Inputs format: fp32:CHW
[08/12/2024-09:51:36] [I] Outputs format: fp32:CHW
[08/12/2024-09:51:36] [I] Input build shapes: model
[08/12/2024-09:51:36] [I] Input calibration shapes: model
[08/12/2024-09:51:36] [I] === System Options ===
[08/12/2024-09:51:36] [I] Device: 0
[08/12/2024-09:51:36] [I] DLACore:
[08/12/2024-09:51:36] [I] Plugins:
[08/12/2024-09:51:36] [I] === Inference Options ===
[08/12/2024-09:51:36] [I] Batch: 1
[08/12/2024-09:51:36] [I] Input inference shapes: model
[08/12/2024-09:51:36] [I] Iterations: 10
[08/12/2024-09:51:36] [I] Duration: 3s (+ 200ms warm up)
[08/12/2024-09:51:36] [I] Sleep time: 0ms
[08/12/2024-09:51:36] [I] Streams: 1
[08/12/2024-09:51:36] [I] ExposeDMA: Disabled
[08/12/2024-09:51:36] [I] Spin-wait: Disabled
[08/12/2024-09:51:36] [I] Multithreading: Disabled
[08/12/2024-09:51:36] [I] CUDA Graph: Disabled
[08/12/2024-09:51:36] [I] Skip inference: Disabled
[08/12/2024-09:51:36] [I] Inputs:
[08/12/2024-09:51:36] [I] === Reporting Options ===
[08/12/2024-09:51:36] [I] Verbose: Disabled
[08/12/2024-09:51:36] [I] Averages: 10 inferences
[08/12/2024-09:51:36] [I] Percentile: 99
[08/12/2024-09:51:36] [I] Dump output: Disabled
[08/12/2024-09:51:36] [I] Profile: Disabled
[08/12/2024-09:51:36] [I] Export timing to JSON file:
[08/12/2024-09:51:36] [I] Export output to JSON file:
[08/12/2024-09:51:36] [I] Export profile to JSON file:
[08/12/2024-09:51:36] [I]
----------------------------------------------------------------
Input filename: yolov8n.onnx
ONNX IR version: 0.0.7
Opset version: 12
Producer name: pytorch
Producer version: 1.11.0
Domain:
Model version: 0
Doc string:
----------------------------------------------------------------
[08/12/2024-09:51:38] [W] [TRT] onnx2trt_utils.cpp:220: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
[08/12/2024-09:52:52] [I] [TRT] Some tactics do not have sufficient workspace memory to run. Increasing workspace size may increase performance, please check verbose output.
[08/12/2024-09:59:08] [I] [TRT] Detected 1 inputs and 3 output network tensors.
[08/12/2024-09:59:08] [I] Starting inference threads
[08/12/2024-09:59:12] [I] Warmup completed 4 queries over 200 ms
[08/12/2024-09:59:12] [I] Timing trace has 60 queries over 3.11545 s
[08/12/2024-09:59:12] [I] Trace averages of 10 runs:
[08/12/2024-09:59:12] [I] Average on 10 runs - GPU latency: 51.1508 ms - Host latency: 51.9235 ms (end to end 51.9339 ms, enqueue 6.89342 ms)
[08/12/2024-09:59:12] [I] Average on 10 runs - GPU latency: 51.1141 ms - Host latency: 51.8855 ms (end to end 51.8961 ms, enqueue 6.94103 ms)
[08/12/2024-09:59:12] [I] Average on 10 runs - GPU latency: 51.1348 ms - Host latency: 51.9039 ms (end to end 51.9146 ms, enqueue 6.94259 ms)
[08/12/2024-09:59:12] [I] Average on 10 runs - GPU latency: 51.1422 ms - Host latency: 51.9132 ms (end to end 51.9238 ms, enqueue 6.89012 ms)
[08/12/2024-09:59:12] [I] Average on 10 runs - GPU latency: 51.1737 ms - Host latency: 51.9433 ms (end to end 51.9536 ms, enqueue 6.95898 ms)
[08/12/2024-09:59:12] [I] Average on 10 runs - GPU latency: 51.14 ms - Host latency: 51.9092 ms (end to end 51.9192 ms, enqueue 6.85737 ms)
[08/12/2024-09:59:12] [I] Host Latency
[08/12/2024-09:59:12] [I] min: 51.7911 ms (end to end 51.802 ms)
[08/12/2024-09:59:12] [I] max: 52.0718 ms (end to end 52.083 ms)
[08/12/2024-09:59:12] [I] mean: 51.9131 ms (end to end 51.9235 ms)
[08/12/2024-09:59:12] [I] median: 51.9051 ms (end to end 51.9152 ms)
[08/12/2024-09:59:12] [I] percentile: 52.0718 ms at 99% (end to end 52.083 ms at 99%)
[08/12/2024-09:59:12] [I] throughput: 19.2589 qps
[08/12/2024-09:59:12] [I] walltime: 3.11545 s
[08/12/2024-09:59:12] [I] Enqueue Time
[08/12/2024-09:59:12] [I] min: 6.57861 ms
[08/12/2024-09:59:12] [I] max: 7.72876 ms
[08/12/2024-09:59:12] [I] median: 6.8739 ms
[08/12/2024-09:59:12] [I] GPU Compute
[08/12/2024-09:59:12] [I] min: 51.0255 ms
[08/12/2024-09:59:12] [I] max: 51.2957 ms
[08/12/2024-09:59:12] [I] mean: 51.1426 ms
[08/12/2024-09:59:12] [I] median: 51.1315 ms
[08/12/2024-09:59:12] [I] percentile: 51.2957 ms at 99%
[08/12/2024-09:59:12] [I] total compute time: 3.06856 s
&&&& PASSED TensorRT.trtexec # trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n.engine --fp16
trtexec参数
trtexec是NVIDIA TensorRT SDK中的一个实用工具,它允许用户从命令行轻松运行和测试TensorRT引擎。trtexec命令行工具可以使用以下参数:
其中一些重要的参数如下:
--uff:指定输入为UFF模型,后面跟上模型文件的路径。
--onnx:指定输入为ONNX模型,后面跟上模型文件的路径。
--model:指定Caffe模型权重文件(caffemodel)的路径,与--deploy配合使用;加载序列化引擎请使用--loadEngine。
--deploy:指定输入为Caffe deploy文件的路径。
--output:指定输出Tensor名称。
--batch:指定执行推理时每个batch的大小,默认为1。
--device:指定执行推理的设备编号,默认为0。
--workspace:指定构建引擎时可用的最大工作空间大小,单位为MB;本文使用的TensorRT 8.2中默认为16 MB(与上面日志中的 Workspace: 16 MB 一致)。
--fp16:启用FP16精度,可提高推理性能和减少内存使用。
--int8:启用INT8精度,可进一步提高推理性能和减少内存使用。
--calib:指定INT8校准缓存(calibration cache)文件的路径。
--useDLACore:指定使用哪个DLA核心(如 --useDLACore=0),仅在带有DLA的设备上可用。
--allowGPUFallback:如果使用DLA,当某些层无法在DLA上运行时,是否允许将其回退到GPU。
--iterations:指定测试迭代次数。
--avgRuns:指定平均运行次数。
--verbose:打印更详细的输出信息。
--loadEngine:指定加载的TensorRT引擎文件,后面跟上文件路径
--saveEngine:指定生成的TensorRT引擎文件,后面跟上文件路径
1.2 模型转换:基于wang-xinyu/tensorrtx 进行模型转换
cd tensorrtx/yolov8
mkdir build
cd build
cmake ..
make -j4
cmake .. 报错
cmake -DCMAKE_CUDA_ARCHITECTURES=53 ..
make 报错
查看 yolov8/build/CMakeFiles/CMakeError.log,内容如下
Performing C SOURCE FILE Test CMAKE_HAVE_LIBC_PTHREAD failed with the following output:
Change Dir: /home/xxx/work/yolov8/tensorrtx/yolov8/build/CMakeFiles/CMakeTmp
Run Build Command(s):/usr/bin/make -f Makefile cmTC_eb756/fast && /usr/bin/make -f CMakeFiles/cmTC_eb756.dir/build.make CMakeFiles/cmTC_eb756.dir/build
make[1]: Entering directory '/home/xxx/work/yolov8/tensorrtx/yolov8/build/CMakeFiles/CMakeTmp'
Building C object CMakeFiles/cmTC_eb756.dir/src.c.o
/usr/bin/cc -DCMAKE_HAVE_LIBC_PTHREAD -fPIC -o CMakeFiles/cmTC_eb756.dir/src.c.o -c /home/xxx/work/yolov8/tensorrtx/yolov8/build/CMakeFiles/CMakeTmp/src.c
Linking C executable cmTC_eb756
/home/xxx/miniforge3/envs/yolo8/lib/python3.8/site-packages/cmake/data/bin/cmake -E cmake_link_script CMakeFiles/cmTC_eb756.dir/link.txt --verbose=1
/usr/bin/cc -fPIC CMakeFiles/cmTC_eb756.dir/src.c.o -o cmTC_eb756
CMakeFiles/cmTC_eb756.dir/src.c.o: In function `main':
src.c:(.text+0x48): undefined reference to `pthread_create'
src.c:(.text+0x50): undefined reference to `pthread_detach'
src.c:(.text+0x58): undefined reference to `pthread_cancel'
src.c:(.text+0x64): undefined reference to `pthread_join'
src.c:(.text+0x74): undefined reference to `pthread_atfork'
collect2: error: ld returned 1 exit status
CMakeFiles/cmTC_eb756.dir/build.make:98: recipe for target 'cmTC_eb756' failed
make[1]: *** [cmTC_eb756] Error 1
make[1]: Leaving directory '/home/xxx/work/yolov8/tensorrtx/yolov8/build/CMakeFiles/CMakeTmp'
Makefile:127: recipe for target 'cmTC_eb756/fast' failed
make: *** [cmTC_eb756/fast] Error 2
Source file was:
#include <pthread.h>
static void* test_func(void* data)
{
return data;
}
int main(void)
{
pthread_t thread;
pthread_create(&thread, NULL, test_func, NULL);
pthread_detach(thread);
pthread_cancel(thread);
pthread_join(thread, NULL);
pthread_atfork(NULL, NULL, NULL);
pthread_exit(NULL);
return 0;
}
Determining if the function pthread_create exists in the pthreads failed with the following output:
Change Dir: /home/xxx/work/yolov8/tensorrtx/yolov8/build/CMakeFiles/CMakeTmp
Run Build Command(s):/usr/bin/make -f Makefile cmTC_74e77/fast && /usr/bin/make -f CMakeFiles/cmTC_74e77.dir/build.make CMakeFiles/cmTC_74e77.dir/build
make[1]: Entering directory '/home/xxx/work/yolov8/tensorrtx/yolov8/build/CMakeFiles/CMakeTmp'
Building C object CMakeFiles/cmTC_74e77.dir/CheckFunctionExists.c.o
/usr/bin/cc -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create -o CMakeFiles/cmTC_74e77.dir/CheckFunctionExists.c.o -c /home/xxx/miniforge3/envs/yolo8/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckFunctionExists.c
Linking C executable cmTC_74e77
/home/xxx/miniforge3/envs/yolo8/lib/python3.8/site-packages/cmake/data/bin/cmake -E cmake_link_script CMakeFiles/cmTC_74e77.dir/link.txt --verbose=1
/usr/bin/cc -fPIC -DCHECK_FUNCTION_EXISTS=pthread_create CMakeFiles/cmTC_74e77.dir/CheckFunctionExists.c.o -o cmTC_74e77 -lpthreads
/usr/bin/ld: cannot find -lpthreads
collect2: error: ld returned 1 exit status
CMakeFiles/cmTC_74e77.dir/build.make:98: recipe for target 'cmTC_74e77' failed
make[1]: *** [cmTC_74e77] Error 1
make[1]: Leaving directory '/home/xxx/work/yolov8/tensorrtx/yolov8/build/CMakeFiles/CMakeTmp'
Makefile:127: recipe for target 'cmTC_74e77/fast' failed
make: *** [cmTC_74e77/fast] Error 2
2、模型推理
jetson orin nano 部署yolov8模型-Python_jetson orin nano yolov8-CSDN博客
https://zhuanlan.zhihu.com/p/665546297
声明
本文内容仅代表作者观点,或转载于其他网站,本站不以此文作为商业用途
如有涉及侵权,请联系本站进行删除
转载本站原创文章,请注明来源及作者。