Skip to content

Commit e92b287

Browse files
authored
Merge pull request #513 from Nuzhny007/master
Change default params for very small objects
2 parents bc99eb9 + 0e935b0 commit e92b287

9 files changed

Lines changed: 53 additions & 13 deletions

File tree

data/settings_yolov26m.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ net_type = YOLOV26
4747
# INT8
4848
# FP16
4949
# FP32
50+
# FP8
5051
inference_precision = FP16
5152

5253

example/MotionDetectorExample.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class MotionDetectorExample final : public VideoExample
1717
{
1818
public:
1919
MotionDetectorExample(const cv::CommandLineParser& parser)
20-
: VideoExample(parser), m_minObjWidth(10)
20+
: VideoExample(parser)
2121
{
2222
#ifdef USE_CLIP
2323
std::string clipModel = "C:/work/clip/ruclip_/CLIP/data/ruclip-vit-large-patch14-336";
@@ -38,8 +38,7 @@ class MotionDetectorExample final : public VideoExample
3838
{
3939
m_logger->info("MotionDetectorExample::InitDetector");
4040

41-
//m_minObjWidth = frame.cols / 20;
42-
m_minObjWidth = 4;
41+
m_minObjWidth = 2;
4342

4443
config_t config;
4544
config.emplace("useRotatedRect", "0");
@@ -97,7 +96,7 @@ class MotionDetectorExample final : public VideoExample
9796

9897
if (!m_trackerSettingsLoaded)
9998
{
100-
m_trackerSettings.SetDistance(tracking::DistJaccard);
99+
m_trackerSettings.SetDistance(tracking::DistCenters);
101100
m_trackerSettings.m_kalmanType = tracking::KalmanLinear;
102101
m_trackerSettings.m_filterGoal = tracking::FilterCenter;
103102
m_trackerSettings.m_lostTrackType = tracking::TrackNone; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect

src/Detector/ONNXTensorRTDetector.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ bool ONNXTensorRTDetector::Init(const config_t& config)
7272
dictPrecision["INT8"] = tensor_rt::INT8;
7373
dictPrecision["FP16"] = tensor_rt::FP16;
7474
dictPrecision["FP32"] = tensor_rt::FP32;
75+
dictPrecision["FP8"] = tensor_rt::FP8;
7576
auto precision = dictPrecision.find(inference_precision->second);
7677
if (precision != dictPrecision.end())
7778
m_localConfig.m_inferencePrecision = precision->second;

src/Detector/tensorrt_onnx/YoloONNX.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ bool YoloONNX::Init(const SampleYoloParams& params)
1919

2020
m_params = params;
2121

22+
sample::setReportableSeverity(sample::Logger::Severity::kINFO);
2223
initLibNvInferPlugins(&sample::gLogger.getTRTLogger(), "");
2324

2425
auto GetBindings = [&]()
@@ -79,15 +80,16 @@ bool YoloONNX::Init(const SampleYoloParams& params)
7980
file.close();
8081
}
8182

82-
nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(sample::gLogger);
83+
m_inferRuntime = std::shared_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(sample::gLogger));
8384
if (m_params.m_dlaCore >= 0)
84-
infer->setDLACore(m_params.m_dlaCore);
85+
m_inferRuntime->setDLACore(m_params.m_dlaCore);
8586

86-
m_engine = std::shared_ptr<nvinfer1::ICudaEngine>(infer->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter());
87+
m_engine = std::shared_ptr<nvinfer1::ICudaEngine>(m_inferRuntime->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter());
8788
#if (NV_TENSORRT_MAJOR < 8)
88-
infer->destroy();
89+
m_inferRuntime->destroy();
90+
m_inferRuntime.reset();
8991
#else
90-
//delete infer;
92+
//m_inferRuntime.reset();
9193
#endif
9294

9395
if (m_engine)
@@ -233,6 +235,12 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr<nvinfer1::IBuilder>& builder,
233235
{
234236
case tensor_rt::Precision::FP16:
235237
config->setFlag(nvinfer1::BuilderFlag::kFP16);
238+
sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP16)" << std::endl;
239+
break;
240+
241+
case tensor_rt::Precision::FP8:
242+
config->setFlag(nvinfer1::BuilderFlag::kFP8);
243+
sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP8)" << std::endl;
236244
break;
237245

238246
case tensor_rt::Precision::INT8:
@@ -243,6 +251,7 @@ bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr<nvinfer1::IBuilder>& builder,
243251
BatchStream calibrationStream(m_params.m_explicitBatchSize, m_params.m_nbCalBatches, m_params.m_calibrationBatches, m_params.m_dataDirs);
244252
calibrator.reset(new Int8EntropyCalibrator2<BatchStream>(calibrationStream, 0, "Yolo", m_params.m_inputTensorNames[0].c_str()));
245253
config->setFlag(nvinfer1::BuilderFlag::kINT8);
254+
sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kINT8)" << std::endl;
246255
config->setInt8Calibrator(calibrator.get());
247256
}
248257
break;

src/Detector/tensorrt_onnx/YoloONNX.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ class YoloONNX
8686

8787
private:
8888
std::shared_ptr<nvinfer1::ICudaEngine> m_engine; //!< The TensorRT engine used to run the network
89+
std::shared_ptr<nvinfer1::IRuntime> m_inferRuntime;
8990

9091
cv::Mat m_resized;
9192
std::vector<cv::Mat> m_resizedBatch;

src/Detector/tensorrt_onnx/class_detector.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ namespace tensor_rt
134134
dictprecision[tensor_rt::INT8] = "kINT8";
135135
dictprecision[tensor_rt::FP16] = "kHALF";
136136
dictprecision[tensor_rt::FP32] = "kFLOAT";
137+
dictprecision[tensor_rt::FP8] = "kFP8";
138+
137139
auto precision = dictprecision.find(m_params.m_precision);
138140
if (precision != dictprecision.end())
139141
precisionStr = precision->second;

src/Detector/tensorrt_onnx/class_detector.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ namespace tensor_rt
7676
{
7777
INT8 = 0,
7878
FP16,
79-
FP32
79+
FP32,
80+
FP8
8081
};
8182

8283
///

src/Detector/tensorrt_onnx/common/sampleInference.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "sampleOptions.h"
4747
#include "sampleReporting.h"
4848
#include "sampleUtils.h"
49+
#include <cuda_fp8.h>
4950
using namespace nvinfer1;
5051
namespace sample
5152
{
@@ -1320,7 +1321,15 @@ void Binding::fill()
13201321
fillBuffer<uint8_t>(buffer->getHostBuffer(), volume, 0, 255);
13211322
break;
13221323
}
1323-
case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported");
1324+
case nvinfer1::DataType::kFP8:
1325+
{
1326+
#if 0
1327+
ASSERT(false && "FP8 is not supported");
1328+
#else
1329+
fillBuffer<__nv_fp8_e4m3>(buffer->getHostBuffer(), volume, __nv_fp8_e4m3(- 1.0f), __nv_fp8_e4m3(1.0f));
1330+
#endif
1331+
break;
1332+
}
13241333
#if (NV_TENSORRT_MAJOR > 8)
13251334
case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported");
13261335
#endif
@@ -1388,7 +1397,15 @@ void Binding::dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim,
13881397
break;
13891398
}
13901399
#endif
1391-
case nvinfer1::DataType::kFP8: ASSERT(false && "FP8 is not supported");
1400+
case nvinfer1::DataType::kFP8:
1401+
{
1402+
#if 0
1403+
ASSERT(false && "FP8 is not supported");
1404+
#else
1405+
dumpBuffer<__nv_fp8_e4m3>(outputBuffer, separator, os, dims, strides, vectorDim, spv);
1406+
#endif
1407+
break;
1408+
}
13921409
#if (NV_TENSORRT_MAJOR > 8)
13931410
case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported");
13941411
#endif

src/Detector/tensorrt_onnx/common/sampleUtils.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "sampleUtils.h"
1919
#include "bfloat16.h"
2020
#include "half.h"
21+
#include <cuda_fp8.h>
2122

2223
using namespace nvinfer1;
2324

@@ -433,6 +434,11 @@ void print(std::ostream& os, __half v)
433434
os << static_cast<float>(v);
434435
}
435436

437+
void print(std::ostream& os, __nv_fp8_e4m3 v)
438+
{
439+
os << static_cast<float>(v);
440+
}
441+
436442
template <typename T>
437443
void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
438444
Dims const& strides, int32_t vectorDim, int32_t spv)
@@ -482,6 +488,8 @@ template void dumpBuffer<uint8_t>(void const* buffer, std::string const& separat
482488
Dims const& strides, int32_t vectorDim, int32_t spv);
483489
template void dumpBuffer<int64_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
484490
Dims const& strides, int32_t vectorDim, int32_t spv);
491+
template void dumpBuffer<__nv_fp8_e4m3>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
492+
Dims const& strides, int32_t vectorDim, int32_t spv);
485493

486494
template <typename T>
487495
void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights)
@@ -566,7 +574,7 @@ void fillBuffer(void* buffer, int64_t volume, T min, T max)
566574
{
567575
T* typedBuffer = static_cast<T*>(buffer);
568576
std::default_random_engine engine;
569-
std::uniform_real_distribution<float> distribution(min, max);
577+
std::uniform_real_distribution<float> distribution((float)min, (float)max);
570578
auto generator = [&engine, &distribution]() { return static_cast<T>(distribution(engine)); };
571579
std::generate(typedBuffer, typedBuffer + volume, generator);
572580
}
@@ -580,6 +588,7 @@ template void fillBuffer<int8_t>(void* buffer, int64_t volume, int8_t min, int8_
580588
template void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max);
581589
template void fillBuffer<BFloat16>(void* buffer, int64_t volume, BFloat16 min, BFloat16 max);
582590
template void fillBuffer<uint8_t>(void* buffer, int64_t volume, uint8_t min, uint8_t max);
591+
template void fillBuffer<__nv_fp8_e4m3>(void* buffer, int64_t volume, __nv_fp8_e4m3 min, __nv_fp8_e4m3 max);
583592

584593
bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target)
585594
{

0 commit comments

Comments (0)