Jetson Inference DNN Vision Library
tensorNet.h
#ifndef __TENSOR_NET_H__
#define __TENSOR_NET_H__
#if NV_TENSORRT_MAJOR >= 6
#define DIMS_C(x) x.d[0]
#define DIMS_H(x) x.d[1]
#define DIMS_W(x) x.d[2]
#elif NV_TENSORRT_MAJOR >= 2
typedef nvinfer1::DimsCHW Dims3;

#define DIMS_C(x) x.d[0]
#define DIMS_H(x) x.d[1]
#define DIMS_W(x) x.d[2]
#endif

#ifndef NV_TENSORRT_MAJOR
#define NV_TENSORRT_MAJOR 1
#define NV_TENSORRT_MINOR 0
#endif

#if NV_TENSORRT_MAJOR >= 8
#define NOEXCEPT noexcept
#else
#define NOEXCEPT
#endif
#define TENSORRT_VERSION_CHECK(major, minor, patch) \
    (NV_TENSORRT_MAJOR > major || \
     (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR > minor) || \
     (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR == minor && NV_TENSORRT_PATCH >= patch))
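For example (illustrative; the version numbers are arbitrary), the macro can gate version-specific code at compile time:

    #if TENSORRT_VERSION_CHECK(8, 5, 0)
        // ... code that requires TensorRT >= 8.5.0 ...
    #endif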
#define DEFAULT_MAX_BATCH_SIZE 1

#define LOG_TRT "[TRT] "
bool LoadNetwork( const char* prototxt, const char* model, const char* mean=NULL,
                  const char* input_blob="data", const char* output_blob="prob",
                  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
                  deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                  nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
                  const char* input_blob, const std::vector<std::string>& output_blobs,
                  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
                  deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                  nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
                  const std::vector<std::string>& input_blobs,
                  const std::vector<std::string>& output_blobs,
                  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
                  deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                  nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
                  const char* input_blob, const Dims3& input_dims,
                  const std::vector<std::string>& output_blobs,
                  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
                  deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                  nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
                  const std::vector<std::string>& input_blobs,
                  const std::vector<Dims3>& input_dims,
                  const std::vector<std::string>& output_blobs,
                  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
                  deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                  nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );
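A minimal loading sketch (illustrative; tensorNet is abstract, so LoadNetwork() is called from a derived class, and the file paths are hypothetical):

    // inside the init code of a hypothetical tensorNet subclass
    if( !LoadNetwork("networks/my_model.prototxt", "networks/my_model.caffemodel") )
    {
        // defaults: mean=NULL, blobs "data"/"prob", batch size 1, TYPE_FASTEST on the GPU
        LogError(LOG_TRT "failed to load my_model\n");
        return false;
    }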
bool LoadEngine( const char* engine_filename,
                 const std::vector<std::string>& input_blobs,
                 const std::vector<std::string>& output_blobs,
                 nvinfer1::IPluginFactory* pluginFactory=NULL,
                 deviceType device=DEVICE_GPU, cudaStream_t stream=NULL );

bool LoadEngine( char* engine_stream, size_t engine_size,
                 const std::vector<std::string>& input_blobs,
                 const std::vector<std::string>& output_blobs,
                 nvinfer1::IPluginFactory* pluginFactory=NULL,
                 deviceType device=DEVICE_GPU, cudaStream_t stream=NULL );

bool LoadEngine( nvinfer1::ICudaEngine* engine,
                 const std::vector<std::string>& input_blobs,
                 const std::vector<std::string>& output_blobs,
                 deviceType device=DEVICE_GPU, cudaStream_t stream=NULL );

bool LoadEngine( const char* filename, char** stream, size_t* size );
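A corresponding sketch (illustrative; paths and blob names are hypothetical) for loading a pre-built engine plan instead of rebuilding it:

    std::vector<std::string> inputs  = { "data" };
    std::vector<std::string> outputs = { "prob" };

    if( !LoadEngine("networks/my_model.engine", inputs, outputs) )
        LogError(LOG_TRT "failed to load the serialized engine\n");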
static bool LoadClassLabels( const char* filename, std::vector<std::string>& descriptions,
                             int expectedClasses=-1 );

static bool LoadClassLabels( const char* filename, std::vector<std::string>& descriptions,
                             std::vector<std::string>& synsets, int expectedClasses=-1 );

static bool LoadClassColors( const char* filename, float4* colors,
                             int expectedClasses, float defaultAlpha=255.0f );

static bool LoadClassColors( const char* filename, float4** colors,
                             int expectedClasses, float defaultAlpha=255.0f );
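Usage sketch for the static loaders (illustrative; the paths and class count are hypothetical, and the float4** overload is presumed to allocate the color array itself):

    std::vector<std::string> labels;

    if( !tensorNet::LoadClassLabels("networks/ilsvrc12_synset_words.txt", labels, 1000) )
        LogError(LOG_TRT "failed to load class labels\n");

    float4* colors = NULL;

    if( !tensorNet::LoadClassColors("networks/colors.txt", &colors, 21, 150.0f) )
        LogError(LOG_TRT "failed to load class colors\n");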
// from PrintProfilerTimes() (tensorNet.h:572)
LogInfo(LOG_TRT "------------------------------------------------\n");
LogInfo(LOG_TRT "------------------------------------------------\n");
LogInfo(LOG_TRT "------------------------------------------------\n\n");

static bool first_run = true;

LogWarning(LOG_TRT "note -- when processing a single image, run 'sudo jetson_clocks' before\n"
           "        to disable DVFS for more accurate profiling/timing measurements\n\n");
bool ProfileModel( const std::string& deployFile, const std::string& modelFile,
                   const std::vector<std::string>& inputs, const std::vector<Dims3>& inputDims,
                   const std::vector<std::string>& outputs, uint32_t maxBatchSize,
                   precisionType precision, deviceType device, bool allowGPUFallback,
                   nvinfer1::IInt8Calibrator* calibrator, char** engineStream, size_t* engineSize );
#if NV_TENSORRT_MAJOR >= 8
bool ConfigureBuilder( nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
                       uint32_t maxBatchSize, uint32_t workspaceSize, precisionType precision,
                       deviceType device, bool allowGPUFallback,
                       nvinfer1::IInt8Calibrator* calibrator );
#else
bool ConfigureBuilder( nvinfer1::IBuilder* builder, uint32_t maxBatchSize, uint32_t workspaceSize,
                       precisionType precision, deviceType device, bool allowGPUFallback,
                       nvinfer1::IInt8Calibrator* calibrator );
#endif
bool ValidateEngine( const char* model_path, const char* cache_path, const char* checksum_path );
// from Logger::log() (tensorNet.h:658)
if( severity == Severity::kWARNING )
    LogWarning(LOG_TRT "%s\n", msg);
else if( severity == Severity::kINFO )
    LogInfo(LOG_TRT "%s\n", msg);
#if NV_TENSORRT_MAJOR >= 6
else if( severity == Severity::kVERBOSE )
    LogVerbose(LOG_TRT "%s\n", msg);
#endif
// from PROFILER_BEGIN() (tensorNet.h:701)
const uint32_t evt  = query*2;
const uint32_t flag = (1 << query);

// from PROFILER_END() (tensorNet.h:716)
const uint32_t evt = query*2+1;

LogWarning(LOG_TRT "note -- when processing a single image, run 'sudo jetson_clocks' before\n"
           "        to disable DVFS for more accurate profiling/timing measurements\n");

// from PROFILER_QUERY() (tensorNet.h:738)
const uint32_t flag = (1 << query);
const uint32_t evt  = query*2;
float cuda_time = 0.0f;
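A timing sketch (illustrative; net is a hypothetical pointer to a loaded subclass instance, and the float2 x/y = CPU/CUDA layout is an assumption):

    net->ProcessNetwork();

    const float2 t = net->GetProfilerTime(PROFILER_NETWORK);
    LogInfo(LOG_TRT "network time: CPU %.2fms, CUDA %.2fms (%.1f FPS)\n", t.x, t.y, net->GetNetworkFPS());

    net->PrintProfilerTimes();      // or print all profiler stages at once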
Symbol reference (cross-referenced definitions):

static std::vector<precisionType> DetectNativePrecisions( deviceType device=DEVICE_GPU )
    Detect the precisions supported natively on a device.

DEVICE_DLA_0  (tensorNet.h:133)
    Deep Learning Accelerator (DLA) Core 0 (only on Jetson Xavier).

tensorNet::Logger  (tensorNet.h:655)
    Logger class for TensorRT (formerly GIE) info/warning/errors.

precisionType GetPrecision() const  (tensorNet.h:412)
    Retrieve the type of precision being used.

MODEL_UFF  (tensorNet.h:160)
    UFF.

static bool DetectNativePrecision( const std::vector<precisionType>& nativeTypes, precisionType type )
    Detect if a particular precision is supported natively.

#define CUDA(x)  (cudaUtility.h:41)
    Execute a CUDA call and print out any errors.

bool ProcessNetwork( bool sync=true )
    Execute processing of the network.
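A minimal end-to-end inference sketch (illustrative only; myNet and imageCUDA are hypothetical, and it assumes the input was already pre-processed into CUDA memory):

    // inside a hypothetical tensorNet subclass
    bool myNet::Process( float* imageCUDA )
    {
        // copy the pre-processed tensor into the network's input binding
        if( CUDA(cudaMemcpy(GetInputPtr(), imageCUDA, GetInputSize(), cudaMemcpyDeviceToDevice)) != cudaSuccess )
            return false;

        if( !ProcessNetwork() )     // run the TensorRT engine (synchronous by default)
            return false;

        float* output = GetOutputPtr();     // CUDA pointer to the first output layer
        // ... interpret the output tensor ...
        return true;
    }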
MODEL_CUSTOM  (tensorNet.h:157)
    Created directly with the TensorRT API.

uint32_t binding  (tensorNet.h:813)

#define LogInfo(format, args...)  (logging.h:168)
    Log a printf-style info message (Log::INFO).

bool mAllowGPUFallback  (tensorNet.h:805)

float timeFloat( const timespec& a )  (timespec.h:149)
    Convert to 32-bit float (in milliseconds).

modelType modelTypeFromStr( const char* str )
    Parse the model format from a string.

float GetNetworkFPS()  (tensorNet.h:547)
    Retrieve the network frames per second (FPS).

uint32_t size  (tensorNet.h:812)

std::string mModelFile  (tensorNet.h:781)

void timestamp( timespec* timestampOut )  (timespec.h:37)
    Retrieve a timestamp of the current system time.

PROFILER_CPU  (tensorNet.h:208)
    CPU walltime.

bool LoadNetwork( const char* prototxt, const char* model, const char* mean=NULL,
                  const char* input_blob="data", const char* output_blob="prob",
                  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
                  deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                  nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL )
    Load a new network instance.

TYPE_FP16  (tensorNet.h:107)
    16-bit floating-point half precision (FP16).

const char* deviceTypeToStr( deviceType type )
    Stringize function that returns deviceType in text.

const char* GetPrototxtPath() const  (tensorNet.h:462)
    Retrieve the path to the network prototxt file.

bool mEnableProfiler  (tensorNet.h:803)

MODEL_ENGINE  (tensorNet.h:161)
    TensorRT engine/plan.

uint32_t GetOutputSize( uint32_t layer=0 ) const  (tensorNet.h:537)
    Retrieve the size (in bytes) of a network output layer.

bool IsModelType( modelType type ) const  (tensorNet.h:482)
    Return true if the model is of the specified format.

bool ValidateEngine( const char* model_path, const char* cache_path, const char* checksum_path )
    Validate that the model already has a built TensorRT engine that exists and doesn't need updating.

DEVICE_DLA  (tensorNet.h:132)
    Deep Learning Accelerator (DLA) Core 0 (only on Jetson Xavier).

void EnableLayerProfiler()
    Manually enable layer profiling times.

profilerDevice  (tensorNet.h:206)
    Profiler device.

TYPE_INT8  (tensorNet.h:108)
    8-bit integer precision (INT8).

virtual void reportLayerTime( const char* layerName, float ms ) NOEXCEPT  (tensorNet.h:689)

const char* precisionTypeToStr( precisionType type )
    Stringize function that returns precisionType in text.

bool IsPrecision( precisionType type ) const  (tensorNet.h:417)
    Check if a particular precision is being used.

uint32_t mWorkspaceSize  (tensorNet.h:801)

std::string mMeanPath  (tensorNet.h:782)

#define LOG_TRT  (tensorNet.h:94)
    Prefix used for tagging printed log output from TensorRT.

PROFILER_VISUALIZE  (tensorNet.h:192)

tensorNet::Profiler gProfiler

void log( Severity severity, const char* msg ) NOEXCEPT override  (tensorNet.h:658)

float2 mProfilerTimes[PROFILER_TOTAL+1]  (tensorNet.h:798)

const char* profilerQueryToStr( profilerQuery query )
    Stringize function that returns profilerQuery in text.

float* CPU  (tensorNet.h:814)

profilerQuery  (tensorNet.h:187)
    Profiling queries.

bool LoadEngine( const char* engine_filename,
                 const std::vector<std::string>& input_blobs,
                 const std::vector<std::string>& output_blobs,
                 nvinfer1::IPluginFactory* pluginFactory=NULL,
                 deviceType device=DEVICE_GPU, cudaStream_t stream=NULL )
    Load a network instance from a serialized engine plan file.

const char* GetNetworkName() const  (tensorNet.h:557)
    Retrieve the network name (its filename).

DEVICE_GPU  (tensorNet.h:131)
    GPU (if multiple GPUs are present, a specific GPU can be selected with cudaSetDevice()).

modelType  (tensorNet.h:155)
    Enumeration indicating the format of the model that's imported into TensorRT (caffe, ONNX, UFF, engine, or custom).

PROFILER_CUDA  (tensorNet.h:209)
    CUDA kernel time.

float timingAccumulator  (tensorNet.h:695)

nvinfer1::Dims3 Dims3  (tensorNet.h:58)

static precisionType FindFastestPrecision( deviceType device=DEVICE_GPU, bool allowInt8=true )
    Determine the fastest native precision on a device.
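An illustrative sketch of how these precision helpers compose (the control flow is an assumption, not taken from the library):

    // resolve TYPE_FASTEST to a concrete precision before building an engine
    precisionType precision = tensorNet::FindFastestPrecision(DEVICE_GPU);   // e.g. TYPE_FP16 on most Jetsons

    // or check whether a specific precision is natively supported
    const std::vector<precisionType> native = tensorNet::DetectNativePrecisions(DEVICE_GPU);

    if( !tensorNet::DetectNativePrecision(native, TYPE_INT8) )
        LogWarning(LOG_TRT "INT8 not natively supported -- using %s instead\n", precisionTypeToStr(precision));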
void PrintProfilerTimes()  (tensorNet.h:572)
    Print the profiler times (in milliseconds).

uint8_t classID  (cudaPointCloud.h:17)
    The class ID of the point.

uint32_t GetInputWidth( uint32_t layer=0 ) const  (tensorNet.h:502)
    Retrieve the width of a network input layer.

uint32_t GetOutputHeight( uint32_t layer=0 ) const  (tensorNet.h:532)
    Retrieve the height of a network output layer.

deviceType  (tensorNet.h:129)
    Enumeration for indicating the desired device that the network should run on, if available in hardware.

cudaStream_t mStream  (tensorNet.h:790)

modelType GetModelType() const  (tensorNet.h:477)
    Retrieve the format of the network model.

uint32_t GetOutputLayers() const  (tensorNet.h:492)
    Retrieve the number of output layers of the network.

void EnableDebug()
    Manually enable debug messages and synchronization.

void** mBindings  (tensorNet.h:806)

#define NOEXCEPT  (tensorNet.h:73)

uint32_t mMaxBatchSize  (tensorNet.h:802)

PROFILER_POSTPROCESS  (tensorNet.h:191)

uint32_t GetInputHeight( uint32_t layer=0 ) const  (tensorNet.h:507)
    Retrieve the height of a network input layer.

static bool LoadClassLabels( const char* filename, std::vector<std::string>& descriptions, int expectedClasses=-1 )
    Load class descriptions from a label file.

bool ConfigureBuilder( nvinfer1::IBuilder* builder, uint32_t maxBatchSize, uint32_t workspaceSize,
                       precisionType precision, deviceType device, bool allowGPUFallback,
                       nvinfer1::IInt8Calibrator* calibrator )
    Configure builder options.

precisionType precisionTypeFromStr( const char* str )
    Parse the precision type from a string.

std::string mCacheEnginePath  (tensorNet.h:783)

float2 GetProfilerTime( profilerQuery query )  (tensorNet.h:562)
    Retrieve the profiler runtime (in milliseconds).

modelType mModelType  (tensorNet.h:789)

#define DIMS_H(x)  (tensorNet.h:61)

TYPE_FASTEST  (tensorNet.h:105)
    The fastest detected precision should be used (i.e. ...).

float* GetOutputPtr( uint32_t layer=0 ) const  (tensorNet.h:542)
    Get the CUDA pointer to the output memory.

Profiler()  (tensorNet.h:687)

bool mEnableDebug  (tensorNet.h:804)

Dims3 GetInputDims( uint32_t layer=0 ) const  (tensorNet.h:497)
    Retrieve the dimensions of a network input layer.

NUM_DEVICES  (tensorNet.h:135)
    Number of device types defined.

precisionType  (tensorNet.h:102)
    Enumeration for indicating the desired precision that the network should run in, if available in hardware.

deviceType deviceTypeFromStr( const char* str )
    Parse the device type from a string.

static precisionType SelectPrecision( precisionType precision, deviceType device=DEVICE_GPU, bool allowInt8=true )
    Resolve a desired precision to a specific one that's available.

deviceType GetDevice() const  (tensorNet.h:407)
    Retrieve the device being used for execution.

#define LogWarning(format, args...)  (logging.h:156)
    Log a printf-style warning message (Log::WARNING).

modelType modelTypeFromPath( const char* path )
    Parse the model format from a file path.

DEVICE_DLA_1  (tensorNet.h:134)
    Deep Learning Accelerator (DLA) Core 1 (only on Jetson Xavier).

bool AllowGPUFallback() const  (tensorNet.h:402)
    Return true if GPU fallback is enabled.

void SetStream( cudaStream_t stream )
    Set the stream that the device is operating on.

tensorNet  (tensorNet.h:218)
    Abstract class for loading a tensor network with TensorRT.

const char* GetModelPath() const  (tensorNet.h:467)
    Retrieve the full path to the model file, including the filename.

std::vector<layerInfo> mInputs  (tensorNet.h:818)

nvinfer1::ICudaEngine* mEngine  (tensorNet.h:795)

Dims3 GetOutputDims( uint32_t layer=0 ) const  (tensorNet.h:522)
    Retrieve the dimensions of a network output layer.

PROFILER_NETWORK  (tensorNet.h:190)

cudaEvent_t mEventsGPU[PROFILER_TOTAL*2]  (tensorNet.h:791)

#define DIMS_W(x)  (tensorNet.h:62)

TYPE_FP32  (tensorNet.h:106)
    32-bit floating-point precision (FP32).

Dims3 dims  (tensorNet.h:811)

MODEL_ONNX  (tensorNet.h:159)
    ONNX.

const char* modelTypeToStr( modelType type )
    Stringize function that returns modelType in text.

void PROFILER_END( profilerQuery query )  (tensorNet.h:716)
    End a profiling query, after the network is run.

float* CUDA  (tensorNet.h:815)

nvinfer1::IExecutionContext* mContext  (tensorNet.h:796)

#define LogVerbose(format, args...)  (logging.h:174)
    Log a printf-style verbose message (Log::VERBOSE).

NUM_PRECISIONS  (tensorNet.h:109)
    Number of precision types defined.

precisionType mPrecision  (tensorNet.h:788)

uint32_t GetInputSize( uint32_t layer=0 ) const  (tensorNet.h:512)
    Retrieve the size (in bytes) of a network input layer.

float GetProfilerTime( profilerQuery query, profilerDevice device )  (tensorNet.h:567)
    Retrieve the profiler runtime (in milliseconds).

float* GetInputPtr( uint32_t layer=0 ) const  (tensorNet.h:517)
    Get the CUDA pointer to the input layer's memory.

std::string mChecksumPath  (tensorNet.h:785)

const char* GetModelFilename() const  (tensorNet.h:472)
    Retrieve the filename of the model file, excluding the directory.

deviceType mDevice  (tensorNet.h:787)

PROFILER_TOTAL  (tensorNet.h:193)

std::string name  (tensorNet.h:810)

uint32_t GetOutputWidth( uint32_t layer=0 ) const  (tensorNet.h:527)
    Retrieve the width of a network output layer.

MODEL_CAFFE  (tensorNet.h:158)
    caffemodel.

#define DEFAULT_MAX_BATCH_SIZE  (tensorNet.h:88)
    Default maximum batch size.

cudaStream_t GetStream() const  (tensorNet.h:447)
    Retrieve the stream that the device is operating on.

timespec mEventsCPU[PROFILER_TOTAL*2]  (tensorNet.h:792)

uint32_t mProfilerQueriesUsed  (tensorNet.h:799)

PROFILER_PREPROCESS  (tensorNet.h:189)

uint32_t GetInputLayers() const  (tensorNet.h:487)
    Retrieve the number of input layers of the network.

virtual ~tensorNet()
    Destructor.

static float4 GenerateColor( uint32_t classID, float alpha=255.0f )
    Procedurally generate a color for a given class index with the specified alpha value.
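A small usage sketch for GenerateColor() (illustrative; numClasses is hypothetical):

    // procedurally assign a semi-transparent color to each class
    const uint32_t numClasses = 21;
    std::vector<float4> colors(numClasses);

    for( uint32_t n = 0; n < numClasses; n++ )
        colors[n] = tensorNet::GenerateColor(n, 150.0f);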
#define LogError(format, args...)  (logging.h:150)
    Log a printf-style error message (Log::ERROR).

bool ProfileModel( const std::string& deployFile, const std::string& modelFile,
                   const std::vector<std::string>& inputs, const std::vector<Dims3>& inputDims,
                   const std::vector<std::string>& outputs, uint32_t maxBatchSize,
                   precisionType precision, deviceType device, bool allowGPUFallback,
                   nvinfer1::IInt8Calibrator* calibrator, char** engineStream, size_t* engineSize )
    Create and output an optimized network model.

bool PROFILER_QUERY( profilerQuery query )  (tensorNet.h:738)
    Query the CUDA part of a profiler query.

std::string mPrototxtPath  (tensorNet.h:779)

static bool LoadClassColors( const char* filename, float4* colors, int expectedClasses, float defaultAlpha=255.0f )
    Load class colors from a text file.

std::string mCacheCalibrationPath  (tensorNet.h:784)

tensorNet::Logger gLogger

cudaStream_t CreateStream( bool nonBlocking=true )
    Create and use a new stream for execution.
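An illustrative sketch of the stream accessors (net is a hypothetical pointer to a loaded subclass instance, and the async use of sync=false is an assumption):

    cudaStream_t stream = net->CreateStream();      // create a non-blocking stream and make it active

    net->ProcessNetwork(false);                     // sync=false: enqueue without blocking
    CUDA(cudaStreamSynchronize(net->GetStream()));  // wait for completion when the results are needed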
nvinfer1::IRuntime* mInfer  (tensorNet.h:794)

void PROFILER_BEGIN( profilerQuery query )  (tensorNet.h:701)
    Begin a profiling query, before the network is run.

tensorNet::Profiler  (tensorNet.h:684)
    Profiler interface for measuring layer timings.

float GetNetworkTime()  (tensorNet.h:552)
    Retrieve the network runtime (in milliseconds).

uint32_t mProfilerQueriesDone  (tensorNet.h:800)

std::vector<layerInfo> mOutputs  (tensorNet.h:819)

layerInfo  (tensorNet.h:808)

std::string mModelPath  (tensorNet.h:780)

__device__ cudaVectorTypeInfo<T>::Base alpha( T vec, typename cudaVectorTypeInfo<T>::Base default_alpha=255 )  (cudaVector.h:98)

void timeDiff( const timespec& start, const timespec& end, timespec* result )  (timespec.h:73)
    Find the difference between two timestamps.

TYPE_DISABLED  (tensorNet.h:104)
    Unknown, unspecified, or disabled type.