23 #ifndef __TENSOR_NET_H__ 24 #define __TENSOR_NET_H__ 40 #if NV_TENSORRT_MAJOR > 1 41 typedef nvinfer1::DimsCHW
Dims3;
43 #define DIMS_C(x) x.d[0] 44 #define DIMS_H(x) x.d[1] 45 #define DIMS_W(x) x.d[2] 54 #ifndef NV_TENSORRT_MAJOR 55 #define NV_TENSORRT_MAJOR 1 56 #define NV_TENSORRT_MINOR 0 65 #define DEFAULT_MAX_BATCH_SIZE 1 71 #define LOG_TRT "[TRT] " 205 bool LoadNetwork(
const char* prototxt,
const char* model,
const char* mean=NULL,
206 const char* input_blob=
"data",
const char* output_blob=
"prob",
209 nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );
220 bool LoadNetwork(
const char* prototxt,
const char* model,
const char* mean,
221 const char* input_blob,
const std::vector<std::string>& output_blobs,
224 nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );
236 bool LoadNetwork(
const char* prototxt,
const char* model,
const char* mean,
237 const char* input_blob,
const Dims3& input_dims,
238 const std::vector<std::string>& output_blobs,
242 nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );
247 void EnableLayerProfiler();
287 static bool DetectNativePrecision(
const std::vector<precisionType>& nativeTypes,
precisionType type );
297 inline cudaStream_t
GetStream()
const {
return mStream; }
302 cudaStream_t CreateStream(
bool nonBlocking=
true );
307 void SetStream( cudaStream_t stream );
350 printf(
LOG_TRT "----------------------------------------------\n");
351 printf(
LOG_TRT "Timing Report %s\n", GetModelPath());
352 printf(
LOG_TRT "----------------------------------------------\n");
358 if( PROFILER_QUERY(query) )
362 printf(
LOG_TRT "----------------------------------------------\n\n");
364 static bool first_run=
true;
368 printf(
LOG_TRT "note -- when processing a single image, run 'sudo jetson_clocks' before\n" 369 " to disable DVFS for more accurate profiling/timing measurements\n\n");
392 bool ProfileModel(
const std::string& deployFile,
const std::string& modelFile,
393 const char* input,
const Dims3& inputDims,
394 const std::vector<std::string>& outputs, uint32_t maxBatchSize,
396 nvinfer1::IInt8Calibrator* calibrator, std::ostream& modelStream);
403 void log( Severity severity,
const char* msg )
override 405 if( severity != Severity::kINFO )
420 printf(
LOG_TRT "layer %s - %f ms\n", layerName, ms);
421 timingAccumulator += ms;
433 const uint32_t evt = query*2;
434 const uint32_t flag = (1 << query);
436 CUDA(cudaEventRecord(mEventsGPU[evt], mStream));
439 mProfilerQueriesUsed |= flag;
440 mProfilerQueriesDone &= ~flag;
448 const uint32_t evt = query*2+1;
450 CUDA(cudaEventRecord(mEventsGPU[evt]));
453 timeDiff(mEventsCPU[evt-1], mEventsCPU[evt], &cpuTime);
454 mProfilerTimes[query].x =
timeFloat(cpuTime);
458 printf(
LOG_TRT "layer network time - %f ms\n", gProfiler.timingAccumulator);
459 gProfiler.timingAccumulator = 0.0f;
460 printf(
LOG_TRT "note -- when processing a single image, run 'sudo jetson_clocks' before\n" 461 " to disable DVFS for more accurate profiling/timing measurements\n");
470 const uint32_t flag = (1 << query);
488 else if( mProfilerQueriesUsed & flag )
490 if( !(mProfilerQueriesDone & flag) )
492 const uint32_t evt = query*2;
493 float cuda_time = 0.0f;
494 CUDA(cudaEventElapsedTime(&cuda_time, mEventsGPU[evt], mEventsGPU[evt+1]));
495 mProfilerTimes[query].y = cuda_time;
496 mProfilerQueriesDone |= flag;
modelType
Enumeration indicating the format of the model that's imported in TensorRT (either caffemodel, ONNX, UFF, or created directly with the TensorRT API).
Definition: tensorNet.h:132
modelType modelTypeFromStr(const char *str)
Parse the model format from a string.
precisionType
Enumeration for indicating the desired precision that the network should run in, if available in hardware.
Definition: tensorNet.h:79
float timeFloat(const timespec &a)
Convert to 32-bit float (in milliseconds).
Definition: timespec.h:132
GPU (if multiple GPUs are present, a specific GPU can be selected with cudaSetDevice()).
Definition: tensorNet.h:108
profilerQuery
Profiling queries.
Definition: tensorNet.h:157
float2 GetProfilerTime(profilerQuery query)
Retrieve the profiler runtime (in milliseconds).
Definition: tensorNet.h:337
float GetNetworkTime()
Retrieve the network runtime (in milliseconds).
Definition: tensorNet.h:332
bool mAllowGPUFallback
Definition: tensorNet.h:538
deviceType GetDevice() const
Retrieve the device being used for execution.
Definition: tensorNet.h:262
deviceType deviceTypeFromStr(const char *str)
Parse the device type from a string.
float GetProfilerTime(profilerQuery query, profilerDevice device)
Retrieve the profiler runtime (in milliseconds).
Definition: tensorNet.h:342
uint32_t mMaxBatchSize
Definition: tensorNet.h:535
Dims3 dims
Definition: tensorNet.h:545
Definition: tensorNet.h:163
Logger class for GIE info/warning/errors.
Definition: tensorNet.h:401
const char * deviceTypeToStr(deviceType type)
Stringize function that returns deviceType in text.
const char * modelTypeToStr(modelType type)
Stringize function that returns modelType in text.
Definition: tensorNet.h:160
void PROFILER_END(profilerQuery query)
End a profiling query, after the network is run.
Definition: tensorNet.h:446
nvinfer1::ICudaEngine * mEngine
Definition: tensorNet.h:524
std::string mCacheCalibrationPath
Definition: tensorNet.h:514
deviceType mDevice
Definition: tensorNet.h:516
std::vector< outputLayer > mOutputs
Definition: tensorNet.h:551
deviceType
Enumeration for indicating the desired device that the network should run on, if available in hardware.
Definition: tensorNet.h:106
precisionType mPrecision
Definition: tensorNet.h:517
virtual void reportLayerTime(const char *layerName, float ms)
Definition: tensorNet.h:418
ONNX.
Definition: tensorNet.h:136
32-bit floating-point precision (FP32)
Definition: tensorNet.h:83
float * CUDA
Definition: tensorNet.h:548
void timestamp(timespec *timestampOut)
Retrieve a timestamp of the current system time.
Definition: timespec.h:36
The fastest detected precision should be used (i.e. INT8 if available, otherwise FP16, otherwise FP32).
Definition: tensorNet.h:82
#define LOG_TRT
Prefix used for tagging printed log output from TensorRT.
Definition: tensorNet.h:71
Definition: tensorNet.h:27
uint32_t mHeight
Definition: tensorNet.h:528
modelType GetModelType() const
Retrieve the format of the network model.
Definition: tensorNet.h:322
Number of precision types defined.
Definition: tensorNet.h:86
cudaStream_t mStream
Definition: tensorNet.h:519
std::string mModelPath
Definition: tensorNet.h:510
uint32_t mProfilerQueriesUsed
Definition: tensorNet.h:533
Definition: tensorNet.h:542
Definition: tensorNet.h:159
8-bit integer precision (INT8)
Definition: tensorNet.h:85
uint32_t mWidth
Definition: tensorNet.h:527
Deep Learning Accelerator (DLA) Core 0 (only on Jetson Xavier)
Definition: tensorNet.h:109
Dims3 mInputDims
Definition: tensorNet.h:540
bool PROFILER_QUERY(profilerQuery query)
Query the CUDA part of a profiler query.
Definition: tensorNet.h:468
float * mInputCPU
Definition: tensorNet.h:530
void timeDiff(const timespec &start, const timespec &end, timespec *result)
Find the difference between two timestamps.
Definition: timespec.h:78
const char * GetPrototxtPath() const
Retrieve the path to the network prototxt file.
Definition: tensorNet.h:312
nvinfer1::IRuntime * mInfer
Definition: tensorNet.h:523
bool mEnableProfiler
Definition: tensorNet.h:536
uint32_t mProfilerQueriesDone
Definition: tensorNet.h:534
float timingAccumulator
Definition: tensorNet.h:424
uint32_t mInputSize
Definition: tensorNet.h:529
Deep Learning Accelerator (DLA) Core 0 (only on Jetson Xavier)
Definition: tensorNet.h:110
bool mEnableDebug
Definition: tensorNet.h:537
Created directly with TensorRT API.
Definition: tensorNet.h:134
std::string mCacheEnginePath
Definition: tensorNet.h:513
16-bit floating-point half precision (FP16)
Definition: tensorNet.h:84
CPU walltime.
Definition: tensorNet.h:178
Unknown, unspecified, or disabled type.
Definition: tensorNet.h:81
UFF.
Definition: tensorNet.h:137
Deep Learning Accelerator (DLA) Core 1 (only on Jetson Xavier)
Definition: tensorNet.h:111
void PROFILER_BEGIN(profilerQuery query)
Begin a profiling query, before the network is run.
Definition: tensorNet.h:431
#define DEFAULT_MAX_BATCH_SIZE
Default maximum batch size.
Definition: tensorNet.h:65
Abstract class for loading a tensor network with TensorRT.
Definition: tensorNet.h:188
bool IsPrecision(precisionType type) const
Check if a particular precision is being used.
Definition: tensorNet.h:272
const char * precisionTypeToStr(precisionType type)
Stringize function that returns precisionType in text.
void PrintProfilerTimes()
Print the profiler times (in milliseconds).
Definition: tensorNet.h:347
cudaStream_t GetStream() const
Retrieve the stream that the device is operating on.
Definition: tensorNet.h:297
const char * profilerQueryToStr(profilerQuery query)
Stringize function that returns profilerQuery in text.
float * mInputCUDA
Definition: tensorNet.h:531
bool IsModelType(modelType type) const
Return true if the model is of the specified format.
Definition: tensorNet.h:327
CUDA kernel time.
Definition: tensorNet.h:179
#define CUDA(x)
Execute a CUDA call and print out any errors.
Definition: cudaUtility.h:38
float * CPU
Definition: tensorNet.h:547
modelType mModelType
Definition: tensorNet.h:518
Number of device types defined.
Definition: tensorNet.h:112
precisionType GetPrecision() const
Retrieve the type of precision being used.
Definition: tensorNet.h:267
const char * GetModelPath() const
Retrieve the path to the network model file.
Definition: tensorNet.h:317
Profiler interface for measuring layer timings.
Definition: tensorNet.h:413
Definition: tensorNet.h:161
std::string mPrototxtPath
Definition: tensorNet.h:509
nvinfer1::IExecutionContext * mContext
Definition: tensorNet.h:525
precisionType precisionTypeFromStr(const char *str)
Parse the precision type from a string.
std::string name
Definition: tensorNet.h:544
Definition: tensorNet.h:162
std::string mInputBlobName
Definition: tensorNet.h:512
profilerDevice
Profiler device.
Definition: tensorNet.h:176
caffemodel
Definition: tensorNet.h:135
Profiler()
Definition: tensorNet.h:416
bool AllowGPUFallback() const
Return true if GPU fallback is enabled.
Definition: tensorNet.h:257
nvinfer1::Dims3 Dims3
Definition: tensorNet.h:48
std::string mMeanPath
Definition: tensorNet.h:511
uint32_t size
Definition: tensorNet.h:546