Jetson Inference
DNN Vision Library
tensorNet.h
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __TENSOR_NET_H__
#define __TENSOR_NET_H__

// forward declaration of IInt8Calibrator
namespace nvinfer1 { class IInt8Calibrator; }

// includes
#include <NvInfer.h>

#include <jetson-utils/cudaUtility.h>
#include <jetson-utils/timespec.h>
#include <jetson-utils/logging.h>

#include <vector>
#include <sstream>
#include <math.h>

#if NV_TENSORRT_MAJOR >= 6
typedef nvinfer1::Dims3 Dims3;

#define DIMS_C(x) x.d[0]
#define DIMS_H(x) x.d[1]
#define DIMS_W(x) x.d[2]

#elif NV_TENSORRT_MAJOR >= 2
typedef nvinfer1::DimsCHW Dims3;

#define DIMS_C(x) x.d[0]
#define DIMS_H(x) x.d[1]
#define DIMS_W(x) x.d[2]

#else
typedef nvinfer1::Dims3 Dims3;

#define DIMS_C(x) x.c
#define DIMS_H(x) x.h
#define DIMS_W(x) x.w

#ifndef NV_TENSORRT_MAJOR
#define NV_TENSORRT_MAJOR 1
#define NV_TENSORRT_MINOR 0
#endif
#endif
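
/*
 * Usage sketch (illustrative, not part of the original header):  the DIMS_*
 * macros give version-independent access to the channel/height/width fields
 * of a Dims3, regardless of which TensorRT release the code is built against.
 * Here `net` is assumed to be an instance of a tensorNet-derived class:
 *
 *    const Dims3 dims = net->GetInputDims();
 *
 *    const int channels = DIMS_C(dims);
 *    const int height   = DIMS_H(dims);
 *    const int width    = DIMS_W(dims);
 */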

#if NV_TENSORRT_MAJOR >= 8
#define NOEXCEPT noexcept
#else
#define NOEXCEPT
#endif


/**
 * Default maximum batch size.
 */
#define DEFAULT_MAX_BATCH_SIZE 1

/**
 * Prefix used for tagging printed log output from TensorRT.
 */
#define LOG_TRT "[TRT] "

/**
 * Enumeration for indicating the desired precision that
 * the network should run in, if available in hardware.
 */
enum precisionType
{
    TYPE_DISABLED = 0,  /**< Unknown, unspecified, or disabled type */
    TYPE_FASTEST,       /**< The fastest detected precision should be used (i.e. INT8 or FP16, as a priority) */
    TYPE_FP32,          /**< 32-bit floating-point precision (FP32) */
    TYPE_FP16,          /**< 16-bit floating-point half precision (FP16) */
    TYPE_INT8,          /**< 8-bit integer precision (INT8) */
    NUM_PRECISIONS      /**< Number of precision types defined */
};

/**
 * Stringize function that returns precisionType in text.
 */
const char* precisionTypeToStr( precisionType type );

/**
 * Parse the precision type from a string.
 */
precisionType precisionTypeFromStr( const char* str );

/**
 * Enumeration for indicating the desired device that
 * the network should run on, if available in hardware.
 */
enum deviceType
{
    DEVICE_GPU = 0,             /**< GPU (if multiple GPUs are present, a specific GPU can be selected with cudaSetDevice()) */
    DEVICE_DLA,                 /**< Deep Learning Accelerator (DLA) Core 0 (only on Jetson Xavier) */
    DEVICE_DLA_0 = DEVICE_DLA,  /**< Deep Learning Accelerator (DLA) Core 0 (only on Jetson Xavier) */
    DEVICE_DLA_1,               /**< Deep Learning Accelerator (DLA) Core 1 (only on Jetson Xavier) */
    NUM_DEVICES                 /**< Number of device types defined */
};

/**
 * Stringize function that returns deviceType in text.
 */
const char* deviceTypeToStr( deviceType type );

/**
 * Parse the device type from a string.
 */
deviceType deviceTypeFromStr( const char* str );

/**
 * Enumeration indicating the format of the model that's
 * imported in TensorRT (either caffe, ONNX, or UFF).
 */
enum modelType
{
    MODEL_CUSTOM = 0,   /**< Created directly with TensorRT API */
    MODEL_CAFFE,        /**< caffemodel */
    MODEL_ONNX,         /**< ONNX */
    MODEL_UFF,          /**< UFF */
    MODEL_ENGINE        /**< TensorRT engine/plan */
};

/**
 * Stringize function that returns modelType in text.
 */
const char* modelTypeToStr( modelType type );

/**
 * Parse the model format from a string.
 */
modelType modelTypeFromStr( const char* str );

/**
 * Parse the model format from a file path.
 */
modelType modelTypeFromPath( const char* path );

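/*
 * Usage sketch (illustrative, not from the original header):  the parser
 * functions above map between the enums and their text forms, e.g. for
 * handling command-line options or detecting a model format by filename
 * extension (the path below is a placeholder):
 *
 *    const modelType type = modelTypeFromPath("networks/resnet18.onnx");
 *
 *    if( type == MODEL_ONNX )
 *       LogInfo("model format:  %s\n", modelTypeToStr(type));
 */
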
/**
 * Profiling queries.
 */
enum profilerQuery
{
    PROFILER_PREPROCESS = 0,
    PROFILER_NETWORK,
    PROFILER_POSTPROCESS,
    PROFILER_VISUALIZE,
    PROFILER_TOTAL
};

/**
 * Stringize function that returns profilerQuery in text.
 */
const char* profilerQueryToStr( profilerQuery query );

/**
 * Profiler device.
 */
enum profilerDevice
{
    PROFILER_CPU = 0,   /**< CPU walltime */
    PROFILER_CUDA       /**< CUDA kernel time */
};


/**
 * Abstract class for loading a tensor network with TensorRT.
 */
class tensorNet
{
public:
    /**
     * Destructor.
     */
    virtual ~tensorNet();

    bool LoadNetwork( const char* prototxt, const char* model, const char* mean=NULL,
                      const char* input_blob="data", const char* output_blob="prob",
                      uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
                      deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                      nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

    bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
                      const char* input_blob, const std::vector<std::string>& output_blobs,
                      uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
                      deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                      nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

    bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
                      const std::vector<std::string>& input_blobs,
                      const std::vector<std::string>& output_blobs,
                      uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
                      precisionType precision=TYPE_FASTEST,
                      deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                      nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

    bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
                      const char* input_blob, const Dims3& input_dims,
                      const std::vector<std::string>& output_blobs,
                      uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
                      precisionType precision=TYPE_FASTEST,
                      deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                      nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

    bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
                      const std::vector<std::string>& input_blobs,
                      const std::vector<Dims3>& input_dims,
                      const std::vector<std::string>& output_blobs,
                      uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
                      precisionType precision=TYPE_FASTEST,
                      deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                      nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

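    /*
     * Usage sketch (illustrative):  tensorNet's constructor is protected, so
     * loading is done through a derived class -- `myNet` below is a
     * hypothetical subclass, and the file paths are placeholders:
     *
     *    class myNet : public tensorNet { };
     *
     *    myNet* net = new myNet();
     *
     *    if( !net->LoadNetwork("deploy.prototxt", "snapshot.caffemodel", NULL,
     *                          "data", "prob") )
     *       LogError("failed to load network\n");
     */
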
    bool LoadEngine( const char* engine_filename,
                     const std::vector<std::string>& input_blobs,
                     const std::vector<std::string>& output_blobs,
                     nvinfer1::IPluginFactory* pluginFactory=NULL,
                     deviceType device=DEVICE_GPU,
                     cudaStream_t stream=NULL );

    bool LoadEngine( char* engine_stream, size_t engine_size,
                     const std::vector<std::string>& input_blobs,
                     const std::vector<std::string>& output_blobs,
                     nvinfer1::IPluginFactory* pluginFactory=NULL,
                     deviceType device=DEVICE_GPU,
                     cudaStream_t stream=NULL );

    bool LoadEngine( nvinfer1::ICudaEngine* engine,
                     const std::vector<std::string>& input_blobs,
                     const std::vector<std::string>& output_blobs,
                     deviceType device=DEVICE_GPU,
                     cudaStream_t stream=NULL );

    bool LoadEngine( const char* filename, char** stream, size_t* size );

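    /*
     * Usage sketch (illustrative; the engine path is a placeholder and `net`
     * is a tensorNet-derived instance):  a previously-built TensorRT engine
     * can be deserialized directly, skipping the parsing/builder steps:
     *
     *    std::vector<std::string> inputs;   inputs.push_back("data");
     *    std::vector<std::string> outputs;  outputs.push_back("prob");
     *
     *    if( !net->LoadEngine("networks/model.engine", inputs, outputs) )
     *       LogError("failed to load engine\n");
     */
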
    void EnableLayerProfiler();

    void EnableDebug();

    /**
     * Return true if GPU fallback is enabled.
     */
    inline bool AllowGPUFallback() const                    { return mAllowGPUFallback; }

    /**
     * Retrieve the device being used for execution.
     */
    inline deviceType GetDevice() const                     { return mDevice; }

    /**
     * Retrieve the type of precision being used.
     */
    inline precisionType GetPrecision() const               { return mPrecision; }

    /**
     * Check if a particular precision is being used.
     */
    inline bool IsPrecision( precisionType type ) const     { return (mPrecision == type); }

    static precisionType SelectPrecision( precisionType precision, deviceType device=DEVICE_GPU, bool allowInt8=true );

    static precisionType FindFastestPrecision( deviceType device=DEVICE_GPU, bool allowInt8=true );

    static std::vector<precisionType> DetectNativePrecisions( deviceType device=DEVICE_GPU );

    static bool DetectNativePrecision( const std::vector<precisionType>& nativeTypes, precisionType type );

    static bool DetectNativePrecision( precisionType precision, deviceType device=DEVICE_GPU );

    /**
     * Retrieve the stream that the device is operating on.
     */
    inline cudaStream_t GetStream() const                   { return mStream; }

    cudaStream_t CreateStream( bool nonBlocking=true );

    void SetStream( cudaStream_t stream );

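    /*
     * Usage sketch (illustrative):  by default processing runs on the NULL
     * stream; a dedicated non-blocking stream can be created and attached so
     * inference overlaps with other GPU work (`net` is assumed to be a
     * tensorNet-derived instance):
     *
     *    cudaStream_t stream = net->CreateStream();    // non-blocking by default
     *    net->SetStream(stream);
     *    // ... run inference ...
     *    CUDA(cudaStreamSynchronize(net->GetStream()));
     */
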
    /**
     * Retrieve the path to the network prototxt file.
     */
    inline const char* GetPrototxtPath() const              { return mPrototxtPath.c_str(); }

    /**
     * Retrieve the path to the network model file.
     */
    inline const char* GetModelPath() const                 { return mModelPath.c_str(); }

    /**
     * Retrieve the format of the network model.
     */
    inline modelType GetModelType() const                   { return mModelType; }

    /**
     * Return true if the model is of the specified format.
     */
    inline bool IsModelType( modelType type ) const         { return (mModelType == type); }

    /**
     * Retrieve the number of input layers to the network.
     */
    inline uint32_t GetInputLayers() const                  { return mInputs.size(); }

    /**
     * Retrieve the number of output layers to the network.
     */
    inline uint32_t GetOutputLayers() const                 { return mOutputs.size(); }

    /**
     * Retrieve the dimensions of network input layer.
     */
    inline Dims3 GetInputDims( uint32_t layer=0 ) const     { return mInputs[layer].dims; }

    /**
     * Retrieve the width of network input layer.
     */
    inline uint32_t GetInputWidth( uint32_t layer=0 ) const { return DIMS_W(mInputs[layer].dims); }

    /**
     * Retrieve the height of network input layer.
     */
    inline uint32_t GetInputHeight( uint32_t layer=0 ) const { return DIMS_H(mInputs[layer].dims); }

    /**
     * Retrieve the size (in bytes) of network input layer.
     */
    inline uint32_t GetInputSize( uint32_t layer=0 ) const  { return mInputs[layer].size; }

    /**
     * Retrieve the dimensions of network output layer.
     */
    inline Dims3 GetOutputDims( uint32_t layer=0 ) const    { return mOutputs[layer].dims; }

    /**
     * Retrieve the width of network output layer.
     */
    inline uint32_t GetOutputWidth( uint32_t layer=0 ) const { return DIMS_W(mOutputs[layer].dims); }

    /**
     * Retrieve the height of network output layer.
     */
    inline uint32_t GetOutputHeight( uint32_t layer=0 ) const { return DIMS_H(mOutputs[layer].dims); }

    /**
     * Retrieve the size (in bytes) of network output layer.
     */
    inline uint32_t GetOutputSize( uint32_t layer=0 ) const { return mOutputs[layer].size; }

    /**
     * Retrieve the network frames per second (FPS).
     */
    inline float GetNetworkFPS()                            { return 1000.0f / GetNetworkTime(); }

    /**
     * Retrieve the network runtime (in milliseconds).
     */
    inline float GetNetworkTime()                           { return GetProfilerTime(PROFILER_NETWORK, PROFILER_CUDA); }

    /**
     * Retrieve the profiler runtime (in milliseconds).
     */
    inline float2 GetProfilerTime( profilerQuery query )    { PROFILER_QUERY(query); return mProfilerTimes[query]; }

    /**
     * Retrieve the profiler runtime (in milliseconds).
     */
    inline float GetProfilerTime( profilerQuery query, profilerDevice device ) { PROFILER_QUERY(query); return (device == PROFILER_CPU) ? mProfilerTimes[query].x : mProfilerTimes[query].y; }

    /**
     * Print the profiler times (in milliseconds).
     */
    inline void PrintProfilerTimes()
    {
        LogInfo("\n");
        LogInfo(LOG_TRT "------------------------------------------------\n");
        LogInfo(LOG_TRT "Timing Report %s\n", GetModelPath());
        LogInfo(LOG_TRT "------------------------------------------------\n");

        for( uint32_t n=0; n <= PROFILER_TOTAL; n++ )
        {
            const profilerQuery query = (profilerQuery)n;

            if( PROFILER_QUERY(query) )
                LogInfo(LOG_TRT "%-12s CPU %9.5fms CUDA %9.5fms\n", profilerQueryToStr(query), mProfilerTimes[n].x, mProfilerTimes[n].y);
        }

        LogInfo(LOG_TRT "------------------------------------------------\n\n");

        static bool first_run=true;

        if( first_run )
        {
            LogWarning(LOG_TRT "note -- when processing a single image, run 'sudo jetson_clocks' before\n"
                       "                to disable DVFS for more accurate profiling/timing measurements\n\n");

            first_run = false;
        }
    }

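    /*
     * Usage sketch (illustrative):  after running inference, per-stage timing
     * can be queried on either the CPU or CUDA side, or dumped all at once
     * (`net` is assumed to be a tensorNet-derived instance):
     *
     *    const float netTimeMs = net->GetProfilerTime(PROFILER_NETWORK, PROFILER_CUDA);
     *    const float fps       = net->GetNetworkFPS();
     *
     *    net->PrintProfilerTimes();
     */
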
protected:

    tensorNet();

    bool ProcessNetwork( bool sync=true );

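    /*
     * Usage sketch (illustrative):  a derived class typically pre-processes
     * its input into mInputs[0].CUDA, runs the network, then reads results
     * back from the output layer buffers.  The exact pre/post-processing
     * steps below are assumptions, not prescribed by this header:
     *
     *    // inside a tensorNet subclass, after LoadNetwork() has succeeded
     *    if( !ProcessNetwork() )      // sync=true blocks until completion
     *       return false;
     *
     *    const float* results = mOutputs[0].CPU;
     */
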
    bool ProfileModel( const std::string& deployFile, const std::string& modelFile,
                       const std::vector<std::string>& inputs, const std::vector<Dims3>& inputDims,
                       const std::vector<std::string>& outputs, uint32_t maxBatchSize,
                       precisionType precision, deviceType device, bool allowGPUFallback,
                       nvinfer1::IInt8Calibrator* calibrator, char** engineStream, size_t* engineSize );

#if NV_TENSORRT_MAJOR >= 8
    bool ConfigureBuilder( nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
                           uint32_t maxBatchSize, uint32_t workspaceSize, precisionType precision,
                           deviceType device, bool allowGPUFallback,
                           nvinfer1::IInt8Calibrator* calibrator );
#else
    bool ConfigureBuilder( nvinfer1::IBuilder* builder, uint32_t maxBatchSize,
                           uint32_t workspaceSize, precisionType precision,
                           deviceType device, bool allowGPUFallback,
                           nvinfer1::IInt8Calibrator* calibrator );
#endif

    /**
     * Logger class for GIE info/warning/errors.
     */
    class Logger : public nvinfer1::ILogger
    {
    public:
        void log( Severity severity, const char* msg ) NOEXCEPT override
        {
            if( severity == Severity::kWARNING )
            {
                LogWarning(LOG_TRT "%s\n", msg);
            }
            else if( severity == Severity::kINFO )
            {
                LogInfo(LOG_TRT "%s\n", msg);
            }
        #if NV_TENSORRT_MAJOR >= 6
            else if( severity == Severity::kVERBOSE )
            {
                LogVerbose(LOG_TRT "%s\n", msg);
            }
        #endif
            else
            {
                LogError(LOG_TRT "%s\n", msg);
            }
        }
    } static gLogger;

    /**
     * Profiler interface for measuring layer timings.
     */
    class Profiler : public nvinfer1::IProfiler
    {
    public:
        Profiler() : timingAccumulator(0.0f)    { }

        virtual void reportLayerTime( const char* layerName, float ms ) NOEXCEPT
        {
            LogVerbose(LOG_TRT "layer %s - %f ms\n", layerName, ms);
            timingAccumulator += ms;
        }

        float timingAccumulator;
    } gProfiler;

    /**
     * Begin a profiling query, before network is run.
     */
    inline void PROFILER_BEGIN( profilerQuery query )
    {
        const uint32_t evt  = query*2;
        const uint32_t flag = (1 << query);

        CUDA(cudaEventRecord(mEventsGPU[evt], mStream));
        timestamp(&mEventsCPU[evt]);

        mProfilerQueriesUsed |= flag;
        mProfilerQueriesDone &= ~flag;
    }

    /**
     * End a profiling query, after the network is run.
     */
    inline void PROFILER_END( profilerQuery query )
    {
        const uint32_t evt = query*2+1;

        CUDA(cudaEventRecord(mEventsGPU[evt]));
        timestamp(&mEventsCPU[evt]);

        timespec cpuTime;
        timeDiff(mEventsCPU[evt-1], mEventsCPU[evt], &cpuTime);
        mProfilerTimes[query].x = timeFloat(cpuTime);

        if( mEnableProfiler && query == PROFILER_NETWORK )
        {
            LogVerbose(LOG_TRT "layer network time - %f ms\n", gProfiler.timingAccumulator);
            gProfiler.timingAccumulator = 0.0f;
            LogWarning(LOG_TRT "note -- when processing a single image, run 'sudo jetson_clocks' before\n"
                       "                to disable DVFS for more accurate profiling/timing measurements\n");
        }
    }

    /**
     * Query the CUDA part of a profiler query.
     */
    inline bool PROFILER_QUERY( profilerQuery query )
    {
        const uint32_t flag = (1 << query);

        if( query == PROFILER_TOTAL )
        {
            mProfilerTimes[PROFILER_TOTAL].x = 0.0f;
            mProfilerTimes[PROFILER_TOTAL].y = 0.0f;

            for( uint32_t n=0; n < PROFILER_TOTAL; n++ )
            {
                if( PROFILER_QUERY((profilerQuery)n) )
                {
                    mProfilerTimes[PROFILER_TOTAL].x += mProfilerTimes[n].x;
                    mProfilerTimes[PROFILER_TOTAL].y += mProfilerTimes[n].y;
                }
            }

            return true;
        }
        else if( mProfilerQueriesUsed & flag )
        {
            if( !(mProfilerQueriesDone & flag) )
            {
                const uint32_t evt = query*2;
                float cuda_time = 0.0f;
                CUDA(cudaEventElapsedTime(&cuda_time, mEventsGPU[evt], mEventsGPU[evt+1]));
                mProfilerTimes[query].y = cuda_time;
                mProfilerQueriesDone |= flag;
                //mProfilerQueriesUsed &= ~flag;
            }

            return true;
        }

        return false;
    }

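    /*
     * Usage sketch (illustrative):  subclasses bracket each processing stage
     * with these helpers so that GetProfilerTime() and PrintProfilerTimes()
     * can report per-stage CPU and CUDA timings:
     *
     *    PROFILER_BEGIN(PROFILER_PREPROCESS);
     *    // ... launch pre-processing CUDA kernels ...
     *    PROFILER_END(PROFILER_PREPROCESS);
     *
     *    PROFILER_BEGIN(PROFILER_NETWORK);
     *    ProcessNetwork();
     *    PROFILER_END(PROFILER_NETWORK);
     */
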
protected:

    /* Member Variables */
    std::string mPrototxtPath;
    std::string mModelPath;
    std::string mMeanPath;
    std::string mCacheEnginePath;
    std::string mCacheCalibrationPath;

    deviceType    mDevice;
    precisionType mPrecision;
    modelType     mModelType;
    cudaStream_t  mStream;
    cudaEvent_t   mEventsGPU[PROFILER_TOTAL * 2];
    timespec      mEventsCPU[PROFILER_TOTAL * 2];

    nvinfer1::IRuntime*          mInfer;
    nvinfer1::ICudaEngine*       mEngine;
    nvinfer1::IExecutionContext* mContext;

    float2   mProfilerTimes[PROFILER_TOTAL + 1];
    uint32_t mProfilerQueriesUsed;
    uint32_t mProfilerQueriesDone;

    uint32_t mWorkspaceSize;
    uint32_t mMaxBatchSize;
    bool     mEnableProfiler;
    bool     mEnableDebug;
    bool     mAllowGPUFallback;
    void**   mBindings;

    struct layerInfo
    {
        std::string name;
        Dims3       dims;
        uint32_t    size;
        uint32_t    binding;
        float*      CPU;
        float*      CUDA;
    };

    std::vector<layerInfo> mInputs;
    std::vector<layerInfo> mOutputs;
};

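/*
 * Usage sketch (illustrative):  each layerInfo record exposes a binding's
 * name, dimensions, and CPU/CUDA buffers.  For example, a subclass could
 * enumerate a model's outputs like so:
 *
 *    for( uint32_t n=0; n < GetOutputLayers(); n++ )
 *       LogInfo(LOG_TRT "output %u '%s'  %ux%u  (%u bytes)\n", n,
 *               mOutputs[n].name.c_str(), GetOutputWidth(n),
 *               GetOutputHeight(n), GetOutputSize(n));
 */
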
#endif