Jetson Inference
DNN Vision Library
tensorNet.h
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __TENSOR_NET_H__
#define __TENSOR_NET_H__

// forward declaration of IInt8Calibrator
namespace nvinfer1 { class IInt8Calibrator; }

// includes
#include <NvInfer.h>

#include <jetson-utils/cudaUtility.h>
#include <jetson-utils/commandLine.h>
#include <jetson-utils/imageFormat.h>
#include <jetson-utils/timespec.h>
#include <jetson-utils/logging.h>

#include <vector>
#include <sstream>
#include <math.h>

#if NV_TENSORRT_MAJOR >= 6
typedef nvinfer1::Dims3 Dims3;

#define DIMS_C(x) x.d[0]
#define DIMS_H(x) x.d[1]
#define DIMS_W(x) x.d[2]

#elif NV_TENSORRT_MAJOR >= 2
typedef nvinfer1::DimsCHW Dims3;

#define DIMS_C(x) x.d[0]
#define DIMS_H(x) x.d[1]
#define DIMS_W(x) x.d[2]

#else
typedef nvinfer1::Dims3 Dims3;

#define DIMS_C(x) x.c
#define DIMS_H(x) x.h
#define DIMS_W(x) x.w

#ifndef NV_TENSORRT_MAJOR
#define NV_TENSORRT_MAJOR 1
#define NV_TENSORRT_MINOR 0
#endif
#endif

#if NV_TENSORRT_MAJOR >= 8
#define NOEXCEPT noexcept
#else
#define NOEXCEPT
#endif


/**
 * Check that the installed TensorRT version is at least (major, minor, patch).
 */
#define TENSORRT_VERSION_CHECK(major, minor, patch) (NV_TENSORRT_MAJOR > major || (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR > minor) || (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR == minor && NV_TENSORRT_PATCH >= patch))

/**
 * Default maximum batch size.
 */
#define DEFAULT_MAX_BATCH_SIZE 1

/**
 * Prefix used for tagging printed log output from TensorRT.
 */
#define LOG_TRT "[TRT] "

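// For instance, code that needs a TensorRT 8.5+ API could guard itself with the
// version-check macro above -- an illustrative usage, not part of the original header:
//
//    #if TENSORRT_VERSION_CHECK(8, 5, 0)
//        // ... use features introduced in TensorRT >= 8.5 ...
//    #endif
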
/**
 * Enumeration for indicating the desired precision that
 * the network should run in, if available in hardware.
 */
enum precisionType
{
    TYPE_DISABLED = 0,  /**< Unknown, unspecified, or disabled type */
    TYPE_FASTEST,       /**< The fastest detected precision should be used (i.e. the default behavior) */
    TYPE_FP32,          /**< 32-bit floating-point precision (FP32) */
    TYPE_FP16,          /**< 16-bit floating-point half precision (FP16) */
    TYPE_INT8,          /**< 8-bit integer precision (INT8) */
    NUM_PRECISIONS      /**< Number of precision types defined */
};

/**
 * Stringize function that returns precisionType in text.
 */
const char* precisionTypeToStr( precisionType type );

/**
 * Parse the precision type from a string.
 */
precisionType precisionTypeFromStr( const char* str );

/**
 * Enumeration for indicating the desired device that
 * the network should run on, if available in hardware.
 */
enum deviceType
{
    DEVICE_GPU = 0,             /**< GPU (if multiple GPUs are present, a specific GPU can be selected with cudaSetDevice()) */
    DEVICE_DLA,                 /**< Deep Learning Accelerator (DLA) Core 0 (only on Jetson Xavier) */
    DEVICE_DLA_0 = DEVICE_DLA,  /**< Deep Learning Accelerator (DLA) Core 0 (only on Jetson Xavier) */
    DEVICE_DLA_1,               /**< Deep Learning Accelerator (DLA) Core 1 (only on Jetson Xavier) */
    NUM_DEVICES                 /**< Number of device types defined */
};

/**
 * Stringize function that returns deviceType in text.
 */
const char* deviceTypeToStr( deviceType type );

/**
 * Parse the device type from a string.
 */
deviceType deviceTypeFromStr( const char* str );

/**
 * Enumeration indicating the format of the model that's
 * imported in TensorRT (either caffe, ONNX, or UFF).
 */
enum modelType
{
    MODEL_CUSTOM = 0,  /**< Created directly with TensorRT API */
    MODEL_CAFFE,       /**< caffemodel */
    MODEL_ONNX,        /**< ONNX */
    MODEL_UFF,         /**< UFF */
    MODEL_ENGINE       /**< TensorRT engine/plan */
};

/**
 * Stringize function that returns modelType in text.
 */
const char* modelTypeToStr( modelType type );

/**
 * Parse the model format from a string.
 */
modelType modelTypeFromStr( const char* str );

/**
 * Parse the model format from a file path.
 */
modelType modelTypeFromPath( const char* path );

/**
 * Profiling queries.
 */
enum profilerQuery
{
    PROFILER_PREPROCESS = 0,
    PROFILER_NETWORK,
    PROFILER_POSTPROCESS,
    PROFILER_VISUALIZE,
    PROFILER_TOTAL,
};

/**
 * Stringize function that returns profilerQuery in text.
 */
const char* profilerQueryToStr( profilerQuery query );

/**
 * Profiler device.
 */
enum profilerDevice
{
    PROFILER_CPU = 0,  /**< CPU walltime */
    PROFILER_CUDA,     /**< CUDA kernel time */
};

/**
 * Abstract class for loading a tensor network with TensorRT.
 */
class tensorNet
{
public:
    /**
     * Destroy.
     */
    virtual ~tensorNet();

    /**
     * Load a new network instance.
     */
    bool LoadNetwork( const char* prototxt, const char* model, const char* mean=NULL,
                      const char* input_blob="data", const char* output_blob="prob",
                      uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
                      deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                      nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

    /**
     * Load a new network instance with multiple output layers.
     */
    bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
                      const char* input_blob, const std::vector<std::string>& output_blobs,
                      uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
                      deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                      nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

    /**
     * Load a new network instance with multiple input and output layers.
     */
    bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
                      const std::vector<std::string>& input_blobs,
                      const std::vector<std::string>& output_blobs,
                      uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
                      precisionType precision=TYPE_FASTEST,
                      deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                      nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

    /**
     * Load a new network instance, with explicit input layer dimensions.
     */
    bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
                      const char* input_blob, const Dims3& input_dims,
                      const std::vector<std::string>& output_blobs,
                      uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
                      precisionType precision=TYPE_FASTEST,
                      deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                      nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

    /**
     * Load a new network instance, with multiple input layers of explicit dimensions.
     */
    bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
                      const std::vector<std::string>& input_blobs,
                      const std::vector<Dims3>& input_dims,
                      const std::vector<std::string>& output_blobs,
                      uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
                      precisionType precision=TYPE_FASTEST,
                      deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
                      nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

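    // As an illustrative sketch (not from the original header), a derived class
    // loading a single-input/single-output ONNX model could use the first overload,
    // passing NULL for the caffe-specific prototxt/mean arguments -- the filename
    // and blob names here are hypothetical:
    //
    //    LoadNetwork(NULL, "resnet18.onnx", NULL, "input_0", "output_0");
    //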
    /**
     * Load a network instance from a serialized engine plan file.
     */
    bool LoadEngine( const char* engine_filename,
                     const std::vector<std::string>& input_blobs,
                     const std::vector<std::string>& output_blobs,
                     nvinfer1::IPluginFactory* pluginFactory=NULL,
                     deviceType device=DEVICE_GPU,
                     cudaStream_t stream=NULL );

    /**
     * Load a network instance from a serialized engine plan held in memory.
     */
    bool LoadEngine( char* engine_stream, size_t engine_size,
                     const std::vector<std::string>& input_blobs,
                     const std::vector<std::string>& output_blobs,
                     nvinfer1::IPluginFactory* pluginFactory=NULL,
                     deviceType device=DEVICE_GPU,
                     cudaStream_t stream=NULL );

    /**
     * Load a network instance from an already-created TensorRT engine.
     */
    bool LoadEngine( nvinfer1::ICudaEngine* engine,
                     const std::vector<std::string>& input_blobs,
                     const std::vector<std::string>& output_blobs,
                     deviceType device=DEVICE_GPU,
                     cudaStream_t stream=NULL );

    /**
     * Load a serialized engine plan file into memory.
     */
    bool LoadEngine( const char* filename, char** stream, size_t* size );

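    // Similarly, a prebuilt TensorRT plan could be deserialized directly -- a sketch,
    // with a hypothetical engine filename and binding names:
    //
    //    std::vector<std::string> inputs  = { "input_0" };
    //    std::vector<std::string> outputs = { "output_0" };
    //
    //    LoadEngine("resnet18.engine", inputs, outputs);
    //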
    /**
     * Load class descriptions from a label file.
     */
    static bool LoadClassLabels( const char* filename, std::vector<std::string>& descriptions, int expectedClasses=-1 );

    /**
     * Load class descriptions and synset strings from a label file.
     */
    static bool LoadClassLabels( const char* filename, std::vector<std::string>& descriptions, std::vector<std::string>& synsets, int expectedClasses=-1 );

    /**
     * Load class colors from a text file.
     */
    static bool LoadClassColors( const char* filename, float4* colors, int expectedClasses, float defaultAlpha=255.0f );

    /**
     * Load class colors from a text file, allocating the output array.
     */
    static bool LoadClassColors( const char* filename, float4** colors, int expectedClasses, float defaultAlpha=255.0f );

    /**
     * Procedurally generate a color for a given class index with the specified alpha value.
     */
    static float4 GenerateColor( uint32_t classID, float alpha=255.0f );

    /**
     * Manually enable layer profiling times.
     */
    void EnableLayerProfiler();

    /**
     * Manually enable debug messages and synchronization.
     */
    void EnableDebug();

    /**
     * Return true if GPU fallback is enabled.
     */
    inline bool AllowGPUFallback() const  { return mAllowGPUFallback; }

    /**
     * Retrieve the device being used for execution.
     */
    inline deviceType GetDevice() const  { return mDevice; }

    /**
     * Retrieve the type of precision being used.
     */
    inline precisionType GetPrecision() const  { return mPrecision; }

    /**
     * Check if a particular precision is being used.
     */
    inline bool IsPrecision( precisionType type ) const  { return (mPrecision == type); }

    /**
     * Resolve a desired precision to a specific one that's available.
     */
    static precisionType SelectPrecision( precisionType precision, deviceType device=DEVICE_GPU, bool allowInt8=true );

    /**
     * Determine the fastest native precision on a device.
     */
    static precisionType FindFastestPrecision( deviceType device=DEVICE_GPU, bool allowInt8=true );

    /**
     * Detect the precisions supported natively on a device.
     */
    static std::vector<precisionType> DetectNativePrecisions( deviceType device=DEVICE_GPU );

    /**
     * Detect if a particular precision is supported natively.
     */
    static bool DetectNativePrecision( const std::vector<precisionType>& nativeTypes, precisionType type );

    /**
     * Detect if a particular precision is supported natively.
     */
    static bool DetectNativePrecision( precisionType precision, deviceType device=DEVICE_GPU );

    /**
     * Retrieve the stream that the device is operating on.
     */
    inline cudaStream_t GetStream() const  { return mStream; }

    /**
     * Create and use a new stream for execution.
     */
    cudaStream_t CreateStream( bool nonBlocking=true );

    /**
     * Set the stream that the device is operating on.
     */
    void SetStream( cudaStream_t stream );

    /**
     * Retrieve the path to the network prototxt file.
     */
    inline const char* GetPrototxtPath() const  { return mPrototxtPath.c_str(); }

    /**
     * Retrieve the full path to the model file, including the filename.
     */
    inline const char* GetModelPath() const  { return mModelPath.c_str(); }

    /**
     * Retrieve the filename of the model file, excluding the directory.
     */
    inline const char* GetModelFilename() const  { return mModelFile.c_str(); }

    /**
     * Retrieve the format of the network model.
     */
    inline modelType GetModelType() const  { return mModelType; }

    /**
     * Return true if the model is of the specified format.
     */
    inline bool IsModelType( modelType type ) const  { return (mModelType == type); }

    /**
     * Retrieve the number of input layers to the network.
     */
    inline uint32_t GetInputLayers() const  { return mInputs.size(); }

    /**
     * Retrieve the number of output layers to the network.
     */
    inline uint32_t GetOutputLayers() const  { return mOutputs.size(); }

    /**
     * Retrieve the dimensions of network input layer.
     */
    inline Dims3 GetInputDims( uint32_t layer=0 ) const  { return mInputs[layer].dims; }

    /**
     * Retrieve the width of network input layer.
     */
    inline uint32_t GetInputWidth( uint32_t layer=0 ) const  { return DIMS_W(mInputs[layer].dims); }

    /**
     * Retrieve the height of network input layer.
     */
    inline uint32_t GetInputHeight( uint32_t layer=0 ) const  { return DIMS_H(mInputs[layer].dims); }

    /**
     * Retrieve the size (in bytes) of network input layer.
     */
    inline uint32_t GetInputSize( uint32_t layer=0 ) const  { return mInputs[layer].size; }

    /**
     * Get the CUDA pointer to the input layer's memory.
     */
    inline float* GetInputPtr( uint32_t layer=0 ) const  { return mInputs[layer].CUDA; }

    /**
     * Retrieve the dimensions of network output layer.
     */
    inline Dims3 GetOutputDims( uint32_t layer=0 ) const  { return mOutputs[layer].dims; }

    /**
     * Retrieve the width of network output layer.
     */
    inline uint32_t GetOutputWidth( uint32_t layer=0 ) const  { return DIMS_W(mOutputs[layer].dims); }

    /**
     * Retrieve the height of network output layer.
     */
    inline uint32_t GetOutputHeight( uint32_t layer=0 ) const  { return DIMS_H(mOutputs[layer].dims); }

    /**
     * Retrieve the size (in bytes) of network output layer.
     */
    inline uint32_t GetOutputSize( uint32_t layer=0 ) const  { return mOutputs[layer].size; }

    /**
     * Get the CUDA pointer to the output memory.
     */
    inline float* GetOutputPtr( uint32_t layer=0 ) const  { return mOutputs[layer].CUDA; }

    /**
     * Retrieve the network frames per second (FPS).
     */
    inline float GetNetworkFPS()  { return 1000.0f / GetNetworkTime(); }

    /**
     * Retrieve the network runtime (in milliseconds).
     */
    inline float GetNetworkTime()  { return GetProfilerTime(PROFILER_NETWORK, PROFILER_CUDA); }

    /**
     * Retrieve the network name (its filename).
     */
    inline const char* GetNetworkName() const  { return mModelFile.c_str(); }

    /**
     * Retrieve the profiler runtime (in milliseconds).
     */
    inline float2 GetProfilerTime( profilerQuery query )  { PROFILER_QUERY(query); return mProfilerTimes[query]; }

    /**
     * Retrieve the profiler runtime (in milliseconds).
     */
    inline float GetProfilerTime( profilerQuery query, profilerDevice device )  { PROFILER_QUERY(query); return (device == PROFILER_CPU) ? mProfilerTimes[query].x : mProfilerTimes[query].y; }

    /**
     * Print the profiler times (in milliseconds).
     */
    inline void PrintProfilerTimes()
    {
        LogInfo("\n");
        LogInfo(LOG_TRT "------------------------------------------------\n");
        LogInfo(LOG_TRT "Timing Report %s\n", GetModelPath());
        LogInfo(LOG_TRT "------------------------------------------------\n");

        for( uint32_t n=0; n <= PROFILER_TOTAL; n++ )
        {
            const profilerQuery query = (profilerQuery)n;

            if( PROFILER_QUERY(query) )
                LogInfo(LOG_TRT "%-12s CPU %9.5fms CUDA %9.5fms\n", profilerQueryToStr(query), mProfilerTimes[n].x, mProfilerTimes[n].y);
        }

        LogInfo(LOG_TRT "------------------------------------------------\n\n");

        static bool first_run = true;

        if( first_run )
        {
            LogWarning(LOG_TRT "note -- when processing a single image, run 'sudo jetson_clocks' before\n"
                       "          to disable DVFS for more accurate profiling/timing measurements\n\n");

            first_run = false;
        }
    }

protected:

    /**
     * Constructor.
     */
    tensorNet();

    /**
     * Execute processing of the network.
     */
    bool ProcessNetwork( bool sync=true );

    /**
     * Create and output an optimized network model.
     */
    bool ProfileModel( const std::string& deployFile, const std::string& modelFile,
                       const std::vector<std::string>& inputs, const std::vector<Dims3>& inputDims,
                       const std::vector<std::string>& outputs, uint32_t maxBatchSize,
                       precisionType precision, deviceType device, bool allowGPUFallback,
                       nvinfer1::IInt8Calibrator* calibrator, char** engineStream, size_t* engineSize );

    /**
     * Configure builder options.
     */
#if NV_TENSORRT_MAJOR >= 8
    bool ConfigureBuilder( nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
                           uint32_t maxBatchSize, uint32_t workspaceSize, precisionType precision,
                           deviceType device, bool allowGPUFallback,
                           nvinfer1::IInt8Calibrator* calibrator );
#else
    bool ConfigureBuilder( nvinfer1::IBuilder* builder, uint32_t maxBatchSize,
                           uint32_t workspaceSize, precisionType precision,
                           deviceType device, bool allowGPUFallback,
                           nvinfer1::IInt8Calibrator* calibrator );
#endif

    /**
     * Validate that the model already has a built TensorRT engine that exists and doesn't need updating.
     */
    bool ValidateEngine( const char* model_path, const char* cache_path, const char* checksum_path );

    /**
     * Logger class for GIE info/warning/errors.
     */
    class Logger : public nvinfer1::ILogger
    {
    public:
        void log( Severity severity, const char* msg ) NOEXCEPT override
        {
            if( severity == Severity::kWARNING )
            {
                LogWarning(LOG_TRT "%s\n", msg);
            }
            else if( severity == Severity::kINFO )
            {
                LogInfo(LOG_TRT "%s\n", msg);
            }
        #if NV_TENSORRT_MAJOR >= 6
            else if( severity == Severity::kVERBOSE )
            {
                LogVerbose(LOG_TRT "%s\n", msg);
            }
        #endif
            else
            {
                LogError(LOG_TRT "%s\n", msg);
            }
        }
    } static gLogger;

    /**
     * Profiler interface for measuring layer timings.
     */
    class Profiler : public nvinfer1::IProfiler
    {
    public:
        Profiler() : timingAccumulator(0.0f)  { }

        virtual void reportLayerTime( const char* layerName, float ms ) NOEXCEPT
        {
            LogVerbose(LOG_TRT "layer %s - %f ms\n", layerName, ms);
            timingAccumulator += ms;
        }

        float timingAccumulator;
    } gProfiler;

    /**
     * Begin a profiling query, before the network is run.
     */
    inline void PROFILER_BEGIN( profilerQuery query )
    {
        const uint32_t evt  = query*2;
        const uint32_t flag = (1 << query);

        CUDA(cudaEventRecord(mEventsGPU[evt], mStream));
        timestamp(&mEventsCPU[evt]);

        mProfilerQueriesUsed |= flag;
        mProfilerQueriesDone &= ~flag;
    }

    /**
     * End a profiling query, after the network is run.
     */
    inline void PROFILER_END( profilerQuery query )
    {
        const uint32_t evt = query*2+1;

        CUDA(cudaEventRecord(mEventsGPU[evt]));
        timestamp(&mEventsCPU[evt]);

        timespec cpuTime;
        timeDiff(mEventsCPU[evt-1], mEventsCPU[evt], &cpuTime);
        mProfilerTimes[query].x = timeFloat(cpuTime);

        if( mEnableProfiler && query == PROFILER_NETWORK )
        {
            LogVerbose(LOG_TRT "layer network time - %f ms\n", gProfiler.timingAccumulator);
            gProfiler.timingAccumulator = 0.0f;

            LogWarning(LOG_TRT "note -- when processing a single image, run 'sudo jetson_clocks' before\n"
                       "          to disable DVFS for more accurate profiling/timing measurements\n");
        }
    }

    /**
     * Query the CUDA part of a profiler query.
     */
    inline bool PROFILER_QUERY( profilerQuery query )
    {
        const uint32_t flag = (1 << query);

        if( query == PROFILER_TOTAL )
        {
            mProfilerTimes[PROFILER_TOTAL].x = 0.0f;
            mProfilerTimes[PROFILER_TOTAL].y = 0.0f;

            for( uint32_t n=0; n < PROFILER_TOTAL; n++ )
            {
                if( PROFILER_QUERY((profilerQuery)n) )
                {
                    mProfilerTimes[PROFILER_TOTAL].x += mProfilerTimes[n].x;
                    mProfilerTimes[PROFILER_TOTAL].y += mProfilerTimes[n].y;
                }
            }

            return true;
        }
        else if( mProfilerQueriesUsed & flag )
        {
            if( !(mProfilerQueriesDone & flag) )
            {
                const uint32_t evt = query*2;
                float cuda_time = 0.0f;

                CUDA(cudaEventElapsedTime(&cuda_time, mEventsGPU[evt], mEventsGPU[evt+1]));

                mProfilerTimes[query].y = cuda_time;
                mProfilerQueriesDone |= flag;
                //mProfilerQueriesUsed &= ~flag;
            }

            return true;
        }

        return false;
    }

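    // Taken together, a derived class typically brackets each processing phase
    // with these helpers -- a minimal sketch of the pattern (illustrative, not
    // part of the original header):
    //
    //    PROFILER_BEGIN(PROFILER_NETWORK);
    //
    //    if( !ProcessNetwork() )     // enqueue the TensorRT execution
    //        return false;
    //
    //    PROFILER_END(PROFILER_NETWORK);
    //
    //    PrintProfilerTimes();       // PROFILER_QUERY() then resolves the CUDA event timings
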
protected:

    /* Member Variables */
    std::string mPrototxtPath;
    std::string mModelPath;
    std::string mModelFile;
    std::string mMeanPath;
    std::string mCacheEnginePath;
    std::string mCacheCalibrationPath;
    std::string mChecksumPath;

    deviceType    mDevice;
    precisionType mPrecision;
    modelType     mModelType;

    cudaStream_t mStream;
    cudaEvent_t  mEventsGPU[PROFILER_TOTAL * 2];
    timespec     mEventsCPU[PROFILER_TOTAL * 2];

    nvinfer1::IRuntime* mInfer;
    nvinfer1::ICudaEngine* mEngine;
    nvinfer1::IExecutionContext* mContext;

    float2   mProfilerTimes[PROFILER_TOTAL + 1];
    uint32_t mProfilerQueriesUsed;
    uint32_t mProfilerQueriesDone;

    uint32_t mWorkspaceSize;
    uint32_t mMaxBatchSize;

    bool mEnableProfiler;
    bool mEnableDebug;
    bool mAllowGPUFallback;

    void** mBindings;

    struct layerInfo
    {
        std::string name;
        Dims3    dims;
        uint32_t size;
        uint32_t binding;
        float*   CPU;
        float*   CUDA;
    };

    std::vector<layerInfo> mInputs;
    std::vector<layerInfo> mOutputs;
};

#endif
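
Usage sketch (not part of the header): tensorNet's constructor is protected, so applications normally use a derived class such as imageNet or detectNet. The minimal subclass below, with a hypothetical "model.onnx" and blob names, illustrates the load/infer flow the API above implies; it assumes the input/output bindings are mapped zero-copy memory (as jetson-utils allocates on Jetson), so the host can write through GetInputPtr() directly.

    #include "tensorNet.h"
    #include <string.h>

    // minimal (hypothetical) subclass -- tensorNet is abstract with a protected
    // constructor, and ProcessNetwork() is protected, so expose a public wrapper
    class myNet : public tensorNet
    {
    public:
        myNet()        { }
        bool Process() { return ProcessNetwork(); }  // synchronous inference
    };

    int main()
    {
        myNet net;

        // build or load the TensorRT engine (prototxt/mean are NULL for ONNX)
        if( !net.LoadNetwork(NULL, "model.onnx", NULL, "input_0", "output_0") )
            return 1;

        // fill the input binding with pre-processed data -- with mapped
        // zero-copy memory, host writes via this pointer are visible to CUDA
        float* input = net.GetInputPtr();
        memset(input, 0, net.GetInputSize());  // dummy all-zero input for illustration

        if( !net.Process() )  // run inference
            return 1;

        // read back the results and the network's CUDA runtime
        const float* output = net.GetOutputPtr();
        LogInfo("output[0]=%f  (network time %.2f ms)\n", output[0], net.GetNetworkTime());

        return 0;
    }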