Jetson Inference
DNN Vision Library

Image recognition with classification networks, using TensorRT. More...

#include <imageNet.h>

Inheritance diagram for imageNet:
tensorNet

Public Types

typedef std::vector< std::pair< uint32_t, float > > Classifications
 List of classification results where each entry represents a (classID, confidence) pair. More...
 

Public Member Functions

virtual ~imageNet ()
 Destroy. More...
 
template<typename T >
int Classify (T *image, uint32_t width, uint32_t height, float *confidence=NULL)
 Predict the maximum-likelihood image class whose confidence meets the minimum threshold. More...
 
int Classify (void *image, uint32_t width, uint32_t height, imageFormat format, float *confidence=NULL)
 Predict the maximum-likelihood image class whose confidence meets the minimum threshold. More...
 
int Classify (float *rgba, uint32_t width, uint32_t height, float *confidence=NULL, imageFormat format=IMAGE_RGBA32F)
 Predict the maximum-likelihood image class whose confidence meets the minimum threshold. More...
 
template<typename T >
int Classify (T *image, uint32_t width, uint32_t height, Classifications &classifications, int topK=1)
 Classify the image and return the topK image classification results that meet the minimum confidence threshold set by SetThreshold() or the --threshold command-line argument. More...
 
int Classify (void *image, uint32_t width, uint32_t height, imageFormat format, Classifications &classifications, int topK=1)
 Classify the image and return the topK image classification results that meet the minimum confidence threshold set by SetThreshold() or the --threshold command-line argument. More...
 
uint32_t GetNumClasses () const
 Retrieve the number of image recognition classes (typically 1000) More...
 
const char * GetClassLabel (int index) const
 Retrieve the description of a particular class. More...
 
const char * GetClassDesc (int index) const
 Retrieve the description of a particular class. More...
 
const char * GetClassSynset (int index) const
 Retrieve the class synset category of a particular class. More...
 
const char * GetClassPath () const
 Retrieve the path to the file containing the class descriptions. More...
 
float GetThreshold () const
 Return the confidence threshold used for classification. More...
 
void SetThreshold (float threshold)
 Set the confidence threshold used for classification. More...
 
float GetSmoothing () const
 Return the temporal smoothing weight or number of frames in the smoothing window. More...
 
void SetSmoothing (float factor)
 Enable temporal smoothing of the results using EWMA (exponentially-weighted moving average). More...
 
- Public Member Functions inherited from tensorNet
virtual ~tensorNet ()
 Destroy. More...
 
bool LoadNetwork (const char *prototxt, const char *model, const char *mean=NULL, const char *input_blob="data", const char *output_blob="prob", uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true, nvinfer1::IInt8Calibrator *calibrator=NULL, cudaStream_t stream=NULL)
 Load a new network instance. More...
 
bool LoadNetwork (const char *prototxt, const char *model, const char *mean, const char *input_blob, const std::vector< std::string > &output_blobs, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true, nvinfer1::IInt8Calibrator *calibrator=NULL, cudaStream_t stream=NULL)
 Load a new network instance with multiple output layers. More...
 
bool LoadNetwork (const char *prototxt, const char *model, const char *mean, const std::vector< std::string > &input_blobs, const std::vector< std::string > &output_blobs, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true, nvinfer1::IInt8Calibrator *calibrator=NULL, cudaStream_t stream=NULL)
 Load a new network instance with multiple input layers. More...
 
bool LoadNetwork (const char *prototxt, const char *model, const char *mean, const char *input_blob, const Dims3 &input_dims, const std::vector< std::string > &output_blobs, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true, nvinfer1::IInt8Calibrator *calibrator=NULL, cudaStream_t stream=NULL)
 Load a new network instance (this variant is used for UFF models) More...
 
bool LoadNetwork (const char *prototxt, const char *model, const char *mean, const std::vector< std::string > &input_blobs, const std::vector< Dims3 > &input_dims, const std::vector< std::string > &output_blobs, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true, nvinfer1::IInt8Calibrator *calibrator=NULL, cudaStream_t stream=NULL)
 Load a new network instance with multiple input layers (used for UFF models) More...
 
bool LoadEngine (const char *engine_filename, const std::vector< std::string > &input_blobs, const std::vector< std::string > &output_blobs, nvinfer1::IPluginFactory *pluginFactory=NULL, deviceType device=DEVICE_GPU, cudaStream_t stream=NULL)
 Load a network instance from a serialized engine plan file. More...
 
bool LoadEngine (char *engine_stream, size_t engine_size, const std::vector< std::string > &input_blobs, const std::vector< std::string > &output_blobs, nvinfer1::IPluginFactory *pluginFactory=NULL, deviceType device=DEVICE_GPU, cudaStream_t stream=NULL)
 Load a network instance from a serialized engine plan file. More...
 
bool LoadEngine (nvinfer1::ICudaEngine *engine, const std::vector< std::string > &input_blobs, const std::vector< std::string > &output_blobs, deviceType device=DEVICE_GPU, cudaStream_t stream=NULL)
 Load network resources from an existing TensorRT engine instance. More...
 
bool LoadEngine (const char *filename, char **stream, size_t *size)
 Load a serialized engine plan file into memory. More...
 
void EnableLayerProfiler ()
 Manually enable layer profiling times. More...
 
void EnableDebug ()
 Manually enable debug messages and synchronization. More...
 
bool AllowGPUFallback () const
 Return true if GPU fallback is enabled. More...
 
deviceType GetDevice () const
 Retrieve the device being used for execution. More...
 
precisionType GetPrecision () const
 Retrieve the type of precision being used. More...
 
bool IsPrecision (precisionType type) const
 Check if a particular precision is being used. More...
 
cudaStream_t GetStream () const
 Retrieve the stream that the device is operating on. More...
 
cudaStream_t CreateStream (bool nonBlocking=true)
 Create and use a new stream for execution. More...
 
void SetStream (cudaStream_t stream)
 Set the stream that the device is operating on. More...
 
const char * GetPrototxtPath () const
 Retrieve the path to the network prototxt file. More...
 
const char * GetModelPath () const
 Retrieve the full path to model file, including the filename. More...
 
const char * GetModelFilename () const
 Retrieve the filename of the file, excluding the directory. More...
 
modelType GetModelType () const
 Retrieve the format of the network model. More...
 
bool IsModelType (modelType type) const
 Return true if the model is of the specified format. More...
 
uint32_t GetInputLayers () const
 Retrieve the number of input layers to the network. More...
 
uint32_t GetOutputLayers () const
 Retrieve the number of output layers to the network. More...
 
Dims3 GetInputDims (uint32_t layer=0) const
 Retrieve the dimensions of network input layer. More...
 
uint32_t GetInputWidth (uint32_t layer=0) const
 Retrieve the width of network input layer. More...
 
uint32_t GetInputHeight (uint32_t layer=0) const
 Retrieve the height of network input layer. More...
 
uint32_t GetInputSize (uint32_t layer=0) const
 Retrieve the size (in bytes) of network input layer. More...
 
float * GetInputPtr (uint32_t layer=0) const
 Get the CUDA pointer to the input layer's memory. More...
 
Dims3 GetOutputDims (uint32_t layer=0) const
 Retrieve the dimensions of network output layer. More...
 
uint32_t GetOutputWidth (uint32_t layer=0) const
 Retrieve the width of network output layer. More...
 
uint32_t GetOutputHeight (uint32_t layer=0) const
 Retrieve the height of network output layer. More...
 
uint32_t GetOutputSize (uint32_t layer=0) const
 Retrieve the size (in bytes) of network output layer. More...
 
float * GetOutputPtr (uint32_t layer=0) const
 Get the CUDA pointer to the output memory. More...
 
float GetNetworkFPS ()
 Retrieve the network frames per second (FPS). More...
 
float GetNetworkTime ()
 Retrieve the network runtime (in milliseconds). More...
 
const char * GetNetworkName () const
 Retrieve the network name (its filename). More...
 
float2 GetProfilerTime (profilerQuery query)
 Retrieve the profiler runtime (in milliseconds). More...
 
float GetProfilerTime (profilerQuery query, profilerDevice device)
 Retrieve the profiler runtime (in milliseconds). More...
 
void PrintProfilerTimes ()
 Print the profiler times (in milliseconds). More...
 

Static Public Member Functions

static imageNet * Create (const char *network="googlenet", uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true)
 Load one of the following pre-trained models: More...
 
static imageNet * Create (const char *prototxt_path, const char *model_path, const char *mean_binary, const char *class_labels, const char *input=IMAGENET_DEFAULT_INPUT, const char *output=IMAGENET_DEFAULT_OUTPUT, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true)
 Load a new network instance. More...
 
static imageNet * Create (int argc, char **argv)
 Load a new network instance by parsing the command line. More...
 
static imageNet * Create (const commandLine &cmdLine)
 Load a new network instance by parsing the command line. More...
 
static const char * Usage ()
 Usage string for command line arguments to Create() More...
 
- Static Public Member Functions inherited from tensorNet
static bool LoadClassLabels (const char *filename, std::vector< std::string > &descriptions, int expectedClasses=-1)
 Load class descriptions from a label file. More...
 
static bool LoadClassLabels (const char *filename, std::vector< std::string > &descriptions, std::vector< std::string > &synsets, int expectedClasses=-1)
 Load class descriptions and synset strings from a label file. More...
 
static bool LoadClassColors (const char *filename, float4 *colors, int expectedClasses, float defaultAlpha=255.0f)
 Load class colors from a text file. More...
 
static bool LoadClassColors (const char *filename, float4 **colors, int expectedClasses, float defaultAlpha=255.0f)
 Load class colors from a text file. More...
 
static float4 GenerateColor (uint32_t classID, float alpha=255.0f)
 Procedurally generate a color for a given class index with the specified alpha value. More...
 
static precisionType SelectPrecision (precisionType precision, deviceType device=DEVICE_GPU, bool allowInt8=true)
 Resolve a desired precision to a specific one that's available. More...
 
static precisionType FindFastestPrecision (deviceType device=DEVICE_GPU, bool allowInt8=true)
 Determine the fastest native precision on a device. More...
 
static std::vector< precisionType > DetectNativePrecisions (deviceType device=DEVICE_GPU)
 Detect the precisions supported natively on a device. More...
 
static bool DetectNativePrecision (const std::vector< precisionType > &nativeTypes, precisionType type)
 Detect if a particular precision is supported natively. More...
 
static bool DetectNativePrecision (precisionType precision, deviceType device=DEVICE_GPU)
 Detect if a particular precision is supported natively. More...
 

Protected Member Functions

 imageNet ()
 
bool init (const char *prototxt_path, const char *model_path, const char *mean_binary, const char *class_path, const char *input, const char *output, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback)
 
bool loadClassInfo (const char *filename, int expectedClasses=-1)
 
bool preProcess (void *image, uint32_t width, uint32_t height, imageFormat format)
 
float * applySmoothing ()
 
- Protected Member Functions inherited from tensorNet
 tensorNet ()
 Constructor. More...
 
bool ProcessNetwork (bool sync=true)
 Execute processing of the network. More...
 
bool ProfileModel (const std::string &deployFile, const std::string &modelFile, const std::vector< std::string > &inputs, const std::vector< Dims3 > &inputDims, const std::vector< std::string > &outputs, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback, nvinfer1::IInt8Calibrator *calibrator, char **engineStream, size_t *engineSize)
 Create and output an optimized network model. More...
 
bool ConfigureBuilder (nvinfer1::IBuilder *builder, uint32_t maxBatchSize, uint32_t workspaceSize, precisionType precision, deviceType device, bool allowGPUFallback, nvinfer1::IInt8Calibrator *calibrator)
 Configure builder options. More...
 
bool ValidateEngine (const char *model_path, const char *cache_path, const char *checksum_path)
 Validate that the model already has a built TensorRT engine that exists and doesn't need updating. More...
 
void PROFILER_BEGIN (profilerQuery query)
 Begin a profiling query, before network is run. More...
 
void PROFILER_END (profilerQuery query)
 End a profiling query, after the network is run. More...
 
bool PROFILER_QUERY (profilerQuery query)
 Query the CUDA part of a profiler query. More...
 

Protected Attributes

uint32_t mNumClasses
 
std::vector< std::string > mClassSynset
 
std::vector< std::string > mClassDesc
 
std::string mClassPath
 
float * mSmoothingBuffer
 
float mSmoothingFactor
 
float mThreshold
 
- Protected Attributes inherited from tensorNet
tensorNet::Logger gLogger
 
tensorNet::Profiler gProfiler
 
std::string mPrototxtPath
 
std::string mModelPath
 
std::string mModelFile
 
std::string mMeanPath
 
std::string mCacheEnginePath
 
std::string mCacheCalibrationPath
 
std::string mChecksumPath
 
deviceType mDevice
 
precisionType mPrecision
 
modelType mModelType
 
cudaStream_t mStream
 
cudaEvent_t mEventsGPU [PROFILER_TOTAL *2]
 
timespec mEventsCPU [PROFILER_TOTAL *2]
 
nvinfer1::IRuntime * mInfer
 
nvinfer1::ICudaEngine * mEngine
 
nvinfer1::IExecutionContext * mContext
 
float2 mProfilerTimes [PROFILER_TOTAL+1]
 
uint32_t mProfilerQueriesUsed
 
uint32_t mProfilerQueriesDone
 
uint32_t mWorkspaceSize
 
uint32_t mMaxBatchSize
 
bool mEnableProfiler
 
bool mEnableDebug
 
bool mAllowGPUFallback
 
void ** mBindings
 
std::vector< layerInfo > mInputs
 
std::vector< layerInfo > mOutputs
 

Detailed Description

Image recognition with classification networks, using TensorRT.

Member Typedef Documentation

◆ Classifications

typedef std::vector<std::pair<uint32_t, float> > imageNet::Classifications

List of classification results where each entry represents a (classID, confidence) pair.

Constructor & Destructor Documentation

◆ ~imageNet()

virtual imageNet::~imageNet ( )
virtual

Destroy.

◆ imageNet()

imageNet::imageNet ( )
protected

Member Function Documentation

◆ applySmoothing()

float* imageNet::applySmoothing ( )
protected

◆ Classify() [1/5]

int imageNet::Classify ( float *  rgba,
uint32_t  width,
uint32_t  height,
float *  confidence = NULL,
imageFormat  format = IMAGE_RGBA32F 
)

Predict the maximum-likelihood image class whose confidence meets the minimum threshold.

Either the class with the maximum probability will be returned, or -1 if no class meets the threshold set by SetThreshold() or the --threshold command-line argument.

Parameters
rgba: float4 input image in CUDA device memory.
width: width of the input image in pixels.
height: height of the input image in pixels.
confidence: optional pointer to float filled with confidence value.
format: format of the image (rgb8, rgba8, rgb32f, rgba32f are supported)
Returns
ID of the class with the highest confidence, or -1 if no classes met the threshold. If a runtime error occurred during processing, then a value of -2 will be returned.
Deprecated:
this overload of Classify() provides legacy compatibility with float* type (RGBA32F).

◆ Classify() [2/5]

template<typename T >
int imageNet::Classify ( T *  image,
uint32_t  width,
uint32_t  height,
Classifications &  classifications,
int  topK = 1 
)
inline

Classify the image and return the topK image classification results that meet the minimum confidence threshold set by SetThreshold() or the --threshold command-line argument.

Parameters
image: input image in CUDA device memory.
width: width of the input image in pixels.
height: height of the input image in pixels.
classifications: returns a list of the topK (classID, confidence) classification results, sorted from highest to lowest confidence.
topK: the number of predictions to return (it can be less than this number if there weren't that many valid predictions). The default value of topK is 1, in which case only the highest-confidence result will be returned. If the value of topK is <= 0, then all the valid predictions with confidence >= threshold will be returned.
Returns
ID of the class with the highest confidence, or -1 if no classes met the threshold. If a runtime error occurred during processing, then a value of -2 will be returned.

◆ Classify() [3/5]

template<typename T >
int imageNet::Classify ( T *  image,
uint32_t  width,
uint32_t  height,
float *  confidence = NULL 
)
inline

Predict the maximum-likelihood image class whose confidence meets the minimum threshold.

Either the class with the maximum probability will be returned, or -1 if no class meets the threshold set by SetThreshold() or the --threshold command-line argument.

Parameters
image: input image in CUDA device memory.
width: width of the input image in pixels.
height: height of the input image in pixels.
confidence: optional pointer to float filled with confidence value.
Returns
ID of the class with the highest confidence, or -1 if no classes met the threshold. If a runtime error occurred during processing, then a value of -2 will be returned.

◆ Classify() [4/5]

int imageNet::Classify ( void *  image,
uint32_t  width,
uint32_t  height,
imageFormat  format,
Classifications &  classifications,
int  topK = 1 
)

Classify the image and return the topK image classification results that meet the minimum confidence threshold set by SetThreshold() or the --threshold command-line argument.

Parameters
image: input image in CUDA device memory.
width: width of the input image in pixels.
height: height of the input image in pixels.
format: format of the image (rgb8, rgba8, rgb32f, rgba32f are supported)
classifications: returns a list of the topK (classID, confidence) classification results, sorted from highest to lowest confidence.
topK: the number of predictions to return (it can be less than this number if there weren't that many valid predictions). The default value of topK is 1, in which case only the highest-confidence result will be returned. If the value of topK is <= 0, then all the valid predictions with confidence >= threshold will be returned.
Returns
ID of the class with the highest confidence, or -1 if no classes met the threshold. If a runtime error occurred during processing, then a value of -2 will be returned.

◆ Classify() [5/5]

int imageNet::Classify ( void *  image,
uint32_t  width,
uint32_t  height,
imageFormat  format,
float *  confidence = NULL 
)

Predict the maximum-likelihood image class whose confidence meets the minimum threshold.

Either the class with the maximum probability will be returned, or -1 if no class meets the threshold set by SetThreshold() or the --threshold command-line argument.

Parameters
image: input image in CUDA device memory.
width: width of the input image in pixels.
height: height of the input image in pixels.
format: format of the image (rgb8, rgba8, rgb32f, rgba32f are supported)
confidence: optional pointer to float filled with confidence value.
Returns
ID of the class with the highest confidence, or -1 if no classes met the threshold. If a runtime error occurred during processing, then a value of -2 will be returned.

◆ Create() [1/4]

static imageNet* imageNet::Create ( const char *  network = "googlenet",
uint32_t  maxBatchSize = DEFAULT_MAX_BATCH_SIZE,
precisionType  precision = TYPE_FASTEST,
deviceType  device = DEVICE_GPU,
bool  allowGPUFallback = true 
)
static

Load one of the following pre-trained models:

  • alexnet, googlenet, googlenet-12,
  • resnet-18, resnet-50, resnet-101, resnet-152,
  • vgg-16, vgg-19, inception-v4

These are all 1000-class models trained on ImageNet ILSVRC, except for googlenet-12 which is a 12-class subset of ILSVRC.

◆ Create() [2/4]

static imageNet* imageNet::Create ( const char *  prototxt_path,
const char *  model_path,
const char *  mean_binary,
const char *  class_labels,
const char *  input = IMAGENET_DEFAULT_INPUT,
const char *  output = IMAGENET_DEFAULT_OUTPUT,
uint32_t  maxBatchSize = DEFAULT_MAX_BATCH_SIZE,
precisionType  precision = TYPE_FASTEST,
deviceType  device = DEVICE_GPU,
bool  allowGPUFallback = true 
)
static

Load a new network instance.

Parameters
prototxt_path: File path to the deployable network prototxt
model_path: File path to the caffemodel
mean_binary: File path to the mean value binary proto (can be NULL)
class_labels: File path to list of class name labels
input: Name of the input layer blob.
output: Name of the output layer blob.
maxBatchSize: The maximum batch size that the network will support and be optimized for.

◆ Create() [3/4]

static imageNet* imageNet::Create ( const commandLine &  cmdLine)
static

Load a new network instance by parsing the command line.

◆ Create() [4/4]

static imageNet* imageNet::Create ( int  argc,
char **  argv 
)
static

Load a new network instance by parsing the command line.

◆ GetClassDesc()

const char* imageNet::GetClassDesc ( int  index) const
inline

Retrieve the description of a particular class.

◆ GetClassLabel()

const char* imageNet::GetClassLabel ( int  index) const
inline

Retrieve the description of a particular class.

◆ GetClassPath()

const char* imageNet::GetClassPath ( ) const
inline

Retrieve the path to the file containing the class descriptions.

◆ GetClassSynset()

const char* imageNet::GetClassSynset ( int  index) const
inline

Retrieve the class synset category of a particular class.

◆ GetNumClasses()

uint32_t imageNet::GetNumClasses ( ) const
inline

Retrieve the number of image recognition classes (typically 1000)

◆ GetSmoothing()

float imageNet::GetSmoothing ( ) const
inline

Return the temporal smoothing weight or number of frames in the smoothing window.

See also
SetSmoothing

◆ GetThreshold()

float imageNet::GetThreshold ( ) const
inline

Return the confidence threshold used for classification.

◆ init()

bool imageNet::init ( const char *  prototxt_path,
const char *  model_path,
const char *  mean_binary,
const char *  class_path,
const char *  input,
const char *  output,
uint32_t  maxBatchSize,
precisionType  precision,
deviceType  device,
bool  allowGPUFallback 
)
protected

◆ loadClassInfo()

bool imageNet::loadClassInfo ( const char *  filename,
int  expectedClasses = -1 
)
protected

◆ preProcess()

bool imageNet::preProcess ( void *  image,
uint32_t  width,
uint32_t  height,
imageFormat  format 
)
protected

◆ SetSmoothing()

void imageNet::SetSmoothing ( float  factor)
inline

Enable temporal smoothing of the results using EWMA (exponentially-weighted moving average).

This filters the confidence values of each class over ~N frames to reduce noise and jitter. In lieu of storing a history of past data, this uses an accumulated approximation of EMA:

EMA(x,t) = EMA(x, t-1) + w * (x - EMA(x, t-1))

where x is a class softmax output logit, t is the timestep, and w is the smoothing weight.

Parameters
factor: either a weight between [0,1] that's placed on the latest confidence values, or the smoothing window as a number of frames (where the weight will be 1/N).
For example, a factor of N=5 would average over approximately the last 5 frames, and would be equivalent to specifying a weight of 0.2 (either can be used). A weight closer to 1 will be more responsive to changes, but also more noisy.
Setting this to 0 or 1 will disable smoothing and use the unfiltered outputs.
Note
this can also be set using the --smoothing=N command-line argument.

◆ SetThreshold()

void imageNet::SetThreshold ( float  threshold)
inline

Set the confidence threshold used for classification.

Classes with a confidence below this threshold will be ignored.

Note
this can also be set using the --threshold=N command-line argument.

◆ Usage()

static const char* imageNet::Usage ( )
inline static

Usage string for command line arguments to Create()

Member Data Documentation

◆ mClassDesc

std::vector<std::string> imageNet::mClassDesc
protected

◆ mClassPath

std::string imageNet::mClassPath
protected

◆ mClassSynset

std::vector<std::string> imageNet::mClassSynset
protected

◆ mNumClasses

uint32_t imageNet::mNumClasses
protected

◆ mSmoothingBuffer

float* imageNet::mSmoothingBuffer
protected

◆ mSmoothingFactor

float imageNet::mSmoothingFactor
protected

◆ mThreshold

float imageNet::mThreshold
protected

The documentation for this class was generated from the following file: