Jetson Inference
DNN Vision Library
imageNet.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #ifndef __IMAGE_NET_H__
24 #define __IMAGE_NET_H__
25 
26 
27 #include "tensorNet.h"
28 
29 
/**
 * Name of default input blob for imageNet model.
 */
34 #define IMAGENET_DEFAULT_INPUT "data"
35 
/**
 * Name of default output confidence values for imageNet model.
 */
40 #define IMAGENET_DEFAULT_OUTPUT "prob"
41 
/**
 * Default value of the minimum confidence threshold for classification.
 * NOTE(review): the --threshold help text in IMAGENET_USAGE_STRING hard-codes
 * "default is 0.01"; keep the two in sync if this value ever changes.
 */
46 #define IMAGENET_DEFAULT_THRESHOLD 0.01f
47 
/**
 * Model-type string associated with imageNet.
 * NOTE(review): presumably used elsewhere to identify classification models;
 * its consumers are not visible in this header -- confirm before relying on it.
 */
52 #define IMAGENET_MODEL_TYPE "classification"
53 
/**
 * Standard command-line options able to be passed to imageNet::Create().
 * This is the text returned by imageNet::Usage(). The defaults shown for
 * --input-blob and --output-blob are spliced in from IMAGENET_DEFAULT_INPUT
 * and IMAGENET_DEFAULT_OUTPUT via string-literal concatenation.
 */
58 #define IMAGENET_USAGE_STRING "imageNet arguments: \n" \
59  " --network=NETWORK pre-trained model to load, one of the following:\n" \
60  " * alexnet\n" \
61  " * googlenet (default)\n" \
62  " * googlenet-12\n" \
63  " * resnet-18\n" \
64  " * resnet-50\n" \
65  " * resnet-101\n" \
66  " * resnet-152\n" \
67  " * vgg-16\n" \
68  " * vgg-19\n" \
69  " * inception-v4\n" \
70  " --model=MODEL path to custom model to load (caffemodel, uff, or onnx)\n" \
71  " --prototxt=PROTOTXT path to custom prototxt to load (for .caffemodel only)\n" \
72  " --labels=LABELS path to text file containing the labels for each class\n" \
73  " --input-blob=INPUT name of the input layer (default is '" IMAGENET_DEFAULT_INPUT "')\n" \
74  " --output-blob=OUTPUT name of the output layer (default is '" IMAGENET_DEFAULT_OUTPUT "')\n" \
75  " --threshold=CONF minimum confidence threshold for classification (default is 0.01)\n" \
76  " --smoothing=WEIGHT weight between [0,1] or number of frames (disabled by default)\n" \
77  " --profile enable layer profiling in TensorRT\n\n"
78 
79 
84 class imageNet : public tensorNet
85 {
86 public:
90  typedef std::vector<std::pair<uint32_t, float>> Classifications;
91 
102  static imageNet* Create( const char* network="googlenet",
103  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
104  precisionType precision=TYPE_FASTEST,
105  deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
106 
117  static imageNet* Create( const char* prototxt_path, const char* model_path,
118  const char* mean_binary, const char* class_labels,
119  const char* input=IMAGENET_DEFAULT_INPUT,
120  const char* output=IMAGENET_DEFAULT_OUTPUT,
121  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
122  precisionType precision=TYPE_FASTEST,
123  deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
124 
128  static imageNet* Create( int argc, char** argv );
129 
133  static imageNet* Create( const commandLine& cmdLine );
134 
138  static inline const char* Usage() { return IMAGENET_USAGE_STRING; }
139 
143  virtual ~imageNet();
144 
158  template<typename T> int Classify( T* image, uint32_t width, uint32_t height, float* confidence=NULL ) { return Classify((void*)image, width, height, imageFormatFromType<T>(), confidence); }
159 
174  int Classify( void* image, uint32_t width, uint32_t height, imageFormat format, float* confidence=NULL );
175 
191  int Classify( float* rgba, uint32_t width, uint32_t height, float* confidence=NULL, imageFormat format=IMAGE_RGBA32F );
192 
208  template<typename T> int Classify( T* image, uint32_t width, uint32_t height, Classifications& classifications, int topK=1 ) { return Classify((void*)image, width, height, imageFormatFromType<T>(), classifications, topK); }
209 
226  int Classify( void* image, uint32_t width, uint32_t height, imageFormat format, Classifications& classifications, int topK=1 );
227 
231  inline uint32_t GetNumClasses() const { return mNumClasses; }
232 
236  inline const char* GetClassLabel( int index ) const { return GetClassDesc(index); }
237 
241  inline const char* GetClassDesc( int index ) const { return index >= 0 ? mClassDesc[index].c_str() : "none"; }
242 
246  inline const char* GetClassSynset( int index ) const { return index >= 0 ? mClassSynset[index].c_str() : "none"; }
247 
251  inline const char* GetClassPath() const { return mClassPath.c_str(); }
252 
256  inline float GetThreshold() const { return mThreshold; }
257 
263  inline void SetThreshold( float threshold ) { mThreshold = threshold; }
264 
269  inline float GetSmoothing() const { return mSmoothingFactor; }
270 
289  inline void SetSmoothing( float factor ) { mSmoothingFactor = factor; }
290 
291 protected:
292  imageNet();
293 
294  //bool init( NetworkType networkType, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback );
295  bool init(const char* prototxt_path, const char* model_path, const char* mean_binary, const char* class_path, const char* input, const char* output, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback );
296  bool loadClassInfo( const char* filename, int expectedClasses=-1 );
297 
298  bool preProcess( void* image, uint32_t width, uint32_t height, imageFormat format );
299 
300  float* applySmoothing();
301 
302  uint32_t mNumClasses;
303 
304  std::vector<std::string> mClassSynset; // 1000 class ID's (ie n01580077, n04325704)
305  std::vector<std::string> mClassDesc;
306 
307  std::string mClassPath;
308  //NetworkType mNetworkType;
309 
312 
313  float mThreshold;
314 };
315 
316 
317 #endif
imageNet::applySmoothing
float * applySmoothing()
IMAGE_RGBA32F
@ IMAGE_RGBA32F
float4 RGBA32F ('rgba32f')
Definition: imageFormat.h:55
imageNet::Classify
int Classify(T *image, uint32_t width, uint32_t height, float *confidence=NULL)
Predict the maximum-likelihood image class whose confidence meets the minimum threshold.
Definition: imageNet.h:158
imageNet::Usage
static const char * Usage()
Usage string for command line arguments to Create()
Definition: imageNet.h:138
IMAGENET_DEFAULT_INPUT
#define IMAGENET_DEFAULT_INPUT
Name of default input blob for imageNet model.
Definition: imageNet.h:34
imageNet::Classify
int Classify(T *image, uint32_t width, uint32_t height, Classifications &classifications, int topK=1)
Classify the image and return the topK image classification results that meet the minimum confidence ...
Definition: imageNet.h:208
imageNet::GetSmoothing
float GetSmoothing() const
Return the temporal smoothing weight or number of frames in the smoothing window.
Definition: imageNet.h:269
imageNet::GetClassSynset
const char * GetClassSynset(int index) const
Retrieve the class synset category of a particular class.
Definition: imageNet.h:246
IMAGENET_DEFAULT_OUTPUT
#define IMAGENET_DEFAULT_OUTPUT
Name of default output confidence values for imageNet model.
Definition: imageNet.h:40
imageNet::mSmoothingBuffer
float * mSmoothingBuffer
Definition: imageNet.h:310
imageNet::SetSmoothing
void SetSmoothing(float factor)
Enable temporal smoothing of the results using EWMA (exponentially-weighted moving average).
Definition: imageNet.h:289
DEVICE_GPU
@ DEVICE_GPU
GPU (if multiple GPUs are present, a specific GPU can be selected with cudaSetDevice())
Definition: tensorNet.h:131
imageNet::GetClassLabel
const char * GetClassLabel(int index) const
Retrieve the description of a particular class.
Definition: imageNet.h:236
deviceType
deviceType
Enumeration for indicating the desired device that the network should run on, if available in hardware.
Definition: tensorNet.h:129
imageNet::GetNumClasses
uint32_t GetNumClasses() const
Retrieve the number of image recognition classes (typically 1000)
Definition: imageNet.h:231
tensorNet.h
imageNet::mClassSynset
std::vector< std::string > mClassSynset
Definition: imageNet.h:304
TYPE_FASTEST
@ TYPE_FASTEST
The fastest detected precision should be used.
Definition: tensorNet.h:105
imageNet::Classifications
std::vector< std::pair< uint32_t, float > > Classifications
List of classification results where each entry represents a (classID, confidence) pair.
Definition: imageNet.h:90
imageNet::GetThreshold
float GetThreshold() const
Return the confidence threshold used for classification.
Definition: imageNet.h:256
precisionType
precisionType
Enumeration for indicating the desired precision that the network should run in, if available in hardware.
Definition: tensorNet.h:102
imageNet::preProcess
bool preProcess(void *image, uint32_t width, uint32_t height, imageFormat format)
imageNet::~imageNet
virtual ~imageNet()
Destroy.
imageNet::mClassDesc
std::vector< std::string > mClassDesc
Definition: imageNet.h:305
tensorNet
Abstract class for loading a tensor network with TensorRT.
Definition: tensorNet.h:218
imageNet::GetClassPath
const char * GetClassPath() const
Retrieve the path to the file containing the class descriptions.
Definition: imageNet.h:251
imageNet
Image recognition with classification networks, using TensorRT.
Definition: imageNet.h:84
imageNet::mThreshold
float mThreshold
Definition: imageNet.h:313
imageNet::mClassPath
std::string mClassPath
Definition: imageNet.h:307
imageNet::mNumClasses
uint32_t mNumClasses
Definition: imageNet.h:302
IMAGENET_USAGE_STRING
#define IMAGENET_USAGE_STRING
Standard command-line options able to be passed to imageNet::Create()
Definition: imageNet.h:58
imageNet::imageNet
imageNet()
imageNet::GetClassDesc
const char * GetClassDesc(int index) const
Retrieve the description of a particular class.
Definition: imageNet.h:241
DEFAULT_MAX_BATCH_SIZE
#define DEFAULT_MAX_BATCH_SIZE
Default maximum batch size.
Definition: tensorNet.h:88
imageNet::mSmoothingFactor
float mSmoothingFactor
Definition: imageNet.h:311
imageNet::init
bool init(const char *prototxt_path, const char *model_path, const char *mean_binary, const char *class_path, const char *input, const char *output, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback)
imageNet::Create
static imageNet * Create(const char *network="googlenet", uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true)
Load one of the following pre-trained models:
imageNet::loadClassInfo
bool loadClassInfo(const char *filename, int expectedClasses=-1)
commandLine
Command line parser for extracting flags, values, and strings.
Definition: commandLine.h:35
imageFormat
imageFormat
The imageFormat enum is used to identify the pixel format and colorspace of an image.
Definition: imageFormat.h:49
imageNet::SetThreshold
void SetThreshold(float threshold)
Set the confidence threshold used for classification.
Definition: imageNet.h:263