Jetson Inference
DNN Vision Library
detectNet.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #ifndef __DETECT_NET_H__
24 #define __DETECT_NET_H__
25 
26 
27 #include "tensorNet.h"
28 
29 
34 #define DETECTNET_DEFAULT_INPUT "data"
35 
40 #define DETECTNET_DEFAULT_COVERAGE "coverage"
41 
46 #define DETECTNET_DEFAULT_BBOX "bboxes"
47 
52 #define DETECTNET_DEFAULT_THRESHOLD 0.5f
53 
58 #define DETECTNET_DEFAULT_ALPHA 120
59 
/**
 * Standard command-line options able to be passed to detectNet::Create().
 *
 * The default blob names shown for --input-blob, --output-cvg and --output-bbox
 * are spliced in from DETECTNET_DEFAULT_INPUT, DETECTNET_DEFAULT_COVERAGE and
 * DETECTNET_DEFAULT_BBOX by string-literal concatenation. The defaults printed
 * for --threshold and --alpha are written out literally in the text.
 */
64 #define DETECTNET_USAGE_STRING "detectNet arguments: \n" \
65  " --network=NETWORK pre-trained model to load, one of the following:\n" \
66  " * ssd-mobilenet-v1\n" \
67  " * ssd-mobilenet-v2 (default)\n" \
68  " * ssd-inception-v2\n" \
69  " * pednet\n" \
70  " * multiped\n" \
71  " * facenet\n" \
72  " * coco-airplane\n" \
73  " * coco-bottle\n" \
74  " * coco-chair\n" \
75  " * coco-dog\n" \
76  " --model=MODEL path to custom model to load (caffemodel, uff, or onnx)\n" \
77  " --prototxt=PROTOTXT path to custom prototxt to load (for .caffemodel only)\n" \
78  " --labels=LABELS path to text file containing the labels for each class\n" \
79  " --input-blob=INPUT name of the input layer (default is '" DETECTNET_DEFAULT_INPUT "')\n" \
80  " --output-cvg=COVERAGE name of the coverage output layer (default is '" DETECTNET_DEFAULT_COVERAGE "')\n" \
81  " --output-bbox=BOXES name of the bounding box output layer (default is '" DETECTNET_DEFAULT_BBOX "')\n" \
82  " --mean-pixel=PIXEL mean pixel value to subtract from input (default is 0.0)\n" \
83  " --batch-size=BATCH maximum batch size (default is 1)\n" \
84  " --threshold=THRESHOLD minimum threshold for detection (default is 0.5)\n" \
85  " --alpha=ALPHA overlay alpha blending value, range 0-255 (default: 120)\n" \
86  " --overlay=OVERLAY detection overlay flags (e.g. --overlay=box,labels,conf)\n" \
87  " valid combinations are: 'box', 'labels', 'conf', 'none'\n" \
88  " --profile enable layer profiling in TensorRT\n\n"
89 
90 
95 class detectNet : public tensorNet
96 {
97 public:
101  struct Detection
102  {
     // Object detection result: an instance index, a class index, a confidence
     // value, and an axis-aligned bounding box (Left/Top/Right/Bottom) together
     // with geometry helpers that operate on that box.
103  // Object Info
     // Index of this unique object instance.
104  uint32_t Instance;
     // Class index of the detected object.
105  uint32_t ClassID;
     // Confidence value of the detected object.
106  float Confidence;
108  // Bounding Box Coordinates
     // Left bounding box coordinate (in pixels).
109  float Left;
     // Right bounding box coordinate (in pixels).
110  float Right;
     // Top bounding box coordinate (in pixels).
111  float Top;
     // Bottom bounding box coordinate (in pixels).
112  float Bottom;
     // Width of this box: Right - Left.
115  inline float Width() const { return Right - Left; }
116 
     // Height of this box: Bottom - Top.
118  inline float Height() const { return Bottom - Top; }
119 
     // Area of this box: Width() * Height().
121  inline float Area() const { return Width() * Height(); }
122 
     // Width of an arbitrary box given its left (x1) and right (x2) edges.
124  static inline float Width( float x1, float x2 ) { return x2 - x1; }
125 
     // Height of an arbitrary box given its top (y1) and bottom (y2) edges.
127  static inline float Height( float y1, float y2 ) { return y2 - y1; }
128 
     // Area of an arbitrary box given its corners (x1,y1) and (x2,y2).
130  static inline float Area( float x1, float y1, float x2, float y2 ) { return Width(x1,x2) * Height(y1,y2); }
131 
     // Write the center of this box to *x and *y. Either pointer may be NULL,
     // in which case that component is skipped.
133  inline void Center( float* x, float* y ) const { if(x) *x = Left + Width() * 0.5f; if(y) *y = Top + Height() * 0.5f; }
134 
     // Return true if the point (x,y) lies inside this box; points exactly on
     // an edge count as inside (comparisons are inclusive).
136  inline bool Contains( float x, float y ) const { return x >= Left && x <= Right && y >= Top && y <= Bottom; }
137 
     // Return true if the intersection area with `det`, divided by the larger
     // of the two box areas, is strictly greater than areaThreshold.
     // NOTE(review): when both areas are zero the ratio is 0/0; the comparison
     // then evaluates to false for a NaN result - confirm this is intended.
139  inline bool Intersects( const Detection& det, float areaThreshold=0.0f ) const { return (IntersectionArea(det) / fmaxf(Area(), det.Area()) > areaThreshold); }
140 
     // Same test as above, against the box with corners (x1,y1) and (x2,y2).
142  inline bool Intersects( float x1, float y1, float x2, float y2, float areaThreshold=0.0f ) const { return (IntersectionArea(x1,y1,x2,y2) / fmaxf(Area(), Area(x1,y1,x2,y2)) > areaThreshold); }
143 
     // Area of the intersection between this box and `det`
     // (forwards det's Left/Top/Right/Bottom to the coordinate overload).
145  inline float IntersectionArea( const Detection& det ) const { return IntersectionArea(det.Left, det.Top, det.Right, det.Bottom); }
146 
     // Area of the intersection between this box and the box (x1,y1)-(x2,y2).
     // Returns 0 when Overlaps() reports no overlap; otherwise the product of
     // the overlapping horizontal and vertical extents.
148  inline float IntersectionArea( float x1, float y1, float x2, float y2 ) const { if(!Overlaps(x1,y1,x2,y2)) return 0.0f; return (fminf(Right, x2) - fmaxf(Left, x1)) * (fminf(Bottom, y2) - fmaxf(Top, y1)); }
149 
     // Return true unless `det` lies strictly to the left of, right of, above
     // or below this box. The comparisons are strict, so boxes that merely
     // touch along an edge are reported as overlapping.
151  inline bool Overlaps( const Detection& det ) const { return !(det.Left > Right || det.Right < Left || det.Top > Bottom || det.Bottom < Top); }
152 
     // Same overlap test, against the box with corners (x1,y1) and (x2,y2).
154  inline bool Overlaps( float x1, float y1, float x2, float y2 ) const { return !(x1 > Right || x2 < Left || y1 > Bottom || y2 < Top); }
155 
     // If the box (x1,y1)-(x2,y2) overlaps this one, grow this box to the
     // smallest box enclosing both and return true. Otherwise leave this box
     // unchanged and return false.
157  inline bool Expand( float x1, float y1, float x2, float y2 ) { if(!Overlaps(x1, y1, x2, y2)) return false; Left = fminf(x1, Left); Top = fminf(y1, Top); Right = fmaxf(x2, Right); Bottom = fmaxf(y2, Bottom); return true; }
158 
     // Same as above, taking the other box from `det`.
160  inline bool Expand( const Detection& det ) { if(!Overlaps(det)) return false; Left = fminf(det.Left, Left); Top = fminf(det.Top, Top); Right = fmaxf(det.Right, Right); Bottom = fmaxf(det.Bottom, Bottom); return true; }
161 
     // Reset all member variables to zero.
163  inline void Reset() { Instance = 0; ClassID = 0; Confidence = 0; Left = 0; Right = 0; Top = 0; Bottom = 0; }
164 
     // Default constructor: zero-initializes every member via Reset().
166  inline Detection() { Reset(); }
167  };
168 
173  {
175  OVERLAY_BOX = (1 << 0),
176  OVERLAY_LABEL = (1 << 1),
177  OVERLAY_CONFIDENCE = (1 << 2),
179  };
180 
185  {
186  CUSTOM = 0,
195 #if NV_TENSORRT_MAJOR > 4
196  SSD_MOBILENET_V1,
197  SSD_MOBILENET_V2,
198  SSD_INCEPTION_V2,
201  NETWORK_DEFAULT=SSD_MOBILENET_V2
202 #else
203  NETWORK_DEFAULT=PEDNET_MULTI
204 #endif
205  };
206 
213  static NetworkType NetworkTypeFromStr( const char* model_name );
214 
221  static uint32_t OverlayFlagsFromStr( const char* flags );
222 
229  static detectNet* Create( NetworkType networkType=NETWORK_DEFAULT, float threshold=DETECTNET_DEFAULT_THRESHOLD,
230  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
231  deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
232 
245  static detectNet* Create( const char* prototxt_path, const char* model_path, const char* mean_binary,
246  const char* class_labels, float threshold=DETECTNET_DEFAULT_THRESHOLD,
247  const char* input = DETECTNET_DEFAULT_INPUT,
248  const char* coverage = DETECTNET_DEFAULT_COVERAGE,
249  const char* bboxes = DETECTNET_DEFAULT_BBOX,
250  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
251  precisionType precision=TYPE_FASTEST,
252  deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
253 
266  static detectNet* Create( const char* prototxt_path, const char* model_path, float mean_pixel=0.0f,
267  const char* class_labels=NULL, float threshold=DETECTNET_DEFAULT_THRESHOLD,
268  const char* input = DETECTNET_DEFAULT_INPUT,
269  const char* coverage = DETECTNET_DEFAULT_COVERAGE,
270  const char* bboxes = DETECTNET_DEFAULT_BBOX,
271  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
272  precisionType precision=TYPE_FASTEST,
273  deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
274 
286  static detectNet* Create( const char* model_path, const char* class_labels, float threshold,
287  const char* input, const Dims3& inputDims,
288  const char* output, const char* numDetections,
289  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
290  precisionType precision=TYPE_FASTEST,
291  deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
292 
296  static detectNet* Create( int argc, char** argv );
297 
301  static detectNet* Create( const commandLine& cmdLine );
302 
306  static inline const char* Usage() { return DETECTNET_USAGE_STRING; }
307 
311  virtual ~detectNet();
312 
322  template<typename T> int Detect( T* image, uint32_t width, uint32_t height, Detection** detections, uint32_t overlay=OVERLAY_BOX ) { return Detect((void*)image, width, height, imageFormatFromType<T>(), detections, overlay); }
323 
334  template<typename T> int Detect( T* image, uint32_t width, uint32_t height, Detection* detections, uint32_t overlay=OVERLAY_BOX ) { return Detect((void*)image, width, height, imageFormatFromType<T>(), detections, overlay); }
335 
345  int Detect( void* input, uint32_t width, uint32_t height, imageFormat format, Detection** detections, uint32_t overlay=OVERLAY_BOX );
346 
357  int Detect( void* input, uint32_t width, uint32_t height, imageFormat format, Detection* detections, uint32_t overlay=OVERLAY_BOX );
358 
369  int Detect( float* input, uint32_t width, uint32_t height, Detection** detections, uint32_t overlay=OVERLAY_BOX );
370 
382  int Detect( float* input, uint32_t width, uint32_t height, Detection* detections, uint32_t overlay=OVERLAY_BOX );
383 
/**
 * Draw the detected bounding boxes overlaid on an image (typed convenience wrapper).
 *
 * Derives the imageFormat from the pixel type T via imageFormatFromType<T>() and
 * forwards to the untyped Overlay() overload. Both numDetections and flags are
 * forwarded, each in its own parameter slot of that overload.
 *
 * @param input          input image
 * @param output         output image
 * @param width          image width
 * @param height         image height
 * @param detections     array of detections to draw
 * @param numDetections  number of entries in detections
 * @param flags          overlay flags (defaults to OVERLAY_DEFAULT)
 * @return the result of the untyped Overlay() overload
 */
391  template<typename T> bool Overlay( T* input, T* output, uint32_t width, uint32_t height, Detection* detections, uint32_t numDetections, uint32_t flags=OVERLAY_DEFAULT ) { return Overlay(input, output, width, height, imageFormatFromType<T>(), detections, numDetections, flags); }
392 
400  bool Overlay( void* input, void* output, uint32_t width, uint32_t height, imageFormat format, Detection* detections, uint32_t numDetections, uint32_t flags=OVERLAY_DEFAULT );
401 
406  inline float GetThreshold() const { return mCoverageThreshold; }
407 
411  inline void SetThreshold( float threshold ) { mCoverageThreshold = threshold; }
412 
417  inline uint32_t GetMaxDetections() const { return mMaxDetections; }
418 
422  inline uint32_t GetNumClasses() const { return mNumClasses; }
423 
427  inline const char* GetClassDesc( uint32_t index ) const { return mClassDesc[index].c_str(); }
428 
432  inline const char* GetClassSynset( uint32_t index ) const { return mClassSynset[index].c_str(); }
433 
437  inline const char* GetClassPath() const { return mClassPath.c_str(); }
438 
442  inline float* GetClassColor( uint32_t classIndex ) const { return mClassColors[0] + (classIndex*4); }
443 
447  void SetClassColor( uint32_t classIndex, float r, float g, float b, float a=255.0f );
448 
452  void SetOverlayAlpha( float alpha );
453 
457  static bool LoadClassInfo( const char* filename, std::vector<std::string>& descriptions, int expectedClasses=-1 );
458 
462  static bool LoadClassInfo( const char* filename, std::vector<std::string>& descriptions, std::vector<std::string>& synsets, int expectedClasses=-1 );
463 
467  static void GenerateColor( uint32_t classID, uint8_t* rgb );
468 
469 protected:
470 
471  // constructor
472  detectNet( float meanPixel=0.0f );
473 
474  bool allocDetections();
475  bool defaultColors();
476  bool loadClassInfo( const char* filename );
477 
478  bool init( const char* prototxt_path, const char* model_path, const char* mean_binary, const char* class_labels,
479  float threshold, const char* input, const char* coverage, const char* bboxes, uint32_t maxBatchSize,
480  precisionType precision, deviceType device, bool allowGPUFallback );
481 
482  int clusterDetections( Detection* detections, uint32_t width, uint32_t height );
483  int clusterDetections( Detection* detections, int n, float threshold=0.75f );
484 
485  void sortDetections( Detection* detections, int numDetections );
486 
488  float* mClassColors[2];
489  float mMeanPixel;
490 
491  std::vector<std::string> mClassDesc;
492  std::vector<std::string> mClassSynset;
493 
494  std::string mClassPath;
495  uint32_t mNumClasses;
496 
497  Detection* mDetectionSets[2]; // list of detections, mNumDetectionSets * mMaxDetections
498  uint32_t mDetectionSet; // index of next detection set to use
499  uint32_t mMaxDetections; // number of raw detections in the grid
500 
501  static const uint32_t mNumDetectionSets = 16; // size of detection ringbuffer
502 };
503 
504 
505 #endif
Detection * mDetectionSets[2]
Definition: detectNet.h:497
Pedestrian / person detector.
Definition: detectNet.h:192
float IntersectionArea(const Detection &det) const
Return the area of the bounding box intersection.
Definition: detectNet.h:145
MS-COCO chair class.
Definition: detectNet.h:189
bool Contains(float x, float y) const
Return true if the coordinate is inside the bounding box.
Definition: detectNet.h:136
void SetThreshold(float threshold)
Set the minimum threshold for detection.
Definition: detectNet.h:411
static void GenerateColor(uint32_t classID, uint8_t *rgb)
Procedurally generate a bounding box color for a class index.
float Top
Top bounding box coordinate (in pixels)
Definition: detectNet.h:111
precisionType
Enumeration for indicating the desired precision that the network should run in, if available in hard...
Definition: tensorNet.h:95
GPU (if multiple GPUs are present, a specific GPU can be selected with cudaSetDevice() ...
Definition: tensorNet.h:124
float fmaxf(float a, float b)
Definition: cudaMath.h:56
std::vector< std::string > mClassSynset
Definition: detectNet.h:492
bool Expand(const Detection &det)
Expand the bounding box if they overlap (return true if so)
Definition: detectNet.h:160
float Left
Left bounding box coordinate (in pixels)
Definition: detectNet.h:109
float IntersectionArea(float x1, float y1, float x2, float y2) const
Return the area of the bounding box intersection.
Definition: detectNet.h:148
MS-COCO bottle class.
Definition: detectNet.h:188
Overlay the object bounding boxes.
Definition: detectNet.h:175
#define DETECTNET_DEFAULT_BBOX
Name of default output blob of the grid of bounding boxes for DetectNet caffe model.
Definition: detectNet.h:46
std::string mClassPath
Definition: detectNet.h:494
static float Width(float x1, float x2)
Calculate the width of the bounding box.
Definition: detectNet.h:124
Detection()
Definition: detectNet.h:166
Definition: detectNet.h:203
#define DETECTNET_DEFAULT_COVERAGE
Name of default output blob of the coverage map for DetectNet caffe model.
Definition: detectNet.h:40
const char * GetClassSynset(uint32_t index) const
Retrieve the class synset category of a particular class.
Definition: detectNet.h:432
int Detect(T *image, uint32_t width, uint32_t height, Detection *detections, uint32_t overlay=OVERLAY_BOX)
Detect object locations in an image, into an array of the results allocated by the user...
Definition: detectNet.h:334
#define DETECTNET_USAGE_STRING
Standard command-line options able to be passed to detectNet::Create()
Definition: detectNet.h:64
static float Height(float y1, float y2)
Calculate the height of the bounding box.
Definition: detectNet.h:127
void SetClassColor(uint32_t classIndex, float r, float g, float b, float a=255.0f)
Set the visualization color of a particular class of object.
static bool LoadClassInfo(const char *filename, std::vector< std::string > &descriptions, int expectedClasses=-1)
Load class descriptions from a label file.
bool Intersects(const Detection &det, float areaThreshold=0.0f) const
Return true if the bounding boxes intersect and exceeds area % threshold.
Definition: detectNet.h:139
float GetThreshold() const
Retrieve the minimum threshold for detection.
Definition: detectNet.h:406
deviceType
Enumeration for indicating the desired device that the network should run on, if available in hardwar...
Definition: tensorNet.h:122
float fminf(float a, float b)
Definition: cudaMath.h:51
The fastest detected precision should be used (i.e.
Definition: tensorNet.h:98
bool Overlay(T *input, T *output, uint32_t width, uint32_t height, Detection *detections, uint32_t numDetections, uint32_t flags=OVERLAY_DEFAULT)
Draw the detected bounding boxes overlaid on an RGBA image.
Definition: detectNet.h:391
Overlay the class description labels.
Definition: detectNet.h:176
float * mClassColors[2]
Definition: detectNet.h:488
std::vector< std::string > mClassDesc
Definition: detectNet.h:491
static NetworkType NetworkTypeFromStr(const char *model_name)
Parse a string to one of the built-in pretrained models.
void SetOverlayAlpha(float alpha)
Set overlay alpha blending value for all classes (between 0-255).
bool allocDetections()
bool init(const char *prototxt_path, const char *model_path, const char *mean_binary, const char *class_labels, float threshold, const char *input, const char *coverage, const char *bboxes, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback)
uint32_t mDetectionSet
Definition: detectNet.h:498
MS-COCO dog class.
Definition: detectNet.h:190
bool loadClassInfo(const char *filename)
uint32_t ClassID
Class index of the detected object.
Definition: detectNet.h:105
#define DETECTNET_DEFAULT_INPUT
Name of default input blob for DetectNet caffe model.
Definition: detectNet.h:34
Multi-class pedestrian + baggage detector.
Definition: detectNet.h:193
uint32_t GetNumClasses() const
Retrieve the number of object classes supported in the detector.
Definition: detectNet.h:422
static uint32_t OverlayFlagsFromStr(const char *flags)
Parse a string sequence into OverlayFlags enum.
bool Expand(float x1, float y1, float x2, float y2)
Expand the bounding box if they overlap (return true if so)
Definition: detectNet.h:157
MS-COCO airplane class.
Definition: detectNet.h:187
bool Intersects(float x1, float y1, float x2, float y2, float areaThreshold=0.0f) const
Return true if the bounding boxes intersect and exceed the area % threshold.
Definition: detectNet.h:142
Object Detection result.
Definition: detectNet.h:101
Command line parser for extracting flags, values, and strings.
Definition: commandLine.h:35
float mMeanPixel
Definition: detectNet.h:489
const char * GetClassDesc(uint32_t index) const
Retrieve the description of a particular class.
Definition: detectNet.h:427
__device__ cudaVectorTypeInfo< T >::Base alpha(T vec, typename cudaVectorTypeInfo< T >::Base default_alpha=255)
Definition: cudaVector.h:98
uint32_t Instance
Index of this unique object instance.
Definition: detectNet.h:104
static const uint32_t mNumDetectionSets
Definition: detectNet.h:501
No overlay.
Definition: detectNet.h:174
void Center(float *x, float *y) const
Return the center of the object.
Definition: detectNet.h:133
void sortDetections(Detection *detections, int numDetections)
Human facial detector trained on FDDB.
Definition: detectNet.h:191
float Height() const
Calculate the height of the object.
Definition: detectNet.h:118
float Right
Right bounding box coordinate (in pixels)
Definition: detectNet.h:110
int Detect(T *image, uint32_t width, uint32_t height, Detection **detections, uint32_t overlay=OVERLAY_BOX)
Detect object locations from an image, returning an array containing the detection results...
Definition: detectNet.h:322
static float Area(float x1, float y1, float x2, float y2)
Calculate the area of the bounding box.
Definition: detectNet.h:130
Overlay the detection confidence values.
Definition: detectNet.h:177
float Area() const
Calculate the area of the object.
Definition: detectNet.h:121
float * GetClassColor(uint32_t classIndex) const
Retrieve the RGBA visualization color of a particular class.
Definition: detectNet.h:442
#define DEFAULT_MAX_BATCH_SIZE
Default maximum batch size.
Definition: tensorNet.h:81
uint32_t GetMaxDetections() const
Retrieve the maximum number of simultaneous detections the network supports.
Definition: detectNet.h:417
Abstract class for loading a tensor network with TensorRT.
Definition: tensorNet.h:211
Object recognition and localization networks with TensorRT support.
Definition: detectNet.h:95
bool defaultColors()
uint32_t mMaxDetections
Definition: detectNet.h:499
The default choice of overlay.
Definition: detectNet.h:178
static const char * Usage()
Usage string for command line arguments to Create()
Definition: detectNet.h:306
bool Overlaps(float x1, float y1, float x2, float y2) const
Return true if the bounding boxes overlap.
Definition: detectNet.h:154
float mCoverageThreshold
Definition: detectNet.h:487
const char * GetClassPath() const
Retrieve the path to the file containing the class descriptions.
Definition: detectNet.h:437
float Bottom
Bottom bounding box coordinate (in pixels)
Definition: detectNet.h:112
static detectNet * Create(NetworkType networkType=NETWORK_DEFAULT, float threshold=DETECTNET_DEFAULT_THRESHOLD, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true)
Load a new network instance.
uint8_t classID
The class ID of the point.
Definition: cudaPointCloud.h:97
NetworkType
Network choice enumeration.
Definition: detectNet.h:184
float Confidence
Confidence value of the detected object.
Definition: detectNet.h:106
detectNet(float meanPixel=0.0f)
#define DETECTNET_DEFAULT_THRESHOLD
Default value of the minimum detection threshold.
Definition: detectNet.h:52
uint32_t mNumClasses
Definition: detectNet.h:495
bool Overlaps(const Detection &det) const
Return true if the bounding boxes overlap.
Definition: detectNet.h:151
virtual ~detectNet()
Destroy.
OverlayFlags
Overlay flags (can be OR'd together).
Definition: detectNet.h:172
void Reset()
Reset all member variables to zero.
Definition: detectNet.h:163
int clusterDetections(Detection *detections, uint32_t width, uint32_t height)
float Width() const
Calculate the width of the object.
Definition: detectNet.h:115
Custom model from user.
Definition: detectNet.h:186
imageFormat
The imageFormat enum is used to identify the pixel format and colorspace of an image.
Definition: imageFormat.h:49
nvinfer1::Dims3 Dims3
Definition: tensorNet.h:58