Jetson Inference
DNN Vision Library
segNet.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #ifndef __SEGMENTATION_NET_H__
24 #define __SEGMENTATION_NET_H__
25 
26 
27 #include "tensorNet.h"
28 
29 
/** Name of default input blob for segmentation model. */
34 #define SEGNET_DEFAULT_INPUT "data"
35 
/** Name of default output blob for segmentation model. */
40 #define SEGNET_DEFAULT_OUTPUT "score_fr_21classes"
41 
/**
 * Default alpha value (150).
 * NOTE(review): presumably the default overlay alpha blending value; the
 * --alpha help text in SEGNET_USAGE_STRING hard-codes "default: 150" instead
 * of referencing this macro, so the two must be kept in sync by hand.
 */
46 #define SEGNET_DEFAULT_ALPHA 150
47 
/**
 * Standard command-line options able to be passed to segNet::Create().
 * The input/output blob defaults shown in the text are spliced in from
 * SEGNET_DEFAULT_INPUT and SEGNET_DEFAULT_OUTPUT via string-literal
 * concatenation.
 */
52 #define SEGNET_USAGE_STRING "segNet arguments: \n" \
53  " --network=NETWORK pre-trained model to load, one of the following:\n" \
54  " * fcn-resnet18-cityscapes-512x256\n" \
55  " * fcn-resnet18-cityscapes-1024x512\n" \
56  " * fcn-resnet18-cityscapes-2048x1024\n" \
57  " * fcn-resnet18-deepscene-576x320\n" \
58  " * fcn-resnet18-deepscene-864x480\n" \
59  " * fcn-resnet18-mhp-512x320\n" \
60  " * fcn-resnet18-mhp-640x360\n" \
61  " * fcn-resnet18-voc-320x320 (default)\n" \
62  " * fcn-resnet18-voc-512x320\n" \
63  " * fcn-resnet18-sun-512x400\n" \
64  " * fcn-resnet18-sun-640x512\n" \
65  " --model=MODEL path to custom model to load (caffemodel, uff, or onnx)\n" \
66  " --prototxt=PROTOTXT path to custom prototxt to load (for .caffemodel only)\n" \
67  " --labels=LABELS path to text file containing the labels for each class\n" \
68  " --colors=COLORS path to text file containing the colors for each class\n" \
69  " --input-blob=INPUT name of the input layer (default: '" SEGNET_DEFAULT_INPUT "')\n" \
70  " --output-blob=OUTPUT name of the output layer (default: '" SEGNET_DEFAULT_OUTPUT "')\n" \
71  " --batch-size=BATCH maximum batch size (default is 1)\n" \
72  " --alpha=ALPHA overlay alpha blending value, range 0-255 (default: 150)\n" \
73  " --visualize=VISUAL visualization flags (e.g. --visualize=overlay,mask)\n" \
74  " valid combinations are: 'overlay', 'mask'\n" \
75  " --profile enable layer profiling in TensorRT\n\n"
76 
77 
/**
 * Image segmentation with FCN-Alexnet or custom models, using TensorRT.
 *
 * NOTE(review): this is a line-numbered listing with some lines elided
 * (the enum headers and most enumerators, plus several data members);
 * the comments below only describe what is visible here.
 */
82 class segNet : public tensorNet
83 {
84 public:
 /**
  * NetworkType: enumeration of pretrained/built-in network models.
  * NOTE(review): the enum header and its enumerators are not shown in
  * this listing; only the two section comments survive.
  */
89  {
102  /* legacy models (deprecated) */
111  /* add new models here */
113  };
114 
 /**
  * FilterMode: enumeration of mask/overlay filtering modes
  * (nearest point sampling and bilinear filtering).
  * NOTE(review): enum header and enumerators are elided from this listing;
  * FILTER_LINEAR is the default used throughout the API below.
  */
119  {
122  };
123 
 /**
  * VisualizationFlags: bit flags (each a distinct bit, so they can be OR'd).
  * NOTE(review): the enum header is elided from this listing.
  */
128  {
129  VISUALIZE_OVERLAY = (1 << 0),
130  VISUALIZE_MASK = (1 << 1),
131  /*VISUALIZE_LEGEND = (1 << 2)*/ // TODO
132  };
133 
 /**
  * Parse a string of one or more VisualizationFlags values.
  * default_value presumably is returned when str cannot be parsed -- confirm
  * against the implementation.
  */
138  static uint32_t VisualizationFlagsFromStr( const char* str, uint32_t default_value=VISUALIZE_OVERLAY );
139 
 /**
  * Parse a string from one of the FilterMode values.
  * default_value presumably is returned when str cannot be parsed -- confirm
  * against the implementation.
  */
145  static FilterMode FilterModeFromStr( const char* str, FilterMode default_value=FILTER_LINEAR );
146 
 /**
  * Parse a string from one of the built-in pretrained models.
  */
152  static NetworkType NetworkTypeFromStr( const char* model_name );
153 
 /**
  * Convert a NetworkType enum to a human-readable string.
  */
158  static const char* NetworkTypeToStr( NetworkType networkType );
159 
 /**
  * Load a new network instance.
  * NOTE(review): the default networkType here is FCN_ALEXNET_CITYSCAPES_SD,
  * whereas SEGNET_USAGE_STRING advertises fcn-resnet18-voc-320x320 as the
  * default and the FCN-Alexnet models sit under "legacy models (deprecated)";
  * confirm which default is intended.
  */
163  static segNet* Create( NetworkType networkType=FCN_ALEXNET_CITYSCAPES_SD, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
164  precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
165 
 /**
  * Load a new network instance from explicitly supplied files.
  * The input/output blob names default to SEGNET_DEFAULT_INPUT and
  * SEGNET_DEFAULT_OUTPUT; class_colors is optional (defaults to NULL).
  * NOTE(review): file formats and failure behavior are not visible here.
  */
176  static segNet* Create( const char* prototxt_path, const char* model_path,
177  const char* class_labels, const char* class_colors=NULL,
178  const char* input = SEGNET_DEFAULT_INPUT,
179  const char* output = SEGNET_DEFAULT_OUTPUT,
180  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
181  precisionType precision=TYPE_FASTEST,
182  deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
183 
184 
 /**
  * Load a new network instance from raw command-line arguments
  * (see SEGNET_USAGE_STRING for the standard options).
  */
188  static segNet* Create( int argc, char** argv );
189 
 /**
  * Load a new network instance from an already-constructed commandLine parser.
  */
193  static segNet* Create( const commandLine& cmdLine );
194 
 /**
  * Usage string for command line arguments to Create().
  */
198  static inline const char* Usage() { return SEGNET_USAGE_STRING; }
199 
 /**
  * Destroy.
  */
203  virtual ~segNet();
204 
 /**
  * Perform the initial inferencing processing portion of the segmentation.
  * Typed convenience wrapper: deduces the imageFormat from T and forwards to
  * the void* overload.
  * NOTE(review): ignore_class (default "void") is presumably the label of a
  * class to exclude from classification -- confirm in the implementation.
  */
213  template<typename T> bool Process( T* input, uint32_t width, uint32_t height, const char* ignore_class="void" ) { return Process((void*)input, width, height, imageFormatFromType<T>(), ignore_class); }
214 
 /**
  * Perform the initial inferencing processing portion of the segmentation,
  * with the pixel format of the input given explicitly.
  */
223  bool Process( void* input, uint32_t width, uint32_t height, imageFormat format, const char* ignore_class="void" );
224 
 /**
  * Process() overload taking a float* image with no explicit format.
  * NOTE(review): the pixel layout this overload assumes is not visible here.
  */
234  bool Process( float* input, uint32_t width, uint32_t height, const char* ignore_class="void" );
235 
 /**
  * Produce a colorized segmentation mask.
  * Typed convenience wrapper: deduces the imageFormat from T and forwards to
  * the void* overload.
  */
239  template<typename T> bool Mask( T* output, uint32_t width, uint32_t height, FilterMode filter=FILTER_LINEAR ) { return Mask((void*)output, width, height, imageFormatFromType<T>(), filter); }
240 
 /**
  * Produce a colorized segmentation mask, with the output pixel format given
  * explicitly.
  */
244  bool Mask( void* output, uint32_t width, uint32_t height, imageFormat format, FilterMode filter=FILTER_LINEAR );
245 
 /**
  * Mask() overload writing to a float* buffer with no explicit format.
  * NOTE(review): the pixel layout this overload assumes is not visible here.
  */
250  bool Mask( float* output, uint32_t width, uint32_t height, FilterMode filter=FILTER_LINEAR );
251 
 /**
  * Mask() overload writing to a uint8_t* buffer; takes no filter argument.
  * NOTE(review): presumably emits per-pixel class indices rather than colors
  * -- confirm in the implementation.
  */
256  bool Mask( uint8_t* output, uint32_t width, uint32_t height );
257 
 /**
  * Produce the segmentation overlay alpha blended on top of the original image.
  * Typed convenience wrapper: deduces the imageFormat from T and forwards to
  * the void* overload.
  */
267  template<typename T> bool Overlay( T* output, uint32_t width, uint32_t height, FilterMode filter=FILTER_LINEAR ) { return Overlay((void*)output, width, height, imageFormatFromType<T>(), filter); }
268 
 /**
  * Produce the segmentation overlay alpha blended on top of the original
  * image, with the output pixel format given explicitly.
  */
278  bool Overlay( void* output, uint32_t width, uint32_t height, imageFormat format, FilterMode filter=FILTER_LINEAR );
279 
 /**
  * Overlay() overload writing to a float* buffer with no explicit format.
  * NOTE(review): the pixel layout this overload assumes is not visible here.
  */
291  bool Overlay( float* output, uint32_t width, uint32_t height, FilterMode filter=FILTER_LINEAR );
292 
 /**
  * Find the ID of a particular class (by label name).
  * NOTE(review): the return value for an unknown label is not visible here.
  */
296  int FindClassID( const char* label_name );
297 
 /**
  * Retrieve the number of object classes: the channel dimension of the
  * first output layer.
  */
301  inline uint32_t GetNumClasses() const { return DIMS_C(mOutputs[0].dims); }
302 
 /**
  * Retrieve the description of a particular class.
  * Returns NULL when id is not a valid index into the loaded labels.
  */
306  inline const char* GetClassDesc( uint32_t id ) const { return id < mClassLabels.size() ? mClassLabels[id].c_str() : NULL; }
307 
 /**
  * Retrieve the RGBA visualization color of a particular class.
  * Returns a pointer into mClassColors at a stride of 4 floats per class.
  * NOTE(review): id is not bounds-checked and mClassColors is not checked
  * for NULL, unlike GetClassDesc().
  */
311  inline float* GetClassColor( uint32_t id ) const { return mClassColors + (id*4); }
312 
 /**
  * Set the visualization color of a particular class of object.
  * Alpha defaults to 255.0f.
  */
316  void SetClassColor( uint32_t classIndex, float r, float g, float b, float a=255.0f );
317 
 /**
  * Set overlay alpha blending value for all classes (between 0-255).
  * NOTE(review): explicit_exempt presumably skips classes whose alpha was
  * explicitly set -- the description available for this listing is
  * truncated; confirm in the implementation.
  */
322  void SetOverlayAlpha( float alpha, bool explicit_exempt=true );
323 
 /**
  * Retrieve the path to the file containing the class label descriptions.
  */
327  inline const char* GetClassPath() const { return mClassPath.c_str(); }
328 
 /**
  * Retrieve the number of columns in the classification grid
  * (width dimension of the first output layer).
  */
333  inline uint32_t GetGridWidth() const { return DIMS_W(mOutputs[0].dims); }
334 
 /**
  * Retrieve the number of rows in the classification grid
  * (height dimension of the first output layer).
  */
339  inline uint32_t GetGridHeight() const { return DIMS_H(mOutputs[0].dims); }
340 
 /**
  * Retrieve the network type (a value of the NetworkType enumeration).
  */
344  inline NetworkType GetNetworkType() const { return mNetworkType; }
345 
 /**
  * Retrieve a string describing the network name
  * (NetworkTypeToStr() applied to the stored network type).
  */
349  inline const char* GetNetworkName() const { return NetworkTypeToStr(mNetworkType); }
350 
351 protected:
 // Protected constructor: instances are obtained through the static Create() overloads.
352  segNet();
353 
 // NOTE(review): internal helpers below carry no description in this listing;
 // the remarks are inferred from names/signatures only and should be confirmed.
 // classify() presumably fills mClassMap (the per-tile argmax class index).
354  bool classify( const char* ignore_class );
355 
 // Point-sampled and bilinear variants of the overlay/mask generation;
 // mask_only presumably selects mask output instead of blending over the input.
356  bool overlayPoint( void* input, uint32_t in_width, uint32_t in_height, imageFormat in_format, void* output, uint32_t out_width, uint32_t out_height, imageFormat out_format, bool mask_only );
357  bool overlayLinear( void* input, uint32_t in_width, uint32_t in_height, imageFormat in_format, void* output, uint32_t out_width, uint32_t out_height, imageFormat out_format, bool mask_only );
358 
 // File helpers for per-class colors, labels and legend.
359  bool loadClassColors( const char* filename );
360  bool loadClassLabels( const char* filename );
361  bool saveClassLegend( const char* filename );
362 
 // Class label strings (indexed by class id) and the path they were loaded from.
363  std::vector<std::string> mClassLabels;
364  std::string mClassPath;
365 
 // NOTE(review): members mColorsAlphaSet, mLastInputImg, mLastInputFormat and
 // mNetworkType are declared in the original header but elided from this listing.
 // mClassColors: array of overlay colors in shared CPU/GPU memory.
367  float* mClassColors;
 // mClassMap: runtime buffer for the argmax-classified class index of each tile.
368  uint8_t* mClassMap;
 // Width/height in pixels of the last input image to be processed.
371  uint32_t mLastInputWidth;
372  uint32_t mLastInputHeight;
376 };
377 
378 
379 #endif
380 
FilterMode
Enumeration of mask/overlay filtering modes.
Definition: segNet.h:118
precisionType
Enumeration for indicating the desired precision that the network should run in, if available in hard...
Definition: tensorNet.h:95
static FilterMode FilterModeFromStr(const char *str, FilterMode default_value=FILTER_LINEAR)
Parse a string from one of the FilterMode values.
FCN-ResNet18 trained on Multi-Human Parsing dataset (512x320)
Definition: segNet.h:95
GPU (if multiple GPUs are present, a specific GPU can be selected with cudaSetDevice() ...
Definition: tensorNet.h:124
FCN-ResNet18 trained on DeepScene Forest dataset (864x480)
Definition: segNet.h:94
bool saveClassLegend(const char *filename)
Definition: segNet.h:129
#define DIMS_W(x)
Definition: tensorNet.h:62
std::vector< std::string > mClassLabels
Definition: segNet.h:363
NetworkType mNetworkType
Pretrained built-in model type enumeration.
Definition: segNet.h:375
bool Overlay(T *output, uint32_t width, uint32_t height, FilterMode filter=FILTER_LINEAR)
Produce the segmentation overlay alpha blended on top of the original image.
Definition: segNet.h:267
Definition: segNet.h:130
FCN-Alexnet trained on SYNTHIA SEQS summer datasets.
Definition: segNet.h:106
FCN-Alexnet trained on SYNTHIA SEQS summer datasets.
Definition: segNet.h:105
FCN-Alexnet trained on Pascal VOC dataset.
Definition: segNet.h:103
std::vector< layerInfo > mOutputs
Definition: tensorNet.h:751
FCN-Alexnet trained on Cityscapes dataset with 21 classes.
Definition: segNet.h:107
static NetworkType NetworkTypeFromStr(const char *model_name)
Parse a string from one of the built-in pretrained models.
FCN-ResNet18 trained on Cityscapes dataset (512x256)
Definition: segNet.h:90
deviceType
Enumeration for indicating the desired device that the network should run on, if available in hardwar...
Definition: tensorNet.h:122
#define DIMS_C(x)
Definition: tensorNet.h:60
uint32_t GetNumClasses() const
Retrieve the number of object classes supported by the segmentation network.
Definition: segNet.h:301
FCN-ResNet18 trained on DeepScene Forest dataset (576x320)
Definition: segNet.h:93
const char * GetNetworkName() const
Retrieve a string describing the network name.
Definition: segNet.h:349
The fastest detected precision should be used (i.e. ...
Definition: tensorNet.h:98
FCN-Alexnet trained on SYNTHIA CVPR16 dataset.
Definition: segNet.h:104
bool loadClassLabels(const char *filename)
static const char * NetworkTypeToStr(NetworkType networkType)
Convert a NetworkType enum to a human-readable string.
FCN-ResNet18 trained on Pascal VOC dataset (320x320)
Definition: segNet.h:97
#define SEGNET_DEFAULT_INPUT
Name of default input blob for segmentation model.
Definition: segNet.h:34
int FindClassID(const char *label_name)
Find the ID of a particular class (by label name).
bool * mColorsAlphaSet
true if the class color has been explicitly set from file or by the user
Definition: segNet.h:366
bool Mask(T *output, uint32_t width, uint32_t height, FilterMode filter=FILTER_LINEAR)
Produce a colorized segmentation mask.
Definition: segNet.h:239
FCN-ResNet18 trained on SUN RGB-D dataset (512x400)
Definition: segNet.h:99
FCN-Alexnet trained on aerial first-person view of the horizon line for drones, 1280x720 and 21 outpu...
Definition: segNet.h:109
Definition: segNet.h:112
bool overlayLinear(void *input, uint32_t in_width, uint32_t in_height, imageFormat in_format, void *output, uint32_t out_width, uint32_t out_height, imageFormat out_format, bool mask_only)
uint32_t mLastInputWidth
width in pixels of last input image to be processed
Definition: segNet.h:371
Command line parser for extracting flags, values, and strings.
Definition: commandLine.h:35
void * mLastInputImg
last input image to be processed, stored for overlay
Definition: segNet.h:370
const char * GetClassPath() const
Retrieve the path to the file containing the class label descriptions.
Definition: segNet.h:327
virtual ~segNet()
Destroy.
bool overlayPoint(void *input, uint32_t in_width, uint32_t in_height, imageFormat in_format, void *output, uint32_t out_width, uint32_t out_height, imageFormat out_format, bool mask_only)
VisualizationFlags
Visualization flags.
Definition: segNet.h:127
Bilinear filtering.
Definition: segNet.h:121
__device__ cudaVectorTypeInfo< T >::Base alpha(T vec, typename cudaVectorTypeInfo< T >::Base default_alpha=255)
Definition: cudaVector.h:98
Nearest point sampling.
Definition: segNet.h:120
std::string mClassPath
Definition: segNet.h:364
imageFormat mLastInputFormat
pixel format of last input image
Definition: segNet.h:373
uint8_t * mClassMap
runtime buffer for the argmax-classified class index of each tile
Definition: segNet.h:368
bool classify(const char *ignore_class)
FCN-Alexnet trained on Cityscapes dataset with 21 classes.
Definition: segNet.h:108
Image segmentation with FCN-Alexnet or custom models, using TensorRT.
Definition: segNet.h:82
void SetOverlayAlpha(float alpha, bool explicit_exempt=true)
Set overlay alpha blending value for all classes (between 0-255), (optionally except for those that h...
uint32_t GetGridWidth() const
Retrieve the number of columns in the classification grid.
Definition: segNet.h:333
#define DEFAULT_MAX_BATCH_SIZE
Default maximum batch size.
Definition: tensorNet.h:81
Abstract class for loading a tensor network with TensorRT.
Definition: tensorNet.h:211
FCN-ResNet18 trained on Multi-Human Parsing dataset (640x360)
Definition: segNet.h:96
FCN-ResNet18 trained on SUN RGB-D dataset (640x512)
Definition: segNet.h:100
#define SEGNET_DEFAULT_OUTPUT
Name of default output blob for segmentation model.
Definition: segNet.h:40
NetworkType GetNetworkType() const
Retrieve the network type (a value of the NetworkType enumeration)
Definition: segNet.h:344
bool loadClassColors(const char *filename)
static uint32_t VisualizationFlagsFromStr(const char *str, uint32_t default_value=VISUALIZE_OVERLAY)
Parse a string of one or more VisualizationFlags values.
float * GetClassColor(uint32_t id) const
Retrieve the RGBA visualization color of a particular class.
Definition: segNet.h:311
const char * GetClassDesc(uint32_t id) const
Retrieve the description of a particular class.
Definition: segNet.h:306
void SetClassColor(uint32_t classIndex, float r, float g, float b, float a=255.0f)
Set the visualization color of a particular class of object.
uint32_t mLastInputHeight
height in pixels of last input image to be processed
Definition: segNet.h:372
#define SEGNET_USAGE_STRING
Standard command-line options able to be passed to segNet::Create()
Definition: segNet.h:52
bool Process(T *input, uint32_t width, uint32_t height, const char *ignore_class="void")
Perform the initial inferencing processing portion of the segmentation.
Definition: segNet.h:213
FCN-ResNet18 trained on Cityscapes dataset (2048x1024)
Definition: segNet.h:92
FCN-ResNet18 trained on Cityscapes dataset (1024x512)
Definition: segNet.h:91
static const char * Usage()
Usage string for command line arguments to Create()
Definition: segNet.h:198
NetworkType
Enumeration of pretrained/built-in network models.
Definition: segNet.h:88
float * mClassColors
array of overlay colors in shared CPU/GPU memory
Definition: segNet.h:367
static segNet * Create(NetworkType networkType=FCN_ALEXNET_CITYSCAPES_SD, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true)
Load a new network instance.
imageFormat
The imageFormat enum is used to identify the pixel format and colorspace of an image.
Definition: imageFormat.h:49
FCN-ResNet18 trained on Pascal VOC dataset (512x320)
Definition: segNet.h:98
uint32_t GetGridHeight() const
Retrieve the number of rows in the classification grid.
Definition: segNet.h:339
#define DIMS_H(x)
Definition: tensorNet.h:61