Jetson Inference
DNN Vision Library
poseNet.h
1 /*
2  * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #ifndef __POSE_NET_H__
24 #define __POSE_NET_H__
25 
26 
27 #include "tensorNet.h"
28 
29 #include <array>
30 #include <vector>
31 
32 
37 #define POSENET_DEFAULT_INPUT "input"
38 
43 #define POSENET_DEFAULT_CMAP "cmap"
44 
49 #define POSENET_DEFAULT_PAF "paf"
50 
55 #define POSENET_DEFAULT_THRESHOLD 0.15f
56 
62 #define POSENET_DEFAULT_KEYPOINT_SCALE 0.0052f
63 
69 #define POSENET_DEFAULT_LINK_SCALE 0.0013f
70 
75 #define POSENET_MODEL_TYPE "pose"
76 
81 #define POSENET_USAGE_STRING "poseNet arguments: \n" \
82  " --network=NETWORK pre-trained model to load, one of the following:\n" \
83  " * resnet18-body (default)\n" \
84  " * resnet18-hand\n" \
85  " * densenet121-body\n" \
86  " --model=MODEL path to custom model to load (caffemodel, uff, or onnx)\n" \
87  " --prototxt=PROTOTXT path to custom prototxt to load (for .caffemodel only)\n" \
88  " --labels=LABELS path to text file containing the labels for each class\n" \
89  " --input-blob=INPUT name of the input layer (default is '" POSENET_DEFAULT_INPUT "')\n" \
90  " --output-cvg=COVERAGE name of the coverage output layer (default is '" POSENET_DEFAULT_CMAP "')\n" \
91  " --output-bbox=BOXES name of the bounding box output layer (default is '" POSENET_DEFAULT_PAF "')\n" \
92  " --mean-pixel=PIXEL mean pixel value to subtract from input (default is 0.0)\n" \
93  " --threshold=THRESHOLD minimum threshold for detection (default is 0.15)\n" \
94  " --overlay=OVERLAY detection overlay flags (e.g. --overlay=links,keypoints)\n" \
95  " valid combinations are: 'box', 'links', 'keypoints', 'none'\n" \
96  " --keypoint-scale=X radius scale for keypoints, relative to image (default: 0.0052)\n" \
97  " --link-scale=X line width scale for links, relative to image (default: 0.0013)\n" \
98  " --profile enable layer profiling in TensorRT\n\n"
99 
100 
105 class poseNet : public tensorNet
106 {
107 public:
112  struct ObjectPose
113  {
114  uint32_t ID;
116  float Left;
117  float Right;
118  float Top;
119  float Bottom;
124  struct Keypoint
125  {
126  uint32_t ID;
127  float x;
128  float y;
129  };
130 
131  std::vector<Keypoint> Keypoints;
132  std::vector<std::array<uint32_t, 2>> Links;
135  inline int FindKeypoint(uint32_t id) const;
136 
138  inline int FindLink(uint32_t a, uint32_t b) const;
139  };
140 
144  enum OverlayFlags
145  {
146  OVERLAY_NONE = 0,
147  OVERLAY_BOX = (1 << 0),
148  OVERLAY_LINKS = (1 << 1),
149  OVERLAY_KEYPOINTS = (1 << 2),
150  OVERLAY_DEFAULT = OVERLAY_LINKS|OVERLAY_KEYPOINTS,
151  };
152 
159  static uint32_t OverlayFlagsFromStr( const char* flags );
160 
167  static poseNet* Create( const char* network="resnet18-body", float threshold=POSENET_DEFAULT_THRESHOLD,
168  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
169  deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
170 
182  static poseNet* Create( const char* model_path, const char* topology, const char* colors,
183  float threshold=POSENET_DEFAULT_THRESHOLD,
184  const char* input = POSENET_DEFAULT_INPUT,
185  const char* cmap = POSENET_DEFAULT_CMAP,
186  const char* paf = POSENET_DEFAULT_PAF,
187  uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
188  precisionType precision=TYPE_FASTEST,
189  deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
190 
194  static poseNet* Create( int argc, char** argv );
195 
199  static poseNet* Create( const commandLine& cmdLine );
200 
204  static inline const char* Usage() { return POSENET_USAGE_STRING; }
205 
209  virtual ~poseNet();
210 
220  template<typename T> bool Process( T* image, uint32_t width, uint32_t height, std::vector<ObjectPose>& poses, uint32_t overlay=OVERLAY_DEFAULT ) { return Process((void*)image, width, height, imageFormatFromType<T>(), poses, overlay); }
221 
231  bool Process( void* image, uint32_t width, uint32_t height, imageFormat format, std::vector<ObjectPose>& poses, uint32_t overlay=OVERLAY_DEFAULT );
232 
241  template<typename T> bool Process( T* image, uint32_t width, uint32_t height, uint32_t overlay=OVERLAY_DEFAULT ) { return Process((void*)image, width, height, imageFormatFromType<T>(), overlay); }
242 
251  bool Process( void* image, uint32_t width, uint32_t height, imageFormat format, uint32_t overlay=OVERLAY_DEFAULT );
252 
256  template<typename T> bool Overlay( T* input, T* output, uint32_t width, uint32_t height, const std::vector<ObjectPose>& poses, uint32_t overlay=OVERLAY_DEFAULT ) { return Overlay((void*)input, (void*)output, width, height, imageFormatFromType<T>(), overlay); }
257 
261  bool Overlay( void* input, void* output, uint32_t width, uint32_t height, imageFormat format, const std::vector<ObjectPose>& poses, uint32_t overlay=OVERLAY_DEFAULT );
262 
266  inline float GetThreshold() const { return mThreshold; }
267 
271  inline void SetThreshold( float threshold ) { mThreshold = threshold; }
272 
276  inline const char* GetCategory() const { return mTopology.category.c_str(); }
277 
281  inline uint32_t GetNumKeypoints() const { return mTopology.keypoints.size(); }
282 
286  inline const char* GetKeypointName( uint32_t index ) const { return mTopology.keypoints[index].c_str(); }
287 
291  inline int FindKeypointID( const char* name ) const;
292 
296  inline float4 GetKeypointColor( uint32_t index ) const { return mKeypointColors[index]; }
297 
301  inline void SetKeypointColor( uint32_t index, const float4& color ) { mKeypointColors[index] = color; }
302 
306  inline void SetKeypointAlpha( uint32_t index, float alpha ) { mKeypointColors[index].w = alpha; }
307 
311  inline void SetKeypointAlpha( float alpha );
312 
316  inline float GetKeypointScale() const { return mKeypointScale; }
317 
322  inline void SetKeypointScale( float scale ) { mKeypointScale = scale; }
323 
327  inline float GetLinkScale() const { return mLinkScale; }
328 
333  inline void SetLinkScale( float scale ) { mLinkScale = scale; }
334 
335 protected:
336 
337  static const int CMAP_WINDOW_SIZE=5;
338  static const int PAF_INTEGRAL_SAMPLES=7;
339  static const int MAX_LINKS=100;
340  static const int MAX_OBJECTS=100;
341 
342  struct Topology
343  {
344  std::string category;
345  std::vector<std::string> keypoints;
346  int links[MAX_LINKS * 4];
347  int numLinks;
348  };
349 
350  // constructor
351  poseNet();
352 
353  bool init( const char* model_path, const char* topology, const char* colors, float threshold,
354  const char* input, const char* cmap, const char* paf, uint32_t maxBatchSize,
355  precisionType precision, deviceType device, bool allowGPUFallback );
356 
357  bool postProcess(std::vector<ObjectPose>& poses, uint32_t width, uint32_t height);
358 
359  bool loadTopology( const char* json_path, Topology* topology );
360  bool loadKeypointColors( const char* filename );
361 
362  Topology mTopology;
363 
364  float mThreshold;
365  float mLinkScale;
366  float mKeypointScale;
367 
368  float4* mKeypointColors;
369 
370  // post-processing buffers
371  int* mPeaks;
372  int* mPeakCounts;
373  int* mConnections;
374  int* mObjects;
375  int mNumObjects;
376 
377  float* mRefinedPeaks;
378  float* mScoreGraph;
379 
380  void* mAssignmentWorkspace;
381  void* mConnectionWorkspace;
382 };
383 
384 
385 // FindKeypointID
386 inline int poseNet::FindKeypointID( const char* name ) const
387 {
388  if( !name )
389  return -1;
390 
391  const uint32_t numKeypoints = GetNumKeypoints();
392 
393  for( uint32_t n=0; n < numKeypoints; n++ )
394  {
395  if( strcasecmp(GetKeypointName(n), name) == 0 )
396  return n;
397  }
398 
399  return -1;
400 }
401 
402 // FindKeypoint
403 inline int poseNet::ObjectPose::FindKeypoint( uint32_t id ) const
404 {
405  const uint32_t numKeypoints = Keypoints.size();
406 
407  for( uint32_t n=0; n < numKeypoints; n++ )
408  {
409  if( id == Keypoints[n].ID )
410  return n;
411  }
412 
413  return -1;
414 }
415 
416 // FindLink
417 inline int poseNet::ObjectPose::FindLink( uint32_t a, uint32_t b ) const
418 {
419  const uint32_t numLinks = Links.size();
420 
421  for( uint32_t n=0; n < numLinks; n++ )
422  {
423  if( a == Keypoints[Links[n][0]].ID && b == Keypoints[Links[n][1]].ID )
424  return n;
425  }
426 
427  return -1;
428 }
429 
430 // SetKeypointAlpha
431 inline void poseNet::SetKeypointAlpha( float alpha )
432 {
433  const uint32_t numKeypoints = GetNumKeypoints();
434 
435  for( uint32_t n=0; n < numKeypoints; n++ )
436  mKeypointColors[n].w = alpha;
437 }
438 
439 
440 #endif
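As a quick orientation before the member index below, here is a minimal usage sketch of the class. It assumes the loadImage()/saveImage() helpers from jetson-utils and a hypothetical input file example.jpg; error handling is abbreviated.

#include <jetson-inference/poseNet.h>
#include <jetson-utils/imageIO.h>   // assumed jetson-utils image I/O header (loadImage/saveImage)

#include <cstdio>

int main( int argc, char** argv )
{
	// load the default pre-trained body model
	poseNet* net = poseNet::Create("resnet18-body", POSENET_DEFAULT_THRESHOLD);

	if( !net )
		return 1;

	// load an RGB image from disk (uchar3 maps to 8-bit RGB format)
	uchar3* image = NULL;
	int width = 0;
	int height = 0;

	if( !loadImage("example.jpg", &image, &width, &height) )   // placeholder path
		return 1;

	// run pose estimation and draw the default overlay (links + keypoints) in-place
	std::vector<poseNet::ObjectPose> poses;

	if( !net->Process(image, width, height, poses, poseNet::OVERLAY_DEFAULT) )
		return 1;

	printf("detected %zu object(s)\n", poses.size());

	saveImage("example_overlay.jpg", image, width, height);
	delete net;
	return 0;
}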
poseNet::SetLinkScale
void SetLinkScale(float scale)
Set the scale used to calculate the width of link lines.
Definition: poseNet.h:333
poseNet::mObjects
int * mObjects
Definition: poseNet.h:374
poseNet::Overlay
bool Overlay(T *input, T *output, uint32_t width, uint32_t height, const std::vector< ObjectPose > &poses, uint32_t overlay=OVERLAY_DEFAULT)
Overlay the results on the image.
Definition: poseNet.h:256
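A sketch of rendering into a separate buffer: run Process() with OVERLAY_NONE, then draw the same poses into a second image. The variables net, input, output, width, and height are assumed to already exist (e.g. as in the sketch after the listing above).

// detect first without drawing, then overlay links + keypoints into 'output'
std::vector<poseNet::ObjectPose> poses;

if( net->Process(input, width, height, poses, poseNet::OVERLAY_NONE) )
	net->Overlay(input, output, width, height, poses, poseNet::OVERLAY_LINKS | poseNet::OVERLAY_KEYPOINTS);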
poseNet::init
bool init(const char *model_path, const char *topology, const char *colors, float threshold, const char *input, const char *cmap, const char *paf, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback)
poseNet::ObjectPose::Bottom
float Bottom
Bounding box bottom, as determined by the bottom-most keypoint in the pose.
Definition: poseNet.h:119
poseNet::GetKeypointName
const char * GetKeypointName(uint32_t index) const
Get the name of a keypoint in the topology by its ID.
Definition: poseNet.h:286
poseNet::Topology::category
std::string category
Definition: poseNet.h:344
poseNet::PAF_INTEGRAL_SAMPLES
static const int PAF_INTEGRAL_SAMPLES
Definition: poseNet.h:338
poseNet::OVERLAY_KEYPOINTS
@ OVERLAY_KEYPOINTS
Overlay the keypoints (joints) as circles.
Definition: poseNet.h:149
poseNet::mAssignmentWorkspace
void * mAssignmentWorkspace
Definition: poseNet.h:380
poseNet::mPeakCounts
int * mPeakCounts
Definition: poseNet.h:372
POSENET_USAGE_STRING
#define POSENET_USAGE_STRING
Standard command-line options that can be passed to poseNet::Create().
Definition: poseNet.h:81
poseNet::~poseNet
virtual ~poseNet()
Destroy.
poseNet::mScoreGraph
float * mScoreGraph
Definition: poseNet.h:378
poseNet::OverlayFlagsFromStr
static uint32_t OverlayFlagsFromStr(const char *flags)
Parse a comma-delimited string of flags (e.g. 'links,keypoints') into an OverlayFlags bitmask.
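For example, a user-supplied string (such as the value of --overlay) could be mapped to flags like this; net and image are assumed from the earlier sketch.

// convert the flag string and use it with the overlay-only Process() overload
const uint32_t flags = poseNet::OverlayFlagsFromStr("links,keypoints");
net->Process(image, width, height, flags);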
poseNet::loadKeypointColors
bool loadKeypointColors(const char *filename)
poseNet::mRefinedPeaks
float * mRefinedPeaks
Definition: poseNet.h:377
poseNet::GetKeypointScale
float GetKeypointScale() const
Get the scale used to calculate the radius of keypoints relative to input image dimensions.
Definition: poseNet.h:316
poseNet::ObjectPose::Top
float Top
Bounding box top, as determined by the top-most keypoint in the pose.
Definition: poseNet.h:118
poseNet::mThreshold
float mThreshold
Definition: poseNet.h:364
DEVICE_GPU
@ DEVICE_GPU
GPU (if multiple GPUs are present, a specific GPU can be selected with cudaSetDevice()).
Definition: tensorNet.h:131
poseNet::postProcess
bool postProcess(std::vector< ObjectPose > &poses, uint32_t width, uint32_t height)
poseNet::SetKeypointScale
void SetKeypointScale(float scale)
Set the scale used to calculate the radius of keypoint circles.
Definition: poseNet.h:322
POSENET_DEFAULT_PAF
#define POSENET_DEFAULT_PAF
Name of default output blob of the Part Affinity Field (PAF) for pose estimation ONNX model.
Definition: poseNet.h:49
poseNet::Topology::keypoints
std::vector< std::string > keypoints
Definition: poseNet.h:345
deviceType
deviceType
Enumeration for indicating the desired device that the network should run on, if available in hardware.
Definition: tensorNet.h:129
poseNet::ObjectPose::FindKeypoint
int FindKeypoint(uint32_t id) const
Find a keypoint index by its ID, or return -1 if not found.
Definition: poseNet.h:403
poseNet::GetNumKeypoints
uint32_t GetNumKeypoints() const
Get the number of keypoints in the topology.
Definition: poseNet.h:281
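A small sketch that enumerates the keypoint names defined by the loaded topology (net assumed from the earlier sketch).

// print every keypoint name in the topology
for( uint32_t n = 0; n < net->GetNumKeypoints(); n++ )
	printf("keypoint %u: %s\n", n, net->GetKeypointName(n));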
POSENET_DEFAULT_INPUT
#define POSENET_DEFAULT_INPUT
Name of default input blob for pose estimation ONNX model.
Definition: poseNet.h:37
poseNet::mLinkScale
float mLinkScale
Definition: poseNet.h:365
poseNet::OverlayFlags
OverlayFlags
Overlay flags (can be OR'd together).
Definition: poseNet.h:144
poseNet::GetLinkScale
float GetLinkScale() const
Get the scale used to calculate the width of link lines relative to input image dimensions.
Definition: poseNet.h:327
tensorNet.h
poseNet::mConnections
int * mConnections
Definition: poseNet.h:373
TYPE_FASTEST
@ TYPE_FASTEST
The fastest detected precision should be used (i.e. INT8 if available, FP16 if available, otherwise FP32).
Definition: tensorNet.h:105
poseNet::CMAP_WINDOW_SIZE
static const int CMAP_WINDOW_SIZE
Definition: poseNet.h:337
poseNet::OVERLAY_DEFAULT
@ OVERLAY_DEFAULT
Definition: poseNet.h:150
poseNet::mConnectionWorkspace
void * mConnectionWorkspace
Definition: poseNet.h:381
poseNet::mTopology
Topology mTopology
Definition: poseNet.h:362
poseNet::Usage
static const char * Usage()
Usage string for command line arguments to Create()
Definition: poseNet.h:204
poseNet::Topology::numLinks
int numLinks
Definition: poseNet.h:347
poseNet::ObjectPose::Keypoint
A keypoint or joint in the topology.
Definition: poseNet.h:124
poseNet::Create
static poseNet * Create(const char *network="resnet18-body", float threshold=POSENET_DEFAULT_THRESHOLD, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true)
Load a pre-trained model.
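For instance, the hand model could be loaded with a custom threshold while keeping the remaining defaults explicit (a sketch; the 0.2 threshold is an arbitrary illustration).

// load the hand-pose network with a custom detection threshold
poseNet* net = poseNet::Create("resnet18-hand", 0.2f, DEFAULT_MAX_BATCH_SIZE,
                               TYPE_FASTEST, DEVICE_GPU, true);

if( !net )
	printf("failed to load poseNet model\n");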
precisionType
precisionType
Enumeration for indicating the desired precision that the network should run in, if available in hardware.
Definition: tensorNet.h:102
poseNet::Process
bool Process(T *image, uint32_t width, uint32_t height, uint32_t overlay=OVERLAY_DEFAULT)
Perform pose estimation on the given image, and overlay the results.
Definition: poseNet.h:241
poseNet::SetKeypointColor
void SetKeypointColor(uint32_t index, const float4 &color)
Set the overlay color for a keypoint.
Definition: poseNet.h:301
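A sketch of recoloring one keypoint's overlay; index 0 is arbitrary and the color channels follow the 0-255 convention noted for SetKeypointAlpha().

// set keypoint 0 to opaque green (RGBA, 0-255 range)
net->SetKeypointColor(0, make_float4(0.0f, 255.0f, 0.0f, 255.0f));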
poseNet::mPeaks
int * mPeaks
Definition: poseNet.h:371
poseNet::GetThreshold
float GetThreshold() const
Retrieve the minimum confidence threshold.
Definition: poseNet.h:266
poseNet::OVERLAY_NONE
@ OVERLAY_NONE
No overlay.
Definition: poseNet.h:146
poseNet::GetKeypointColor
float4 GetKeypointColor(uint32_t index) const
Get the overlay color of a keypoint.
Definition: poseNet.h:296
poseNet::ObjectPose::Right
float Right
Bounding box right, as determined by the right-most keypoint in the pose.
Definition: poseNet.h:117
poseNet::SetKeypointAlpha
void SetKeypointAlpha(uint32_t index, float alpha)
Set the alpha channel for a keypoint color (between 0-255).
Definition: poseNet.h:306
tensorNet
Abstract class for loading a tensor network with TensorRT.
Definition: tensorNet.h:218
poseNet::ObjectPose::FindLink
int FindLink(uint32_t a, uint32_t b) const
Find a link index by two keypoint IDs, or return -1 if not found.
Definition: poseNet.h:417
poseNet::MAX_OBJECTS
static const int MAX_OBJECTS
Definition: poseNet.h:340
poseNet::GetCategory
const char * GetCategory() const
Get the category of objects that are detected (e.g. 'body' or 'hand').
Definition: poseNet.h:276
poseNet::ObjectPose::Left
float Left
Bounding box left, as determined by the left-most keypoint in the pose.
Definition: poseNet.h:116
POSENET_DEFAULT_CMAP
#define POSENET_DEFAULT_CMAP
Name of default output blob of the confidence map for pose estimation ONNX model.
Definition: poseNet.h:43
poseNet::loadTopology
bool loadTopology(const char *json_path, Topology *topology)
poseNet::SetThreshold
void SetThreshold(float threshold)
Set the minimum confidence threshold.
Definition: poseNet.h:271
poseNet::ObjectPose::Keypoint::x
float x
The x coordinate of the keypoint.
Definition: poseNet.h:127
poseNet::Process
bool Process(T *image, uint32_t width, uint32_t height, std::vector< ObjectPose > &poses, uint32_t overlay=OVERLAY_DEFAULT)
Perform pose estimation on the given image, returning object poses, and overlay the results.
Definition: poseNet.h:220
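A sketch of walking the returned poses (net and image assumed from the earlier sketch).

// run pose estimation and print each pose's keypoints
std::vector<poseNet::ObjectPose> poses;

if( net->Process(image, width, height, poses) )
{
	for( const poseNet::ObjectPose& pose : poses )
	{
		printf("object %u has %zu keypoints and %zu links\n",
		       pose.ID, pose.Keypoints.size(), pose.Links.size());

		for( const poseNet::ObjectPose::Keypoint& kp : pose.Keypoints )
			printf("  %s at (%.1f, %.1f)\n", net->GetKeypointName(kp.ID), kp.x, kp.y);
	}
}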
poseNet::ObjectPose::Links
std::vector< std::array< uint32_t, 2 > > Links
List of links in the object.
Definition: poseNet.h:132
POSENET_DEFAULT_THRESHOLD
#define POSENET_DEFAULT_THRESHOLD
Default value of the minimum confidence threshold.
Definition: poseNet.h:55
poseNet::ObjectPose::ID
uint32_t ID
Object ID in the image frame, starting with 0.
Definition: poseNet.h:114
DEFAULT_MAX_BATCH_SIZE
#define DEFAULT_MAX_BATCH_SIZE
Default maximum batch size.
Definition: tensorNet.h:88
poseNet::mNumObjects
int mNumObjects
Definition: poseNet.h:375
poseNet::OVERLAY_LINKS
@ OVERLAY_LINKS
Overlay the skeleton links (bones) as lines.
Definition: poseNet.h:148
poseNet::ObjectPose::Keypoint::ID
uint32_t ID
Type ID of the keypoint - the name can be retrieved with poseNet::GetKeypointName()
Definition: poseNet.h:126
poseNet::FindKeypointID
int FindKeypointID(const char *name) const
Find the ID of a keypoint by name, or return -1 if not found.
Definition: poseNet.h:386
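A sketch that locates a named keypoint inside one detected pose; 'nose' is assumed to exist in the body topology, and pose is assumed to be one ObjectPose result from Process().

// map the keypoint name to its topology ID, then find it within this pose
const int noseID  = net->FindKeypointID("nose");
const int noseIdx = (noseID >= 0) ? pose.FindKeypoint(noseID) : -1;

if( noseIdx >= 0 )
	printf("nose at (%.1f, %.1f)\n", pose.Keypoints[noseIdx].x, pose.Keypoints[noseIdx].y);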
poseNet::poseNet
poseNet()
poseNet::MAX_LINKS
static const int MAX_LINKS
Definition: poseNet.h:339
poseNet::mKeypointScale
float mKeypointScale
Definition: poseNet.h:366
commandLine
Command line parser for extracting flags, values, and strings.
Definition: commandLine.h:35
poseNet::mKeypointColors
float4 * mKeypointColors
Definition: poseNet.h:368
poseNet::ObjectPose::Keypoint::y
float y
The y coordinate of the keypoint.
Definition: poseNet.h:128
poseNet::Topology
Definition: poseNet.h:342
poseNet::Topology::links
int links[MAX_LINKS *4]
Definition: poseNet.h:346
poseNet::OVERLAY_BOX
@ OVERLAY_BOX
Overlay object bounding boxes.
Definition: poseNet.h:147
poseNet::ObjectPose
The pose of an object, composed of links between keypoints.
Definition: poseNet.h:112
imageFormat
imageFormat
The imageFormat enum is used to identify the pixel format and colorspace of an image.
Definition: imageFormat.h:49
poseNet
Pose estimation models with TensorRT support.
Definition: poseNet.h:105
poseNet::ObjectPose::Keypoints
std::vector< Keypoint > Keypoints
List of keypoints in the object, which contain the keypoint ID and x/y coordinates.
Definition: poseNet.h:131