// docs/html/segNet_8h_source.html

/*

 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.

 *

 * Permission is hereby granted, free of charge, to any person obtaining a

 * copy of this software and associated documentation files (the "Software"),

 * to deal in the Software without restriction, including without limitation

 * the rights to use, copy, modify, merge, publish, distribute, sublicense,

 * and/or sell copies of the Software, and to permit persons to whom the

 * Software is furnished to do so, subject to the following conditions:

 *

 * The above copyright notice and this permission notice shall be included in

 * all copies or substantial portions of the Software.

 *

 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL

 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING

 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER

 * DEALINGS IN THE SOFTWARE.

 */


#ifndef __SEGMENTATION_NET_H__

#define __SEGMENTATION_NET_H__


#include "tensorNet.h"


/**
 * Default name of the network's input layer (used as the default `input`
 * argument of segNet::Create() and shown in the usage text).
 */
#define SEGNET_DEFAULT_INPUT   "input_0"

/**
 * Default name of the network's output layer (used as the default `output`
 * argument of segNet::Create() and shown in the usage text).
 */
#define SEGNET_DEFAULT_OUTPUT  "output_0"

/**
 * Default overlay alpha blending value (the usage text documents the
 * accepted range as 0-255).
 */
#define SEGNET_DEFAULT_ALPHA 150

/**
 * Model type string for segmentation networks.
 * NOTE(review): the consumers of this string are not visible in this header.
 */
#define SEGNET_MODEL_TYPE "segmentation"

/**
 * Two-level stringification helpers, so that the *expanded* value of a macro
 * (e.g. SEGNET_DEFAULT_ALPHA -> "150") can be pasted into a string literal.
 */
#define SEGNET_STRINGIFY_IMPL(x) #x
#define SEGNET_STRINGIFY(x)      SEGNET_STRINGIFY_IMPL(x)

/**
 * Command-line usage text for segNet, returned by segNet::Usage().
 *
 * Every continuation line must directly follow the preceding backslash:
 * a blank line in between ends the macro after the first literal.
 * The defaults shown for --input-blob, --output-blob and --alpha are taken
 * from the SEGNET_DEFAULT_* macros so the text cannot drift from the values.
 */
#define SEGNET_USAGE_STRING  "segNet arguments: \n" \
    "  --network=NETWORK    pre-trained model to load, one of the following:\n" \
    "                           * fcn-resnet18-cityscapes-512x256\n" \
    "                           * fcn-resnet18-cityscapes-1024x512\n" \
    "                           * fcn-resnet18-cityscapes-2048x1024\n" \
    "                           * fcn-resnet18-deepscene-576x320\n" \
    "                           * fcn-resnet18-deepscene-864x480\n" \
    "                           * fcn-resnet18-mhp-512x320\n" \
    "                           * fcn-resnet18-mhp-640x360\n" \
    "                           * fcn-resnet18-voc-320x320 (default)\n" \
    "                           * fcn-resnet18-voc-512x320\n" \
    "                           * fcn-resnet18-sun-512x400\n" \
    "                           * fcn-resnet18-sun-640x512\n" \
    "  --model=MODEL        path to custom model to load (caffemodel, uff, or onnx)\n" \
    "  --prototxt=PROTOTXT  path to custom prototxt to load (for .caffemodel only)\n" \
    "  --labels=LABELS      path to text file containing the labels for each class\n" \
    "  --colors=COLORS      path to text file containing the colors for each class\n" \
    "  --input-blob=INPUT   name of the input layer (default: '" SEGNET_DEFAULT_INPUT "')\n" \
    "  --output-blob=OUTPUT name of the output layer (default: '" SEGNET_DEFAULT_OUTPUT "')\n" \
    "  --alpha=ALPHA        overlay alpha blending value, range 0-255 (default: " SEGNET_STRINGIFY(SEGNET_DEFAULT_ALPHA) ")\n" \
    "  --visualize=VISUAL   visualization flags (e.g. --visualize=overlay,mask)\n" \
    "                       valid combinations are:  'overlay', 'mask'\n" \
    "  --profile            enable layer profiling in TensorRT\n\n"


class segNet : public tensorNet

{

public:

        enum FilterMode

        {

                FILTER_POINT = 0,

                FILTER_LINEAR

        };


        enum VisualizationFlags

        {

                VISUALIZE_OVERLAY = (1 << 0),

                VISUALIZE_MASK    = (1 << 1),

        };


        static uint32_t VisualizationFlagsFromStr( const char* str, uint32_t default_value=VISUALIZE_OVERLAY );


        static FilterMode FilterModeFromStr( const char* str, FilterMode default_value=FILTER_LINEAR );


        static segNet* Create( const char* network="fcn-resnet18-voc", uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,

                                           precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true );


        static segNet* Create( const char* prototxt_path, const char* model_path,

                                           const char* class_labels, const char* class_colors=NULL,

                                           const char* input = SEGNET_DEFAULT_INPUT,

                                           const char* output = SEGNET_DEFAULT_OUTPUT,

                                           uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,

                                           precisionType precision=TYPE_FASTEST,

                                           deviceType device=DEVICE_GPU, bool allowGPUFallback=true );


        static segNet* Create( int argc, char** argv );


        static segNet* Create( const commandLine& cmdLine );


        static inline const char* Usage()               { return SEGNET_USAGE_STRING; }


        virtual ~segNet();


        template<typename T> bool Process( T* input, uint32_t width, uint32_t height, const char* ignore_class="void" )         { return Process((void*)input, width, height, imageFormatFromType<T>(), ignore_class); }


        bool Process( void* input, uint32_t width, uint32_t height, imageFormat format, const char* ignore_class="void" );


        bool Process( float* input, uint32_t width, uint32_t height, const char* ignore_class="void" );


        template<typename T> bool Mask( T* output, uint32_t width, uint32_t height, FilterMode filter=FILTER_LINEAR )                           { return Mask((void*)output, width, height, imageFormatFromType<T>(), filter); }


        bool Mask( void* output, uint32_t width, uint32_t height, imageFormat format, FilterMode filter=FILTER_LINEAR );


        bool Mask( float* output, uint32_t width, uint32_t height, FilterMode filter=FILTER_LINEAR );


        bool Mask( uint8_t* output, uint32_t width, uint32_t height );


        template<typename T> bool Overlay( T* output, uint32_t width, uint32_t height, FilterMode filter=FILTER_LINEAR )                        { return Overlay((void*)output, width, height, imageFormatFromType<T>(), filter); }


        bool Overlay( void* output, uint32_t width, uint32_t height, imageFormat format, FilterMode filter=FILTER_LINEAR );


        bool Overlay( float* output, uint32_t width, uint32_t height, FilterMode filter=FILTER_LINEAR );


        int FindClassID( const char* label_name );


        inline uint32_t GetNumClasses() const                                           { return DIMS_C(mOutputs[0].dims); }


        inline const char* GetClassLabel( uint32_t id ) const                   { return GetClassDesc(id); }


        inline const char* GetClassDesc( uint32_t id ) const                    { return id < mClassLabels.size() ? mClassLabels[id].c_str() : NULL; }


        inline float4 GetClassColor( uint32_t id ) const                                { return mClassColors[id]; }


        void SetClassColor( uint32_t classIndex, const float4& color );


        void SetClassColor( uint32_t classIndex, float r, float g, float b, float a=255.0f );


        float GetOverlayAlpha() const;


        void SetOverlayAlpha( float alpha, bool explicit_exempt=true );


        inline const char* GetClassPath() const                                         { return mClassPath.c_str(); }


        inline uint32_t GetGridWidth() const                                            { return DIMS_W(mOutputs[0].dims); }


        inline uint32_t GetGridHeight() const                                           { return DIMS_H(mOutputs[0].dims); }


protected:

        segNet();


        bool classify( const char* ignore_class );


        bool overlayPoint( void* input, uint32_t in_width, uint32_t in_height, imageFormat in_format, void* output, uint32_t out_width, uint32_t out_height, imageFormat out_format, bool mask_only );

        bool overlayLinear( void* input, uint32_t in_width, uint32_t in_height, imageFormat in_format, void* output, uint32_t out_width, uint32_t out_height, imageFormat out_format, bool mask_only );


        bool loadClassColors( const char* filename );

        bool loadClassLabels( const char* filename );

        bool saveClassLegend( const char* filename );


        std::vector<std::string> mClassLabels;

        std::string mClassPath;


        bool*    mColorsAlphaSet;

        float4*  mClassColors;

        uint8_t* mClassMap;

        void*     mLastInputImg;

        uint32_t          mLastInputWidth;

        uint32_t          mLastInputHeight;

        imageFormat mLastInputFormat;

};


#endif