23 #ifndef __CUDA_HELPER_MATH_H_ 
   24 #define __CUDA_HELPER_MATH_H_ 
   26 #include "cuda_runtime.h" 
   51 inline float fminf(
float a, 
float b)
 
   56 inline float fmaxf(
float a, 
float b)
 
   61 inline int max(
int a, 
int b)
 
   66 inline int min(
int a, 
int b)
 
   73     return 1.0f / sqrtf(x);
 
  150     return make_float3(
float(a.x), 
float(a.y), 
float(a.z));
 
  154     return make_float3(
float(a.x), 
float(a.y), 
float(a.z));
 
  158     return make_float3(
float(a.x), 
float(a.y), 
float(a.z));
 
  162     return make_float3(
float(a.x), 
float(a.y), 
float(a.z));
 
  173 inline __host__ __device__ int3 
make_int3(int2 a, 
int s)
 
  179     return make_int3(
int(a.x), 
int(a.y), 
int(a.z));
 
  183     return make_int3(
int(a.x), 
int(a.y), 
int(a.z));
 
  266     return make_float4(
float(a.x), 
float(a.y), 
float(a.z), 
float(a.w));
 
  270     return make_float4(
float(a.x), 
float(a.y), 
float(a.z), 
float(a.w));
 
  274     return make_float4(
float(a.x), 
float(a.y), 
float(a.z), 0.0f);
 
  278     return make_float4(
float(a.x), 
float(a.y), 
float(a.z), 
float(a.w));
 
  289 inline __host__ __device__ int4 
make_int4(int3 a, 
int w)
 
  295     return make_int4(
int(a.x), 
int(a.y), 
int(a.z), 
int(a.w));
 
  299     return make_int4(
int(a.x), 
int(a.y), 
int(a.z), 
int(a.w));
 
  383     return make_int4(-a.x, -a.y, -a.z, -a.w);
 
  390 inline __host__ __device__ float2 
operator+(float2 a, float2 b)
 
  394 inline __host__ __device__ 
void operator+=(float2 &a, float2 b)
 
  399 inline __host__ __device__ float2 
operator+(float2 a, 
float b)
 
  403 inline __host__ __device__ float2 
operator+(
float b, float2 a)
 
  407 inline __host__ __device__ 
void operator+=(float2 &a, 
float b)
 
  413 inline __host__ __device__ int2 
operator+(int2 a, int2 b)
 
  422 inline __host__ __device__ int2 
operator+(int2 a, 
int b)
 
  426 inline __host__ __device__ int2 
operator+(
int b, int2 a)
 
  436 inline __host__ __device__ uint2 
operator+(uint2 a, uint2 b)
 
  440 inline __host__ __device__ 
void operator+=(uint2 &a, uint2 b)
 
  // Component-wise sum of two float3 values.
  inline __host__ __device__ float3 operator+(float3 a, float3 b)
  {
      // 'a' is a by-value copy, so it doubles as the result.
      a.x += b.x;
      a.y += b.y;
      a.z += b.z;
      return a;
  }
 
  464 inline __host__ __device__ 
void operator+=(float3 &a, float3 b)
 
  470 inline __host__ __device__ float3 
operator+(float3 a, 
float b)
 
  474 inline __host__ __device__ 
void operator+=(float3 &a, 
float b)
 
  // Component-wise sum of two int3 values.
  inline __host__ __device__ int3 operator+(int3 a, int3 b)
  {
      a.x += b.x;
      a.y += b.y;
      a.z += b.z;
      return a;
  }
 
  // Adds the scalar b to every component of the int3 a.
  inline __host__ __device__ int3 operator+(int3 a, int b)
  {
      a.x += b;
      a.y += b;
      a.z += b;
      return a;
  }
 
  // Component-wise sum of two uint3 values.
  inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
  {
      a.x += b.x;
      a.y += b.y;
      a.z += b.z;
      return a;
  }
 
  506 inline __host__ __device__ 
void operator+=(uint3 &a, uint3 b)
 
  // Component-wise sum of two uchar3 values; each sum is converted back
  // to the component type on assignment.
  inline __host__ __device__ uchar3 operator+(uchar3 a, uchar3 b)
  {
      a.x += b.x;
      a.y += b.y;
      a.z += b.z;
      return a;
  }
 
  527 inline __host__ __device__ 
void operator+=(uchar3 &a, uchar3 b)
 
  // Scalar-first overload: adds b to every component of the int3 a.
  inline __host__ __device__ int3 operator+(int b, int3 a)
  {
      a.x += b;
      a.y += b;
      a.z += b;
      return a;
  }
 
  552 inline __host__ __device__ float3 
operator+(
float b, float3 a)
 
  // Component-wise sum of two float4 values.
  inline __host__ __device__ float4 operator+(float4 a, float4 b)
  {
      a.x += b.x;
      a.y += b.y;
      a.z += b.z;
      a.w += b.w;
      return a;
  }
 
  561 inline __host__ __device__ 
void operator+=(float4 &a, float4 b)
 
  // Adds the scalar b to every component of the float4 a.
  inline __host__ __device__ float4 operator+(float4 a, float b)
  {
      a.x += b;
      a.y += b;
      a.z += b;
      a.w += b;
      return a;
  }
 
  // Scalar-first overload: adds b to every component of the float4 a.
  inline __host__ __device__ float4 operator+(float b, float4 a)
  {
      a.x += b;
      a.y += b;
      a.z += b;
      a.w += b;
      return a;
  }
 
  576 inline __host__ __device__ 
void operator+=(float4 &a, 
float b)
 
  // Component-wise sum of two int4 values.
  inline __host__ __device__ int4 operator+(int4 a, int4 b)
  {
      a.x += b.x;
      a.y += b.y;
      a.z += b.z;
      a.w += b.w;
      return a;
  }
 
  // Adds the scalar b to every component of the int4 a.
  inline __host__ __device__ int4 operator+(int4 a, int b)
  {
      a.x += b;
      a.y += b;
      a.z += b;
      a.w += b;
      return a;
  }
 
  // Scalar-first overload: adds b to every component of the int4 a.
  inline __host__ __device__ int4 operator+(int b, int4 a)
  {
      a.x += b;
      a.y += b;
      a.z += b;
      a.w += b;
      return a;
  }
 
  // Component-wise sum of two uint4 values.
  inline __host__ __device__ uint4 operator+(uint4 a, uint4 b)
  {
      a.x += b.x;
      a.y += b.y;
      a.z += b.z;
      a.w += b.w;
      return a;
  }
 
  615 inline __host__ __device__ 
void operator+=(uint4 &a, uint4 b)
 
  624     return make_uint4(a.x + b, a.y + b, a.z + b,  a.w + b);
 
  628     return make_uint4(a.x + b, a.y + b, a.z + b,  a.w + b);
 
  // Component-wise sum of two uchar4 values; each sum is converted back
  // to the component type on assignment.
  inline __host__ __device__ uchar4 operator+(uchar4 a, uchar4 b)
  {
      a.x += b.x;
      a.y += b.y;
      a.z += b.z;
      a.w += b.w;
      return a;
  }
 
  642 inline __host__ __device__ 
void operator+=(uchar4 &a, uchar4 b)
 
  651     return make_uchar4(a.x + b, a.y + b, a.z + b,  a.w + b);
 
  655     return make_uchar4(a.x + b, a.y + b, a.z + b,  a.w + b);
 
  669 inline __host__ __device__ float2 
operator-(float2 a, float2 b)
 
  673 inline __host__ __device__ 
void operator-=(float2 &a, float2 b)
 
  678 inline __host__ __device__ float2 
operator-(float2 a, 
float b)
 
  682 inline __host__ __device__ float2 
operator-(
float b, float2 a)
 
  686 inline __host__ __device__ 
void operator-=(float2 &a, 
float b)
 
  692 inline __host__ __device__ int2 
operator-(int2 a, int2 b)
 
  701 inline __host__ __device__ int2 
operator-(int2 a, 
int b)
 
  705 inline __host__ __device__ int2 
operator-(
int b, int2 a)
 
  715 inline __host__ __device__ uint2 
operator-(uint2 a, uint2 b)
 
  719 inline __host__ __device__ 
void operator-=(uint2 &a, uint2 b)
 
  // Component-wise difference a - b of two float3 values.
  inline __host__ __device__ float3 operator-(float3 a, float3 b)
  {
      a.x -= b.x;
      a.y -= b.y;
      a.z -= b.z;
      return a;
  }
 
  742 inline __host__ __device__ 
void operator-=(float3 &a, float3 b)
 
  748 inline __host__ __device__ float3 
operator-(float3 a, 
float b)
 
  752 inline __host__ __device__ float3 
operator-(
float b, float3 a)
 
  756 inline __host__ __device__ 
void operator-=(float3 &a, 
float b)
 
  // Component-wise difference a - b of two int3 values.
  inline __host__ __device__ int3 operator-(int3 a, int3 b)
  {
      a.x -= b.x;
      a.y -= b.y;
      a.z -= b.z;
      return a;
  }
 
  // Subtracts the scalar b from every component of the int3 a.
  inline __host__ __device__ int3 operator-(int3 a, int b)
  {
      a.x -= b;
      a.y -= b;
      a.z -= b;
      return a;
  }
 
  // Scalar-first overload: each result component is b minus the
  // corresponding component of a (note the operand order).
  inline __host__ __device__ int3 operator-(int b, int3 a)
  {
      a.x = b - a.x;
      a.y = b - a.y;
      a.z = b - a.z;
      return a;
  }
 
  // Component-wise difference a - b of two uint3 values.
  inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
  {
      a.x -= b.x;
      a.y -= b.y;
      a.z -= b.z;
      return a;
  }
 
  792 inline __host__ __device__ 
void operator-=(uint3 &a, uint3 b)
 
  // Component-wise difference a - b of two float4 values.
  inline __host__ __device__ float4 operator-(float4 a, float4 b)
  {
      a.x -= b.x;
      a.y -= b.y;
      a.z -= b.z;
      a.w -= b.w;
      return a;
  }
 
  817 inline __host__ __device__ 
void operator-=(float4 &a, float4 b)
 
  // Subtracts the scalar b from every component of the float4 a.
  inline __host__ __device__ float4 operator-(float4 a, float b)
  {
      a.x -= b;
      a.y -= b;
      a.z -= b;
      a.w -= b;
      return a;
  }
 
  828 inline __host__ __device__ 
void operator-=(float4 &a, 
float b)
 
  // Component-wise difference a - b of two int4 values.
  inline __host__ __device__ int4 operator-(int4 a, int4 b)
  {
      a.x -= b.x;
      a.y -= b.y;
      a.z -= b.z;
      a.w -= b.w;
      return a;
  }
 
  // Subtracts the scalar b from every component of the int4 a.
  inline __host__ __device__ int4 operator-(int4 a, int b)
  {
      a.x -= b;
      a.y -= b;
      a.z -= b;
      a.w -= b;
      return a;
  }
 
  // Scalar-first overload: each result component is b minus the
  // corresponding component of a (note the operand order).
  inline __host__ __device__ int4 operator-(int b, int4 a)
  {
      a.x = b - a.x;
      a.y = b - a.y;
      a.z = b - a.z;
      a.w = b - a.w;
      return a;
  }
 
  // Component-wise difference a - b of two uint4 values.
  inline __host__ __device__ uint4 operator-(uint4 a, uint4 b)
  {
      a.x -= b.x;
      a.y -= b.y;
      a.z -= b.z;
      a.w -= b.w;
      return a;
  }
 
  867 inline __host__ __device__ 
void operator-=(uint4 &a, uint4 b)
 
  876     return make_uint4(a.x - b, a.y - b, a.z - b,  a.w - b);
 
  880     return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);
 
  894 inline __host__ __device__ float2 
operator*(float2 a, float2 b)
 
  898 inline __host__ __device__ 
void operator*=(float2 &a, float2 b)
 
  903 inline __host__ __device__ float2 
operator*(float2 a, 
float b)
 
  907 inline __host__ __device__ float2 
operator*(
float b, float2 a)
 
  911 inline __host__ __device__ 
void operator*=(float2 &a, 
float b)
 
  917 inline __host__ __device__ int2 
operator*(int2 a, int2 b)
 
  926 inline __host__ __device__ int2 
operator*(int2 a, 
int b)
 
  930 inline __host__ __device__ int2 
operator*(
int b, int2 a)
 
  940 inline __host__ __device__ uint2 
operator*(uint2 a, uint2 b)
 
  944 inline __host__ __device__ 
void operator*=(uint2 &a, uint2 b)
 
  // Component-wise product of two uchar2 values; each product is
  // converted back to the component type on assignment.
  inline __host__ __device__ uchar2 operator*(uchar2 a, uchar2 b)
  {
      a.x *= b.x;
      a.y *= b.y;
      return a;
  }
 
  967 inline __host__ __device__ 
void operator*=(uchar2 &a, uchar2 b)
 
  974     return make_uchar2(a.x * b, a.y * b);
 
  978     return make_uchar2(b * a.x, b * a.y);
 
  // Scales both components of the uchar2 a by the float b. The products
  // are computed in float and converted back to the component type.
  inline __host__ __device__ uchar2 operator*(uchar2 a, float b)
  {
      a.x = a.x * b;
      a.y = a.y * b;
      return a;
  }
 
  // Scalar-first overload: scales both components of the uchar2 a by
  // the float b. The products are computed in float and converted back
  // to the component type.
  inline __host__ __device__ uchar2 operator*(float b, uchar2 a)
  {
      a.x = b * a.x;
      a.y = b * a.y;
      return a;
  }
 
  993 inline __host__ __device__ 
void operator*=(uchar2 &a, 
float b)
 
  // Component-wise (not dot or cross) product of two float3 values.
  inline __host__ __device__ float3 operator*(float3 a, float3 b)
  {
      a.x *= b.x;
      a.y *= b.y;
      a.z *= b.z;
      return a;
  }
 
 1009 inline __host__ __device__ float3 
operator*(float3 a, 
float b)
 
 1013 inline __host__ __device__ float3 
operator*(
float b, float3 a)
 
 1026     return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
 
 1036     return make_int3(a.x * b, a.y * b, a.z * b);
 
 1040     return make_int3(b * a.x, b * a.y, b * a.z);
 
 // Component-wise product of two uint3 values.
 inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
 {
     a.x *= b.x;
     a.y *= b.y;
     a.z *= b.z;
     return a;
 }
 
 1061     return make_uint3(a.x * b, a.y * b, a.z * b);
 
 1065     return make_uint3(b * a.x, b * a.y, b * a.z);
 
 // Component-wise product of two uchar3 values; each product is
 // converted back to the component type on assignment.
 inline __host__ __device__ uchar3 operator*(uchar3 a, uchar3 b)
 {
     a.x *= b.x;
     a.y *= b.y;
     a.z *= b.z;
     return a;
 }
 
 1092 inline __host__ __device__ uchar3 
operator*(uchar3 a, 
float b)
 
 1096 inline __host__ __device__ uchar3 
operator*(
float b, uchar3 a)
 
 // Component-wise product of two float4 values.
 inline __host__ __device__ float4 operator*(float4 a, float4 b)
 {
     a.x *= b.x;
     a.y *= b.y;
     a.z *= b.z;
     a.w *= b.w;
     return a;
 }
 
 // Scales every component of the float4 a by the scalar b.
 inline __host__ __device__ float4 operator*(float4 a, float b)
 {
     a.x *= b;
     a.y *= b;
     a.z *= b;
     a.w *= b;
     return a;
 }
 
 // Scalar-first overload: scales every component of the float4 a by b.
 inline __host__ __device__ float4 operator*(float b, float4 a)
 {
     a.x = b * a.x;
     a.y = b * a.y;
     a.z = b * a.z;
     a.w = b * a.w;
     return a;
 }
 
 1142     return make_int4(a.x * b.x, a.y * b.y, a.z * b.z,  a.w * b.w);
 
 1153     return make_int4(a.x * b, a.y * b, a.z * b,  a.w * b);
 
 1157     return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);
 
 // Component-wise product of two uint4 values.
 inline __host__ __device__ uint4 operator*(uint4 a, uint4 b)
 {
     a.x *= b.x;
     a.y *= b.y;
     a.z *= b.z;
     a.w *= b.w;
     return a;
 }
 
 1180     return make_uint4(a.x * b, a.y * b, a.z * b,  a.w * b);
 
 1184     return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);
 
 // Component-wise product of two uchar4 values; each product is
 // converted back to the component type on assignment.
 inline __host__ __device__ uchar4 operator*(uchar4 a, uchar4 b)
 {
     a.x *= b.x;
     a.y *= b.y;
     a.z *= b.z;
     a.w *= b.w;
     return a;
 }
 
 1207     return make_uchar4(a.x * b, a.y * b, a.z * b,  a.w * b);
 
 1211     return make_uchar4(b * a.x, b * a.y, b * a.z, b * a.w);
 
 // Scales every component of the uchar4 a by the float b. The products
 // are computed in float and converted back to the component type.
 inline __host__ __device__ uchar4 operator*(uchar4 a, float b)
 {
     a.x = a.x * b;
     a.y = a.y * b;
     a.z = a.z * b;
     a.w = a.w * b;
     return a;
 }
 
 // Scalar-first overload: scales every component of the uchar4 a by
 // the float b. The products are computed in float and converted back
 // to the component type.
 inline __host__ __device__ uchar4 operator*(float b, uchar4 a)
 {
     a.x = b * a.x;
     a.y = b * a.y;
     a.z = b * a.z;
     a.w = b * a.w;
     return a;
 }
 
 1240 inline __host__ __device__ float2 
operator/(float2 a, float2 b)
 
 1249 inline __host__ __device__ float2 
operator/(float2 a, 
float b)
 
 1258 inline __host__ __device__ float2 
operator/(
float b, float2 a)
 
 // Component-wise quotient a / b of two float3 values.
 inline __host__ __device__ float3 operator/(float3 a, float3 b)
 {
     a.x /= b.x;
     a.y /= b.y;
     a.z /= b.z;
     return a;
 }
 
 1273 inline __host__ __device__ float3 
operator/(float3 a, 
float b)
 
 1283 inline __host__ __device__ float3 
operator/(
float b, float3 a)
 
 // Component-wise quotient a / b of two float4 values.
 inline __host__ __device__ float4 operator/(float4 a, float4 b)
 {
     a.x /= b.x;
     a.y /= b.y;
     a.z /= b.z;
     a.w /= b.w;
     return a;
 }
 
 // Divides every component of the float4 a by the scalar b.
 // Each component is divided individually (no reciprocal is formed).
 inline __host__ __device__ float4 operator/(float4 a, float b)
 {
     a.x /= b;
     a.y /= b;
     a.z /= b;
     a.w /= b;
     return a;
 }
 
 // Scalar-first overload: each result component is b divided by the
 // corresponding component of a (note the operand order).
 inline __host__ __device__ float4 operator/(float b, float4 a)
 {
     a.x = b / a.x;
     a.y = b / a.y;
     a.z = b / a.z;
     a.w = b / a.w;
     return a;
 }
 
 1319 inline  __host__ __device__ float2 
fminf(float2 a, float2 b)
 
 1323 inline __host__ __device__ float3 
fminf(float3 a, float3 b)
 
 1327 inline  __host__ __device__ float4 
fminf(float4 a, float4 b)
 
 1332 inline __host__ __device__ int2 
min(int2 a, int2 b)
 
 1336 inline __host__ __device__ int3 
min(int3 a, int3 b)
 
 1340 inline __host__ __device__ int4 
min(int4 a, int4 b)
 
 1345 inline __host__ __device__ uint2 
min(uint2 a, uint2 b)
 
 1349 inline __host__ __device__ uint3 
min(uint3 a, uint3 b)
 
 1353 inline __host__ __device__ uint4 
min(uint4 a, uint4 b)
 
 1362 inline __host__ __device__ float2 
fmaxf(float2 a, float2 b)
 
 1366 inline __host__ __device__ float3 
fmaxf(float3 a, float3 b)
 
 1370 inline __host__ __device__ float4 
fmaxf(float4 a, float4 b)
 
 1375 inline __host__ __device__ int2 
max(int2 a, int2 b)
 
 1379 inline __host__ __device__ int3 
max(int3 a, int3 b)
 
 1383 inline __host__ __device__ int4 
max(int4 a, int4 b)
 
 1388 inline __host__ __device__ uint2 
max(uint2 a, uint2 b)
 
 1392 inline __host__ __device__ uint3 
max(uint3 a, uint3 b)
 
 1396 inline __host__ __device__ uint4 
max(uint4 a, uint4 b)
 
 1406 inline __device__ __host__ 
float lerp(
float a, 
float b, 
float t)
 
 1410 inline __device__ __host__ float2 
lerp(float2 a, float2 b, 
float t)
 
 1414 inline __device__ __host__ float3 
lerp(float3 a, float3 b, 
float t)
 
 1418 inline __device__ __host__ float4 
lerp(float4 a, float4 b, 
float t)
 
 1428 inline __device__ __host__ 
float clamp(
float f, 
float a, 
float b)
 
 // Limits f to the range given by a (lower bound) and b (upper bound):
 // the upper bound is applied first, then the lower bound.
 inline __device__ __host__ int clamp(int f, int a, int b)
 {
     const int cappedAbove = min(f, b);
     return max(a, cappedAbove);
 }
 
 1438     return max(a, 
min(f, b));
 
 1441 inline __device__ __host__ float2 
clamp(float2 v, 
float a, 
float b)
 
 1445 inline __device__ __host__ float2 
clamp(float2 v, float2 a, float2 b)
 
 1449 inline __device__ __host__ float3 
clamp(float3 v, 
float a, 
float b)
 
 1453 inline __device__ __host__ float3 
clamp(float3 v, float3 a, float3 b)
 
 // Clamps every component of v with the scalar clamp overload, using
 // the same bounds a and b for all four components.
 inline __device__ __host__ float4 clamp(float4 v, float a, float b)
 {
     v.x = clamp(v.x, a, b);
     v.y = clamp(v.y, a, b);
     v.z = clamp(v.z, a, b);
     v.w = clamp(v.w, a, b);
     return v;
 }
 
 // Clamps every component of v with the scalar clamp overload, taking
 // the bounds for each component from the matching components of a and b.
 inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
 {
     v.x = clamp(v.x, a.x, b.x);
     v.y = clamp(v.y, a.y, b.y);
     v.z = clamp(v.z, a.z, b.z);
     v.w = clamp(v.w, a.w, b.w);
     return v;
 }
 
 1466 inline __device__ __host__ int2 
clamp(int2 v, 
int a, 
int b)
 
 1470 inline __device__ __host__ int2 
clamp(int2 v, int2 a, int2 b)
 
 1474 inline __device__ __host__ int3 
clamp(int3 v, 
int a, 
int b)
 
 1478 inline __device__ __host__ int3 
clamp(int3 v, int3 a, int3 b)
 
 // Clamps every component of v with the scalar clamp overload, using
 // the same bounds a and b for all four components.
 inline __device__ __host__ int4 clamp(int4 v, int a, int b)
 {
     v.x = clamp(v.x, a, b);
     v.y = clamp(v.y, a, b);
     v.z = clamp(v.z, a, b);
     v.w = clamp(v.w, a, b);
     return v;
 }
 
 // Clamps every component of v with the scalar clamp overload, taking
 // the bounds for each component from the matching components of a and b.
 inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)
 {
     v.x = clamp(v.x, a.x, b.x);
     v.y = clamp(v.y, a.y, b.y);
     v.z = clamp(v.z, a.z, b.z);
     v.w = clamp(v.w, a.w, b.w);
     return v;
 }
 
 1495 inline __device__ __host__ uint2 
clamp(uint2 v, uint2 a, uint2 b)
 
 1503 inline __device__ __host__ uint3 
clamp(uint3 v, uint3 a, uint3 b)
 
 1509     return make_uint4(
clamp(v.x, a, b), 
clamp(v.y, a, b), 
clamp(v.z, a, b), 
clamp(v.w, a, b));
 
 // Clamps every component of v with the scalar clamp overload, taking
 // the bounds for each component from the matching components of a and b.
 inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)
 {
     v.x = clamp(v.x, a.x, b.x);
     v.y = clamp(v.y, a.y, b.y);
     v.z = clamp(v.z, a.z, b.z);
     v.w = clamp(v.w, a.w, b.w);
     return v;
 }
 
 // Dot product of two float2 values: sum of the per-component products.
 inline __host__ __device__ float dot(float2 a, float2 b)
 {
     return a.x * b.x
          + a.y * b.y;
 }
 
 // Dot product of two float3 values: sum of the per-component products.
 inline __host__ __device__ float dot(float3 a, float3 b)
 {
     return a.x * b.x
          + a.y * b.y
          + a.z * b.z;
 }
 
 // Dot product of two float4 values: sum of the per-component products.
 inline __host__ __device__ float dot(float4 a, float4 b)
 {
     return a.x * b.x
          + a.y * b.y
          + a.z * b.z
          + a.w * b.w;
 }
 
 // Dot product of two int2 values: sum of the per-component products.
 inline __host__ __device__ int dot(int2 a, int2 b)
 {
     return a.x * b.x
          + a.y * b.y;
 }
 
 // Dot product of two int3 values: sum of the per-component products.
 inline __host__ __device__ int dot(int3 a, int3 b)
 {
     return a.x * b.x
          + a.y * b.y
          + a.z * b.z;
 }
 
 // Dot product of two int4 values: sum of the per-component products.
 inline __host__ __device__ int dot(int4 a, int4 b)
 {
     return a.x * b.x
          + a.y * b.y
          + a.z * b.z
          + a.w * b.w;
 }
 
 // Dot product of two uint2 values: sum of the per-component products.
 inline __host__ __device__ uint dot(uint2 a, uint2 b)
 {
     return a.x * b.x
          + a.y * b.y;
 }
 
 // Dot product of two uint3 values: sum of the per-component products.
 inline __host__ __device__ uint dot(uint3 a, uint3 b)
 {
     return a.x * b.x
          + a.y * b.y
          + a.z * b.z;
 }
 
 // Dot product of two uint4 values: sum of the per-component products.
 inline __host__ __device__ uint dot(uint4 a, uint4 b)
 {
     return a.x * b.x
          + a.y * b.y
          + a.z * b.z
          + a.w * b.w;
 }
 
 // Length of a float2: square root of the vector's dot product with itself.
 inline __host__ __device__ float length(float2 v)
 {
     const float squaredLength = dot(v, v);
     return sqrtf(squaredLength);
 }
 
 // Length of a float3: square root of the vector's dot product with itself.
 inline __host__ __device__ float length(float3 v)
 {
     const float squaredLength = dot(v, v);
     return sqrtf(squaredLength);
 }
 
 // Length of a float4: square root of the vector's dot product with itself.
 inline __host__ __device__ float length(float4 v)
 {
     const float squaredLength = dot(v, v);
     return sqrtf(squaredLength);
 }
 
 1600 inline __host__ __device__ float2 
floorf(float2 v)
 
 1604 inline __host__ __device__ float3 
floorf(float3 v)
 
 1608 inline __host__ __device__ float4 
floorf(float4 v)
 
 1617 inline __host__ __device__ 
float fracf(
float v)
 
 1621 inline __host__ __device__ float2 
fracf(float2 v)
 
 1625 inline __host__ __device__ float3 
fracf(float3 v)
 
 1629 inline __host__ __device__ float4 
fracf(float4 v)
 
 1638 inline __host__ __device__ float2 
fmodf(float2 a, float2 b)
 
 1642 inline __host__ __device__ float3 
fmodf(float3 a, float3 b)
 
 1646 inline __host__ __device__ float4 
fmodf(float4 a, float4 b)
 
 1655 inline __host__ __device__ float2 
fabs(float2 v)
 
 1659 inline __host__ __device__ float3 
fabs(float3 v)
 
 1663 inline __host__ __device__ float4 
fabs(float4 v)
 
 1668 inline __host__ __device__ int2 
abs(int2 v)
 
 1672 inline __host__ __device__ int3 
abs(int3 v)
 
 1676 inline __host__ __device__ int4 
abs(int4 v)
 
 // Computes i - 2 * n * dot(n, i).
 // NOTE(review): this is the usual reflection of i about n and presumably
 // expects n to be unit length; nothing here normalizes it -- confirm
 // against callers.
 inline __host__ __device__ float3 reflect(float3 i, float3 n)
 {
     const float nDotI = dot(n, i);
     // Same grouping as the one-line form: (2 * n) first, then * dot.
     const float3 offset = (2.0f * n) * nDotI;
     return i - offset;
 }
 
 // Cross product a x b of two float3 values.
 inline __host__ __device__ float3 cross(float3 a, float3 b)
 {
     const float cx = a.y*b.z - a.z*b.y;
     const float cy = a.z*b.x - a.x*b.z;
     const float cz = a.x*b.y - a.y*b.x;
     return make_float3(cx, cy, cz);
 }
 
 // Maps x to its position between a and b, clamps that to [0, 1], and
 // returns the cubic t*t*(3 - 2*t) of the clamped value.
 // a == b is not special-cased; the division below is performed as-is.
 inline __device__ __host__ float smoothstep(float a, float b, float x)
 {
     const float t = clamp((x - a) / (b - a), 0.0f, 1.0f);
     return t * t * (3.0f - (2.0f * t));
 }
 
 1713 inline __device__ __host__ float2 
smoothstep(float2 a, float2 b, float2 x)
 
 1715     float2 y = 
clamp((x - a) / (b - a), 0.0f, 1.0f);
 
 1718 inline __device__ __host__ float3 
smoothstep(float3 a, float3 b, float3 x)
 
 1720     float3 y = 
clamp((x - a) / (b - a), 0.0f, 1.0f);
 
 1723 inline __device__ __host__ float4 
smoothstep(float4 a, float4 b, float4 x)
 
 1725     float4 y = 
clamp((x - a) / (b - a), 0.0f, 1.0f);