23 #ifndef __CUDA_HELPER_MATH_H_
24 #define __CUDA_HELPER_MATH_H_
26 #include "cuda_runtime.h"
51 inline float fminf(
float a,
float b)
56 inline float fmaxf(
float a,
float b)
61 inline int max(
int a,
int b)
66 inline int min(
int a,
int b)
73 return 1.0f / sqrtf(x);
150 return make_float3(
float(a.x),
float(a.y),
float(a.z));
154 return make_float3(
float(a.x),
float(a.y),
float(a.z));
158 return make_float3(
float(a.x),
float(a.y),
float(a.z));
162 return make_float3(
float(a.x),
float(a.y),
float(a.z));
173 inline __host__ __device__ int3
make_int3(int2 a,
int s)
179 return make_int3(
int(a.x),
int(a.y),
int(a.z));
183 return make_int3(
int(a.x),
int(a.y),
int(a.z));
266 return make_float4(
float(a.x),
float(a.y),
float(a.z),
float(a.w));
270 return make_float4(
float(a.x),
float(a.y),
float(a.z),
float(a.w));
274 return make_float4(
float(a.x),
float(a.y),
float(a.z), 0.0f);
278 return make_float4(
float(a.x),
float(a.y),
float(a.z),
float(a.w));
289 inline __host__ __device__ int4
make_int4(int3 a,
int w)
295 return make_int4(
int(a.x),
int(a.y),
int(a.z),
int(a.w));
299 return make_int4(
int(a.x),
int(a.y),
int(a.z),
int(a.w));
383 return make_int4(-a.x, -a.y, -a.z, -a.w);
390 inline __host__ __device__ float2
operator+(float2 a, float2 b)
394 inline __host__ __device__
void operator+=(float2 &a, float2 b)
399 inline __host__ __device__ float2
operator+(float2 a,
float b)
403 inline __host__ __device__ float2
operator+(
float b, float2 a)
407 inline __host__ __device__
void operator+=(float2 &a,
float b)
413 inline __host__ __device__ int2
operator+(int2 a, int2 b)
422 inline __host__ __device__ int2
operator+(int2 a,
int b)
426 inline __host__ __device__ int2
operator+(
int b, int2 a)
436 inline __host__ __device__ uint2
operator+(uint2 a, uint2 b)
440 inline __host__ __device__
void operator+=(uint2 &a, uint2 b)
// Component-wise sum of two float3 vectors.
inline __host__ __device__ float3 operator+(float3 a, float3 b)
{
    float3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}
464 inline __host__ __device__
void operator+=(float3 &a, float3 b)
470 inline __host__ __device__ float3
operator+(float3 a,
float b)
474 inline __host__ __device__
void operator+=(float3 &a,
float b)
// Component-wise sum of two int3 vectors.
inline __host__ __device__ int3 operator+(int3 a, int3 b)
{
    int3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}
// Adds the scalar b to every component of the int3 vector a.
inline __host__ __device__ int3 operator+(int3 a, int b)
{
    int3 sum;
    sum.x = a.x + b;
    sum.y = a.y + b;
    sum.z = a.z + b;
    return sum;
}
// Component-wise sum of two uint3 vectors.
inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
{
    uint3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}
506 inline __host__ __device__
void operator+=(uint3 &a, uint3 b)
// Component-wise sum of two uchar3 vectors.
inline __host__ __device__ uchar3 operator+(uchar3 a, uchar3 b)
{
    uchar3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}
527 inline __host__ __device__
void operator+=(uchar3 &a, uchar3 b)
// Scalar-first form: adds the scalar b to every component of the int3 vector a.
inline __host__ __device__ int3 operator+(int b, int3 a)
{
    int3 sum;
    sum.x = a.x + b;
    sum.y = a.y + b;
    sum.z = a.z + b;
    return sum;
}
552 inline __host__ __device__ float3
operator+(
float b, float3 a)
// Component-wise sum of two float4 vectors.
inline __host__ __device__ float4 operator+(float4 a, float4 b)
{
    float4 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    sum.w = a.w + b.w;
    return sum;
}
561 inline __host__ __device__
void operator+=(float4 &a, float4 b)
// Adds the scalar b to every component of the float4 vector a.
inline __host__ __device__ float4 operator+(float4 a, float b)
{
    float4 sum;
    sum.x = a.x + b;
    sum.y = a.y + b;
    sum.z = a.z + b;
    sum.w = a.w + b;
    return sum;
}
// Scalar-first form: adds the scalar b to every component of the float4 vector a.
inline __host__ __device__ float4 operator+(float b, float4 a)
{
    float4 sum;
    sum.x = a.x + b;
    sum.y = a.y + b;
    sum.z = a.z + b;
    sum.w = a.w + b;
    return sum;
}
576 inline __host__ __device__
void operator+=(float4 &a,
float b)
// Component-wise sum of two int4 vectors.
inline __host__ __device__ int4 operator+(int4 a, int4 b)
{
    int4 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    sum.w = a.w + b.w;
    return sum;
}
// Adds the scalar b to every component of the int4 vector a.
inline __host__ __device__ int4 operator+(int4 a, int b)
{
    int4 sum;
    sum.x = a.x + b;
    sum.y = a.y + b;
    sum.z = a.z + b;
    sum.w = a.w + b;
    return sum;
}
// Scalar-first form: adds the scalar b to every component of the int4 vector a.
inline __host__ __device__ int4 operator+(int b, int4 a)
{
    int4 sum;
    sum.x = a.x + b;
    sum.y = a.y + b;
    sum.z = a.z + b;
    sum.w = a.w + b;
    return sum;
}
// Component-wise sum of two uint4 vectors.
inline __host__ __device__ uint4 operator+(uint4 a, uint4 b)
{
    uint4 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    sum.w = a.w + b.w;
    return sum;
}
615 inline __host__ __device__
void operator+=(uint4 &a, uint4 b)
624 return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
628 return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
// Component-wise sum of two uchar4 vectors.
inline __host__ __device__ uchar4 operator+(uchar4 a, uchar4 b)
{
    uchar4 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    sum.w = a.w + b.w;
    return sum;
}
642 inline __host__ __device__
void operator+=(uchar4 &a, uchar4 b)
651 return make_uchar4(a.x + b, a.y + b, a.z + b, a.w + b);
655 return make_uchar4(a.x + b, a.y + b, a.z + b, a.w + b);
669 inline __host__ __device__ float2
operator-(float2 a, float2 b)
673 inline __host__ __device__
void operator-=(float2 &a, float2 b)
678 inline __host__ __device__ float2
operator-(float2 a,
float b)
682 inline __host__ __device__ float2
operator-(
float b, float2 a)
686 inline __host__ __device__
void operator-=(float2 &a,
float b)
692 inline __host__ __device__ int2
operator-(int2 a, int2 b)
701 inline __host__ __device__ int2
operator-(int2 a,
int b)
705 inline __host__ __device__ int2
operator-(
int b, int2 a)
715 inline __host__ __device__ uint2
operator-(uint2 a, uint2 b)
719 inline __host__ __device__
void operator-=(uint2 &a, uint2 b)
// Component-wise difference a - b of two float3 vectors.
inline __host__ __device__ float3 operator-(float3 a, float3 b)
{
    float3 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    return diff;
}
742 inline __host__ __device__
void operator-=(float3 &a, float3 b)
748 inline __host__ __device__ float3
operator-(float3 a,
float b)
752 inline __host__ __device__ float3
operator-(
float b, float3 a)
756 inline __host__ __device__
void operator-=(float3 &a,
float b)
// Component-wise difference a - b of two int3 vectors.
inline __host__ __device__ int3 operator-(int3 a, int3 b)
{
    int3 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    return diff;
}
// Subtracts the scalar b from every component of the int3 vector a.
inline __host__ __device__ int3 operator-(int3 a, int b)
{
    int3 diff;
    diff.x = a.x - b;
    diff.y = a.y - b;
    diff.z = a.z - b;
    return diff;
}
// Scalar-first form: each result component is b minus the matching component of a.
inline __host__ __device__ int3 operator-(int b, int3 a)
{
    int3 diff;
    diff.x = b - a.x;
    diff.y = b - a.y;
    diff.z = b - a.z;
    return diff;
}
// Component-wise difference a - b of two uint3 vectors.
inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
{
    uint3 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    return diff;
}
792 inline __host__ __device__
void operator-=(uint3 &a, uint3 b)
// Component-wise difference a - b of two float4 vectors.
inline __host__ __device__ float4 operator-(float4 a, float4 b)
{
    float4 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    diff.w = a.w - b.w;
    return diff;
}
817 inline __host__ __device__
void operator-=(float4 &a, float4 b)
// Subtracts the scalar b from every component of the float4 vector a.
inline __host__ __device__ float4 operator-(float4 a, float b)
{
    float4 diff;
    diff.x = a.x - b;
    diff.y = a.y - b;
    diff.z = a.z - b;
    diff.w = a.w - b;
    return diff;
}
828 inline __host__ __device__
void operator-=(float4 &a,
float b)
// Component-wise difference a - b of two int4 vectors.
inline __host__ __device__ int4 operator-(int4 a, int4 b)
{
    int4 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    diff.w = a.w - b.w;
    return diff;
}
// Subtracts the scalar b from every component of the int4 vector a.
inline __host__ __device__ int4 operator-(int4 a, int b)
{
    int4 diff;
    diff.x = a.x - b;
    diff.y = a.y - b;
    diff.z = a.z - b;
    diff.w = a.w - b;
    return diff;
}
// Scalar-first form: each result component is b minus the matching component of a.
inline __host__ __device__ int4 operator-(int b, int4 a)
{
    int4 diff;
    diff.x = b - a.x;
    diff.y = b - a.y;
    diff.z = b - a.z;
    diff.w = b - a.w;
    return diff;
}
// Component-wise difference a - b of two uint4 vectors.
inline __host__ __device__ uint4 operator-(uint4 a, uint4 b)
{
    uint4 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    diff.w = a.w - b.w;
    return diff;
}
867 inline __host__ __device__
void operator-=(uint4 &a, uint4 b)
876 return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b);
880 return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);
894 inline __host__ __device__ float2
operator*(float2 a, float2 b)
898 inline __host__ __device__
void operator*=(float2 &a, float2 b)
903 inline __host__ __device__ float2
operator*(float2 a,
float b)
907 inline __host__ __device__ float2
operator*(
float b, float2 a)
911 inline __host__ __device__
void operator*=(float2 &a,
float b)
917 inline __host__ __device__ int2
operator*(int2 a, int2 b)
926 inline __host__ __device__ int2
operator*(int2 a,
int b)
930 inline __host__ __device__ int2
operator*(
int b, int2 a)
940 inline __host__ __device__ uint2
operator*(uint2 a, uint2 b)
944 inline __host__ __device__
void operator*=(uint2 &a, uint2 b)
// Component-wise product of two uchar2 vectors.
inline __host__ __device__ uchar2 operator*(uchar2 a, uchar2 b)
{
    uchar2 prod;
    prod.x = a.x * b.x;
    prod.y = a.y * b.y;
    return prod;
}
967 inline __host__ __device__
void operator*=(uchar2 &a, uchar2 b)
974 return make_uchar2(a.x * b, a.y * b);
978 return make_uchar2(b * a.x, b * a.y);
// Scales every component of the uchar2 vector a by the float b;
// each float product is stored back into a uchar2 component.
inline __host__ __device__ uchar2 operator*(uchar2 a, float b)
{
    uchar2 scaled;
    scaled.x = a.x * b;
    scaled.y = a.y * b;
    return scaled;
}
// Scalar-first form: scales every component of the uchar2 vector a by the float b;
// each float product is stored back into a uchar2 component.
inline __host__ __device__ uchar2 operator*(float b, uchar2 a)
{
    uchar2 scaled;
    scaled.x = b * a.x;
    scaled.y = b * a.y;
    return scaled;
}
993 inline __host__ __device__
void operator*=(uchar2 &a,
float b)
// Component-wise product of two float3 vectors.
inline __host__ __device__ float3 operator*(float3 a, float3 b)
{
    float3 prod;
    prod.x = a.x * b.x;
    prod.y = a.y * b.y;
    prod.z = a.z * b.z;
    return prod;
}
1009 inline __host__ __device__ float3
operator*(float3 a,
float b)
1013 inline __host__ __device__ float3
operator*(
float b, float3 a)
1026 return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
1036 return make_int3(a.x * b, a.y * b, a.z * b);
1040 return make_int3(b * a.x, b * a.y, b * a.z);
// Component-wise product of two uint3 vectors.
inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
{
    uint3 prod;
    prod.x = a.x * b.x;
    prod.y = a.y * b.y;
    prod.z = a.z * b.z;
    return prod;
}
1061 return make_uint3(a.x * b, a.y * b, a.z * b);
1065 return make_uint3(b * a.x, b * a.y, b * a.z);
// Component-wise product of two uchar3 vectors.
inline __host__ __device__ uchar3 operator*(uchar3 a, uchar3 b)
{
    uchar3 prod;
    prod.x = a.x * b.x;
    prod.y = a.y * b.y;
    prod.z = a.z * b.z;
    return prod;
}
1092 inline __host__ __device__ uchar3
operator*(uchar3 a,
float b)
1096 inline __host__ __device__ uchar3
operator*(
float b, uchar3 a)
// Component-wise product of two float4 vectors.
inline __host__ __device__ float4 operator*(float4 a, float4 b)
{
    float4 prod;
    prod.x = a.x * b.x;
    prod.y = a.y * b.y;
    prod.z = a.z * b.z;
    prod.w = a.w * b.w;
    return prod;
}
// Scales every component of the float4 vector a by the scalar b.
inline __host__ __device__ float4 operator*(float4 a, float b)
{
    float4 scaled;
    scaled.x = a.x * b;
    scaled.y = a.y * b;
    scaled.z = a.z * b;
    scaled.w = a.w * b;
    return scaled;
}
// Scalar-first form: scales every component of the float4 vector a by the scalar b.
inline __host__ __device__ float4 operator*(float b, float4 a)
{
    float4 scaled;
    scaled.x = b * a.x;
    scaled.y = b * a.y;
    scaled.z = b * a.z;
    scaled.w = b * a.w;
    return scaled;
}
1142 return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
1153 return make_int4(a.x * b, a.y * b, a.z * b, a.w * b);
1157 return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);
// Component-wise product of two uint4 vectors.
inline __host__ __device__ uint4 operator*(uint4 a, uint4 b)
{
    uint4 prod;
    prod.x = a.x * b.x;
    prod.y = a.y * b.y;
    prod.z = a.z * b.z;
    prod.w = a.w * b.w;
    return prod;
}
1180 return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b);
1184 return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);
// Component-wise product of two uchar4 vectors.
inline __host__ __device__ uchar4 operator*(uchar4 a, uchar4 b)
{
    uchar4 prod;
    prod.x = a.x * b.x;
    prod.y = a.y * b.y;
    prod.z = a.z * b.z;
    prod.w = a.w * b.w;
    return prod;
}
1207 return make_uchar4(a.x * b, a.y * b, a.z * b, a.w * b);
1211 return make_uchar4(b * a.x, b * a.y, b * a.z, b * a.w);
// Scales every component of the uchar4 vector a by the float b;
// each float product is stored back into a uchar4 component.
inline __host__ __device__ uchar4 operator*(uchar4 a, float b)
{
    uchar4 scaled;
    scaled.x = a.x * b;
    scaled.y = a.y * b;
    scaled.z = a.z * b;
    scaled.w = a.w * b;
    return scaled;
}
// Scalar-first form: scales every component of the uchar4 vector a by the float b;
// each float product is stored back into a uchar4 component.
inline __host__ __device__ uchar4 operator*(float b, uchar4 a)
{
    uchar4 scaled;
    scaled.x = b * a.x;
    scaled.y = b * a.y;
    scaled.z = b * a.z;
    scaled.w = b * a.w;
    return scaled;
}
1240 inline __host__ __device__ float2
operator/(float2 a, float2 b)
1249 inline __host__ __device__ float2
operator/(float2 a,
float b)
1258 inline __host__ __device__ float2
operator/(
float b, float2 a)
// Component-wise quotient a / b of two float3 vectors.
inline __host__ __device__ float3 operator/(float3 a, float3 b)
{
    float3 quot;
    quot.x = a.x / b.x;
    quot.y = a.y / b.y;
    quot.z = a.z / b.z;
    return quot;
}
1273 inline __host__ __device__ float3
operator/(float3 a,
float b)
1283 inline __host__ __device__ float3
operator/(
float b, float3 a)
// Component-wise quotient a / b of two float4 vectors.
inline __host__ __device__ float4 operator/(float4 a, float4 b)
{
    float4 quot;
    quot.x = a.x / b.x;
    quot.y = a.y / b.y;
    quot.z = a.z / b.z;
    quot.w = a.w / b.w;
    return quot;
}
// Divides every component of the float4 vector a by the scalar b.
// Each component is divided directly (no reciprocal is precomputed).
inline __host__ __device__ float4 operator/(float4 a, float b)
{
    float4 quot;
    quot.x = a.x / b;
    quot.y = a.y / b;
    quot.z = a.z / b;
    quot.w = a.w / b;
    return quot;
}
// Scalar-first form: each result component is b divided by the matching component of a.
inline __host__ __device__ float4 operator/(float b, float4 a)
{
    float4 quot;
    quot.x = b / a.x;
    quot.y = b / a.y;
    quot.z = b / a.z;
    quot.w = b / a.w;
    return quot;
}
1319 inline __host__ __device__ float2
fminf(float2 a, float2 b)
1323 inline __host__ __device__ float3
fminf(float3 a, float3 b)
1327 inline __host__ __device__ float4
fminf(float4 a, float4 b)
1332 inline __host__ __device__ int2
min(int2 a, int2 b)
1336 inline __host__ __device__ int3
min(int3 a, int3 b)
1340 inline __host__ __device__ int4
min(int4 a, int4 b)
1345 inline __host__ __device__ uint2
min(uint2 a, uint2 b)
1349 inline __host__ __device__ uint3
min(uint3 a, uint3 b)
1353 inline __host__ __device__ uint4
min(uint4 a, uint4 b)
1362 inline __host__ __device__ float2
fmaxf(float2 a, float2 b)
1366 inline __host__ __device__ float3
fmaxf(float3 a, float3 b)
1370 inline __host__ __device__ float4
fmaxf(float4 a, float4 b)
1375 inline __host__ __device__ int2
max(int2 a, int2 b)
1379 inline __host__ __device__ int3
max(int3 a, int3 b)
1383 inline __host__ __device__ int4
max(int4 a, int4 b)
1388 inline __host__ __device__ uint2
max(uint2 a, uint2 b)
1392 inline __host__ __device__ uint3
max(uint3 a, uint3 b)
1396 inline __host__ __device__ uint4
max(uint4 a, uint4 b)
1406 inline __device__ __host__
float lerp(
float a,
float b,
float t)
1410 inline __device__ __host__ float2
lerp(float2 a, float2 b,
float t)
1414 inline __device__ __host__ float3
lerp(float3 a, float3 b,
float t)
1418 inline __device__ __host__ float4
lerp(float4 a, float4 b,
float t)
1428 inline __device__ __host__
float clamp(
float f,
float a,
float b)
// Limits f using the bounds a and b: the result is max(a, min(f, b)).
inline __device__ __host__ int clamp(int f, int a, int b)
{
    const int capped = min(f, b);  // apply the upper bound first
    return max(a, capped);         // then the lower bound
}
1438 return max(a,
min(f, b));
1441 inline __device__ __host__ float2
clamp(float2 v,
float a,
float b)
1445 inline __device__ __host__ float2
clamp(float2 v, float2 a, float2 b)
1449 inline __device__ __host__ float3
clamp(float3 v,
float a,
float b)
1453 inline __device__ __host__ float3
clamp(float3 v, float3 a, float3 b)
// Clamps each component of v with the same scalar bounds a and b.
inline __device__ __host__ float4 clamp(float4 v, float a, float b)
{
    float4 out;
    out.x = clamp(v.x, a, b);
    out.y = clamp(v.y, a, b);
    out.z = clamp(v.z, a, b);
    out.w = clamp(v.w, a, b);
    return out;
}
// Clamps each component of v with the matching components of a and b.
inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
{
    float4 out;
    out.x = clamp(v.x, a.x, b.x);
    out.y = clamp(v.y, a.y, b.y);
    out.z = clamp(v.z, a.z, b.z);
    out.w = clamp(v.w, a.w, b.w);
    return out;
}
1466 inline __device__ __host__ int2
clamp(int2 v,
int a,
int b)
1470 inline __device__ __host__ int2
clamp(int2 v, int2 a, int2 b)
1474 inline __device__ __host__ int3
clamp(int3 v,
int a,
int b)
1478 inline __device__ __host__ int3
clamp(int3 v, int3 a, int3 b)
// Clamps each component of v with the same scalar bounds a and b.
inline __device__ __host__ int4 clamp(int4 v, int a, int b)
{
    int4 out;
    out.x = clamp(v.x, a, b);
    out.y = clamp(v.y, a, b);
    out.z = clamp(v.z, a, b);
    out.w = clamp(v.w, a, b);
    return out;
}
// Clamps each component of v with the matching components of a and b.
inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)
{
    int4 out;
    out.x = clamp(v.x, a.x, b.x);
    out.y = clamp(v.y, a.y, b.y);
    out.z = clamp(v.z, a.z, b.z);
    out.w = clamp(v.w, a.w, b.w);
    return out;
}
1495 inline __device__ __host__ uint2
clamp(uint2 v, uint2 a, uint2 b)
1503 inline __device__ __host__ uint3
clamp(uint3 v, uint3 a, uint3 b)
1509 return make_uint4(
clamp(v.x, a, b),
clamp(v.y, a, b),
clamp(v.z, a, b),
clamp(v.w, a, b));
// Clamps each component of v with the matching components of a and b.
inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)
{
    uint4 out;
    out.x = clamp(v.x, a.x, b.x);
    out.y = clamp(v.y, a.y, b.y);
    out.z = clamp(v.z, a.z, b.z);
    out.w = clamp(v.w, a.w, b.w);
    return out;
}
// Dot product of two float2 vectors.
inline __host__ __device__ float dot(float2 a, float2 b)
{
    // Kept as a single expression so the floating-point evaluation is unchanged.
    const float result = a.x * b.x + a.y * b.y;
    return result;
}
// Dot product of two float3 vectors.
inline __host__ __device__ float dot(float3 a, float3 b)
{
    // Kept as a single expression so the floating-point evaluation is unchanged.
    const float result = a.x * b.x + a.y * b.y + a.z * b.z;
    return result;
}
// Dot product of two float4 vectors.
inline __host__ __device__ float dot(float4 a, float4 b)
{
    // Kept as a single expression so the floating-point evaluation is unchanged.
    const float result = a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
    return result;
}
// Dot product of two int2 vectors.
inline __host__ __device__ int dot(int2 a, int2 b)
{
    const int result = a.x * b.x + a.y * b.y;
    return result;
}
// Dot product of two int3 vectors.
inline __host__ __device__ int dot(int3 a, int3 b)
{
    const int result = a.x * b.x + a.y * b.y + a.z * b.z;
    return result;
}
// Dot product of two int4 vectors.
inline __host__ __device__ int dot(int4 a, int4 b)
{
    const int result = a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
    return result;
}
// Dot product of two uint2 vectors.
inline __host__ __device__ uint dot(uint2 a, uint2 b)
{
    const uint result = a.x * b.x + a.y * b.y;
    return result;
}
// Dot product of two uint3 vectors.
inline __host__ __device__ uint dot(uint3 a, uint3 b)
{
    const uint result = a.x * b.x + a.y * b.y + a.z * b.z;
    return result;
}
// Dot product of two uint4 vectors.
inline __host__ __device__ uint dot(uint4 a, uint4 b)
{
    const uint result = a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
    return result;
}
// Length of a float2 vector: square root of the vector dotted with itself.
inline __host__ __device__ float length(float2 v)
{
    const float squared = dot(v, v);
    return sqrtf(squared);
}
// Length of a float3 vector: square root of the vector dotted with itself.
inline __host__ __device__ float length(float3 v)
{
    const float squared = dot(v, v);
    return sqrtf(squared);
}
// Length of a float4 vector: square root of the vector dotted with itself.
inline __host__ __device__ float length(float4 v)
{
    const float squared = dot(v, v);
    return sqrtf(squared);
}
1600 inline __host__ __device__ float2
floorf(float2 v)
1604 inline __host__ __device__ float3
floorf(float3 v)
1608 inline __host__ __device__ float4
floorf(float4 v)
1617 inline __host__ __device__
float fracf(
float v)
1621 inline __host__ __device__ float2
fracf(float2 v)
1625 inline __host__ __device__ float3
fracf(float3 v)
1629 inline __host__ __device__ float4
fracf(float4 v)
1638 inline __host__ __device__ float2
fmodf(float2 a, float2 b)
1642 inline __host__ __device__ float3
fmodf(float3 a, float3 b)
1646 inline __host__ __device__ float4
fmodf(float4 a, float4 b)
1655 inline __host__ __device__ float2
fabs(float2 v)
1659 inline __host__ __device__ float3
fabs(float3 v)
1663 inline __host__ __device__ float4
fabs(float4 v)
1668 inline __host__ __device__ int2
abs(int2 v)
1672 inline __host__ __device__ int3
abs(int3 v)
1676 inline __host__ __device__ int4
abs(int4 v)
// Returns i - 2 * n * dot(n, i), i.e. i reflected about n.
// NOTE(review): n is presumably expected to be unit length; nothing here normalizes it.
inline __host__ __device__ float3 reflect(float3 i, float3 n)
{
    const float n_dot_i = dot(n, i);
    // Same operand grouping as before: (2.0f * n) is scaled by the dot product.
    return i - 2.0f * n * n_dot_i;
}
// Cross product a x b of two float3 vectors.
inline __host__ __device__ float3 cross(float3 a, float3 b)
{
    float3 c;
    c.x = a.y * b.z - a.z * b.y;
    c.y = a.z * b.x - a.x * b.z;
    c.z = a.x * b.y - a.y * b.x;
    return c;
}
// Computes t = (x - a) / (b - a) clamped to [0, 1], then returns t*t*(3 - 2t).
// The division by (b - a) is performed without any check for a == b.
inline __device__ __host__ float smoothstep(float a, float b, float x)
{
    const float offset = x - a;
    const float span = b - a;
    const float t = clamp(offset / span, 0.0f, 1.0f);
    return (t * t * (3.0f - (2.0f * t)));
}
1713 inline __device__ __host__ float2
smoothstep(float2 a, float2 b, float2 x)
1715 float2 y =
clamp((x - a) / (b - a), 0.0f, 1.0f);
1718 inline __device__ __host__ float3
smoothstep(float3 a, float3 b, float3 x)
1720 float3 y =
clamp((x - a) / (b - a), 0.0f, 1.0f);
1723 inline __device__ __host__ float4
smoothstep(float4 a, float4 b, float4 x)
1725 float4 y =
clamp((x - a) / (b - a), 0.0f, 1.0f);