Jetson Inference
DNN Vision Library
cudaMath.h
Go to the documentation of this file.
1 
12 /*
13  * This file implements common mathematical operations on vector types
14  * (float3, float4 etc.) since these are not provided as standard by CUDA.
15  *
16  * The syntax is modeled on the Cg standard library.
17  *
18  * This is part of the Helper library includes
19  *
20  * Thanks to Linh Hah for additions and fixes.
21  */
22 
23 #ifndef __CUDA_HELPER_MATH_H_
24 #define __CUDA_HELPER_MATH_H_
25 
26 #include "cuda_runtime.h"
27 
33 
35 
36 typedef unsigned int uint;     // shorthand alias for unsigned int, used in the vector helper signatures
37 typedef unsigned char uchar;   // shorthand alias for unsigned char, used in the uchar2/3/4 helper signatures
38 typedef unsigned short ushort; // shorthand alias for unsigned short
39 
40 #ifndef EXIT_WAIVED
41 #define EXIT_WAIVED 2 // NOTE(review): only defined when not already provided; presumably a process exit status for waived/skipped runs -- confirm against callers
42 #endif
43 
44 #ifndef __CUDACC__
45 #include <math.h>
46 
48 // host implementations of CUDA functions
50 
// Host-side stand-in for CUDA's scalar fminf().
// Returns the smaller of a and b. If exactly one argument is NaN the other
// (numeric) argument is returned, matching the CUDA/C99 fminf contract; the
// previous form returned NaN whenever b was NaN.
inline float fminf(float a, float b)
{
    // b != b is true only for NaN; in that case fall back to a.
    if (b != b)
        return a;
    return a < b ? a : b;
}
55 
// Host-side stand-in for CUDA's scalar fmaxf().
// Returns the larger of a and b. If exactly one argument is NaN the other
// (numeric) argument is returned, matching the CUDA/C99 fmaxf contract; the
// previous form returned NaN whenever b was NaN.
inline float fmaxf(float a, float b)
{
    // b != b is true only for NaN; in that case fall back to a.
    if (b != b)
        return a;
    return a > b ? a : b;
}
60 
// Host-side integer maximum: returns a when a > b, otherwise b.
inline int max(int a, int b)
{
    if (a > b)
        return a;
    return b;
}
65 
// Host-side integer minimum: returns a when a < b, otherwise b.
inline int min(int a, int b)
{
    if (a < b)
        return a;
    return b;
}
70 
// Host-side reciprocal square root: 1 / sqrtf(x).
inline float rsqrtf(float x)
{
    const float root = sqrtf(x);
    return 1.0f / root;
}
75 #endif
76 
78 // constructors
80 
81 inline __host__ __device__ float2 make_float2(float s)
82 {
83  return make_float2(s, s);
84 }
85 inline __host__ __device__ float2 make_float2(float3 a)
86 {
87  return make_float2(a.x, a.y);
88 }
89 inline __host__ __device__ float2 make_float2(int2 a)
90 {
91  return make_float2(float(a.x), float(a.y));
92 }
93 inline __host__ __device__ float2 make_float2(uint2 a)
94 {
95  return make_float2(float(a.x), float(a.y));
96 }
97 
98 inline __host__ __device__ int2 make_int2(int s)
99 {
100  return make_int2(s, s);
101 }
102 inline __host__ __device__ int2 make_int2(int3 a)
103 {
104  return make_int2(a.x, a.y);
105 }
106 inline __host__ __device__ int2 make_int2(uint2 a)
107 {
108  return make_int2(int(a.x), int(a.y));
109 }
110 inline __host__ __device__ int2 make_int2(float2 a)
111 {
112  return make_int2(int(a.x), int(a.y));
113 }
114 
115 inline __host__ __device__ uint2 make_uint2(uint s)
116 {
117  return make_uint2(s, s);
118 }
119 inline __host__ __device__ uint2 make_uint2(uint3 a)
120 {
121  return make_uint2(a.x, a.y);
122 }
123 inline __host__ __device__ uint2 make_uint2(int2 a)
124 {
125  return make_uint2(uint(a.x), uint(a.y));
126 }
127 
128 inline __host__ __device__ float3 make_float3(float s)
129 {
130  return make_float3(s, s, s);
131 }
132 inline __host__ __device__ float3 make_float3(float2 a)
133 {
134  return make_float3(a.x, a.y, 0.0f);
135 }
136 inline __host__ __device__ float3 make_float3(float2 a, float s)
137 {
138  return make_float3(a.x, a.y, s);
139 }
140 inline __host__ __device__ float3 make_float3(float3 a)
141 {
142  return make_float3(a.x, a.y, a.z);
143 }
144 inline __host__ __device__ float3 make_float3(float4 a)
145 {
146  return make_float3(a.x, a.y, a.z);
147 }
148 inline __host__ __device__ float3 make_float3(int3 a)
149 {
150  return make_float3(float(a.x), float(a.y), float(a.z));
151 }
152 inline __host__ __device__ float3 make_float3(uint3 a)
153 {
154  return make_float3(float(a.x), float(a.y), float(a.z));
155 }
156 inline __host__ __device__ float3 make_float3(uchar3 a)
157 {
158  return make_float3(float(a.x), float(a.y), float(a.z));
159 }
160 inline __host__ __device__ float3 make_float3(uchar4 a)
161 {
162  return make_float3(float(a.x), float(a.y), float(a.z));
163 }
164 
165 inline __host__ __device__ int3 make_int3(int s)
166 {
167  return make_int3(s, s, s);
168 }
169 inline __host__ __device__ int3 make_int3(int2 a)
170 {
171  return make_int3(a.x, a.y, 0);
172 }
173 inline __host__ __device__ int3 make_int3(int2 a, int s)
174 {
175  return make_int3(a.x, a.y, s);
176 }
177 inline __host__ __device__ int3 make_int3(uint3 a)
178 {
179  return make_int3(int(a.x), int(a.y), int(a.z));
180 }
181 inline __host__ __device__ int3 make_int3(float3 a)
182 {
183  return make_int3(int(a.x), int(a.y), int(a.z));
184 }
185 
186 inline __host__ __device__ uint3 make_uint3(uint s)
187 {
188  return make_uint3(s, s, s);
189 }
190 inline __host__ __device__ uint3 make_uint3(uint2 a)
191 {
192  return make_uint3(a.x, a.y, 0);
193 }
194 inline __host__ __device__ uint3 make_uint3(uint2 a, uint s)
195 {
196  return make_uint3(a.x, a.y, s);
197 }
198 inline __host__ __device__ uint3 make_uint3(uint4 a)
199 {
200  return make_uint3(a.x, a.y, a.z);
201 }
202 inline __host__ __device__ uint3 make_uint3(int3 a)
203 {
204  return make_uint3(uint(a.x), uint(a.y), uint(a.z));
205 }
206 
207 inline __host__ __device__ uchar3 make_uchar3(uchar s)
208 {
209  return make_uchar3(s, s, s);
210 }
211 inline __host__ __device__ uchar3 make_uchar3(uint s)
212 {
213  return make_uchar3(s, s, s);
214 }
215 inline __host__ __device__ uchar3 make_uchar3(uint2 a)
216 {
217  return make_uchar3(a.x, a.y, 0);
218 }
219 inline __host__ __device__ uchar3 make_uchar3(uint2 a, uint s)
220 {
221  return make_uchar3(a.x, a.y, s);
222 }
223 inline __host__ __device__ uchar3 make_uchar3(uint4 a)
224 {
225  return make_uchar3(a.x, a.y, a.z);
226 }
227 inline __host__ __device__ uchar3 make_uchar3(uchar3 a)
228 {
229  return make_uchar3(a.x, a.y, a.z);
230 }
231 inline __host__ __device__ uchar3 make_uchar3(uchar4 a)
232 {
233  return make_uchar3(a.x, a.y, a.z);
234 }
235 inline __host__ __device__ uchar3 make_uchar3(int3 a)
236 {
237  return make_uchar3(uchar(a.x), uchar(a.y), uchar(a.z));
238 }
239 inline __host__ __device__ uchar3 make_uchar3(float3 a)
240 {
241  return make_uchar3(a.x, a.y, a.z);
242 }
243 inline __host__ __device__ uchar3 make_uchar3(float4 a)
244 {
245  return make_uchar3(a.x, a.y, a.z);
246 }
247 
248 inline __host__ __device__ float4 make_float4(float s)
249 {
250  return make_float4(s, s, s, s);
251 }
252 inline __host__ __device__ float4 make_float4(float3 a)
253 {
254  return make_float4(a.x, a.y, a.z, 0.0f);
255 }
256 inline __host__ __device__ float4 make_float4(float4 a)
257 {
258  return make_float4(a.x, a.y, a.z, a.w);
259 }
260 inline __host__ __device__ float4 make_float4(float3 a, float w)
261 {
262  return make_float4(a.x, a.y, a.z, w);
263 }
264 inline __host__ __device__ float4 make_float4(int4 a)
265 {
266  return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
267 }
268 inline __host__ __device__ float4 make_float4(uint4 a)
269 {
270  return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
271 }
272 inline __host__ __device__ float4 make_float4(uchar3 a)
273 {
274  return make_float4(float(a.x), float(a.y), float(a.z), 0.0f);
275 }
276 inline __host__ __device__ float4 make_float4(uchar4 a)
277 {
278  return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
279 }
280 
281 inline __host__ __device__ int4 make_int4(int s)
282 {
283  return make_int4(s, s, s, s);
284 }
285 inline __host__ __device__ int4 make_int4(int3 a)
286 {
287  return make_int4(a.x, a.y, a.z, 0);
288 }
289 inline __host__ __device__ int4 make_int4(int3 a, int w)
290 {
291  return make_int4(a.x, a.y, a.z, w);
292 }
293 inline __host__ __device__ int4 make_int4(uint4 a)
294 {
295  return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
296 }
297 inline __host__ __device__ int4 make_int4(float4 a)
298 {
299  return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
300 }
301 
302 
303 inline __host__ __device__ uint4 make_uint4(uint s)
304 {
305  return make_uint4(s, s, s, s);
306 }
307 inline __host__ __device__ uint4 make_uint4(uint3 a)
308 {
309  return make_uint4(a.x, a.y, a.z, 0);
310 }
311 inline __host__ __device__ uint4 make_uint4(uint3 a, uint w)
312 {
313  return make_uint4(a.x, a.y, a.z, w);
314 }
315 inline __host__ __device__ uint4 make_uint4(int4 a)
316 {
317  return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w));
318 }
319 
320 inline __host__ __device__ uchar4 make_uchar4(uchar s)
321 {
322  return make_uchar4(s, s, s, s);
323 }
324 inline __host__ __device__ uchar4 make_uchar4(uint s)
325 {
326  return make_uchar4(s, s, s, s);
327 }
328 inline __host__ __device__ uchar4 make_uchar4(uint3 a)
329 {
330  return make_uchar4(a.x, a.y, a.z, 0);
331 }
332 inline __host__ __device__ uchar4 make_uchar4(uchar3 a)
333 {
334  return make_uchar4(a.x, a.y, a.z, 0);
335 }
336 inline __host__ __device__ uchar4 make_uchar4(uchar4 a)
337 {
338  return make_uchar4(a.x, a.y, a.z, a.w);
339 }
340 inline __host__ __device__ uchar4 make_uchar4(uint3 a, uint w)
341 {
342  return make_uchar4(a.x, a.y, a.z, w);
343 }
344 inline __host__ __device__ uchar4 make_uchar4(int4 a)
345 {
346  return make_uchar4(uchar(a.x), uchar(a.y), uchar(a.z), uchar(a.w));
347 }
348 inline __host__ __device__ uchar4 make_uchar4(float3 a)
349 {
350  return make_uchar4(a.x, a.y, a.z, 0);
351 }
352 inline __host__ __device__ uchar4 make_uchar4(float4 a)
353 {
354  return make_uchar4(a.x, a.y, a.z, a.w);
355 }
356 
358 // negate
360 
361 inline __host__ __device__ float2 operator-(float2 &a)
362 {
363  return make_float2(-a.x, -a.y);
364 }
365 inline __host__ __device__ int2 operator-(int2 &a)
366 {
367  return make_int2(-a.x, -a.y);
368 }
369 inline __host__ __device__ float3 operator-(float3 &a)
370 {
371  return make_float3(-a.x, -a.y, -a.z);
372 }
373 inline __host__ __device__ int3 operator-(int3 &a)
374 {
375  return make_int3(-a.x, -a.y, -a.z);
376 }
377 inline __host__ __device__ float4 operator-(float4 &a)
378 {
379  return make_float4(-a.x, -a.y, -a.z, -a.w);
380 }
381 inline __host__ __device__ int4 operator-(int4 &a)
382 {
383  return make_int4(-a.x, -a.y, -a.z, -a.w);
384 }
385 
387 // addition
389 
390 inline __host__ __device__ float2 operator+(float2 a, float2 b)
391 {
392  return make_float2(a.x + b.x, a.y + b.y);
393 }
394 inline __host__ __device__ void operator+=(float2 &a, float2 b)
395 {
396  a.x += b.x;
397  a.y += b.y;
398 }
399 inline __host__ __device__ float2 operator+(float2 a, float b)
400 {
401  return make_float2(a.x + b, a.y + b);
402 }
403 inline __host__ __device__ float2 operator+(float b, float2 a)
404 {
405  return make_float2(a.x + b, a.y + b);
406 }
407 inline __host__ __device__ void operator+=(float2 &a, float b)
408 {
409  a.x += b;
410  a.y += b;
411 }
412 
413 inline __host__ __device__ int2 operator+(int2 a, int2 b)
414 {
415  return make_int2(a.x + b.x, a.y + b.y);
416 }
417 inline __host__ __device__ void operator+=(int2 &a, int2 b)
418 {
419  a.x += b.x;
420  a.y += b.y;
421 }
422 inline __host__ __device__ int2 operator+(int2 a, int b)
423 {
424  return make_int2(a.x + b, a.y + b);
425 }
426 inline __host__ __device__ int2 operator+(int b, int2 a)
427 {
428  return make_int2(a.x + b, a.y + b);
429 }
430 inline __host__ __device__ void operator+=(int2 &a, int b)
431 {
432  a.x += b;
433  a.y += b;
434 }
435 
436 inline __host__ __device__ uint2 operator+(uint2 a, uint2 b)
437 {
438  return make_uint2(a.x + b.x, a.y + b.y);
439 }
440 inline __host__ __device__ void operator+=(uint2 &a, uint2 b)
441 {
442  a.x += b.x;
443  a.y += b.y;
444 }
445 inline __host__ __device__ uint2 operator+(uint2 a, uint b)
446 {
447  return make_uint2(a.x + b, a.y + b);
448 }
449 inline __host__ __device__ uint2 operator+(uint b, uint2 a)
450 {
451  return make_uint2(a.x + b, a.y + b);
452 }
453 inline __host__ __device__ void operator+=(uint2 &a, uint b)
454 {
455  a.x += b;
456  a.y += b;
457 }
458 
459 
460 inline __host__ __device__ float3 operator+(float3 a, float3 b)
461 {
462  return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
463 }
464 inline __host__ __device__ void operator+=(float3 &a, float3 b)
465 {
466  a.x += b.x;
467  a.y += b.y;
468  a.z += b.z;
469 }
470 inline __host__ __device__ float3 operator+(float3 a, float b)
471 {
472  return make_float3(a.x + b, a.y + b, a.z + b);
473 }
474 inline __host__ __device__ void operator+=(float3 &a, float b)
475 {
476  a.x += b;
477  a.y += b;
478  a.z += b;
479 }
480 
481 inline __host__ __device__ int3 operator+(int3 a, int3 b)
482 {
483  return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
484 }
485 inline __host__ __device__ void operator+=(int3 &a, int3 b)
486 {
487  a.x += b.x;
488  a.y += b.y;
489  a.z += b.z;
490 }
491 inline __host__ __device__ int3 operator+(int3 a, int b)
492 {
493  return make_int3(a.x + b, a.y + b, a.z + b);
494 }
495 inline __host__ __device__ void operator+=(int3 &a, int b)
496 {
497  a.x += b;
498  a.y += b;
499  a.z += b;
500 }
501 
502 inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
503 {
504  return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
505 }
506 inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
507 {
508  a.x += b.x;
509  a.y += b.y;
510  a.z += b.z;
511 }
512 inline __host__ __device__ uint3 operator+(uint3 a, uint b)
513 {
514  return make_uint3(a.x + b, a.y + b, a.z + b);
515 }
516 inline __host__ __device__ void operator+=(uint3 &a, uint b)
517 {
518  a.x += b;
519  a.y += b;
520  a.z += b;
521 }
522 
523 inline __host__ __device__ uchar3 operator+(uchar3 a, uchar3 b)
524 {
525  return make_uchar3(a.x + b.x, a.y + b.y, a.z + b.z);
526 }
527 inline __host__ __device__ void operator+=(uchar3 &a, uchar3 b)
528 {
529  a.x += b.x;
530  a.y += b.y;
531  a.z += b.z;
532 }
533 inline __host__ __device__ uchar3 operator+(uchar3 a, uchar b)
534 {
535  return make_uchar3(a.x + b, a.y + b, a.z + b);
536 }
537 inline __host__ __device__ void operator+=(uchar3 &a, uchar b)
538 {
539  a.x += b;
540  a.y += b;
541  a.z += b;
542 }
543 
544 inline __host__ __device__ int3 operator+(int b, int3 a)
545 {
546  return make_int3(a.x + b, a.y + b, a.z + b);
547 }
548 inline __host__ __device__ uint3 operator+(uint b, uint3 a)
549 {
550  return make_uint3(a.x + b, a.y + b, a.z + b);
551 }
552 inline __host__ __device__ float3 operator+(float b, float3 a)
553 {
554  return make_float3(a.x + b, a.y + b, a.z + b);
555 }
556 
557 inline __host__ __device__ float4 operator+(float4 a, float4 b)
558 {
559  return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
560 }
561 inline __host__ __device__ void operator+=(float4 &a, float4 b)
562 {
563  a.x += b.x;
564  a.y += b.y;
565  a.z += b.z;
566  a.w += b.w;
567 }
568 inline __host__ __device__ float4 operator+(float4 a, float b)
569 {
570  return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
571 }
572 inline __host__ __device__ float4 operator+(float b, float4 a)
573 {
574  return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
575 }
576 inline __host__ __device__ void operator+=(float4 &a, float b)
577 {
578  a.x += b;
579  a.y += b;
580  a.z += b;
581  a.w += b;
582 }
583 
584 inline __host__ __device__ int4 operator+(int4 a, int4 b)
585 {
586  return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
587 }
588 inline __host__ __device__ void operator+=(int4 &a, int4 b)
589 {
590  a.x += b.x;
591  a.y += b.y;
592  a.z += b.z;
593  a.w += b.w;
594 }
595 inline __host__ __device__ int4 operator+(int4 a, int b)
596 {
597  return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
598 }
599 inline __host__ __device__ int4 operator+(int b, int4 a)
600 {
601  return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
602 }
603 inline __host__ __device__ void operator+=(int4 &a, int b)
604 {
605  a.x += b;
606  a.y += b;
607  a.z += b;
608  a.w += b;
609 }
610 
611 inline __host__ __device__ uint4 operator+(uint4 a, uint4 b)
612 {
613  return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
614 }
615 inline __host__ __device__ void operator+=(uint4 &a, uint4 b)
616 {
617  a.x += b.x;
618  a.y += b.y;
619  a.z += b.z;
620  a.w += b.w;
621 }
622 inline __host__ __device__ uint4 operator+(uint4 a, uint b)
623 {
624  return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
625 }
626 inline __host__ __device__ uint4 operator+(uint b, uint4 a)
627 {
628  return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
629 }
630 inline __host__ __device__ void operator+=(uint4 &a, uint b)
631 {
632  a.x += b;
633  a.y += b;
634  a.z += b;
635  a.w += b;
636 }
637 
638 inline __host__ __device__ uchar4 operator+(uchar4 a, uchar4 b)
639 {
640  return make_uchar4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
641 }
642 inline __host__ __device__ void operator+=(uchar4 &a, uchar4 b)
643 {
644  a.x += b.x;
645  a.y += b.y;
646  a.z += b.z;
647  a.w += b.w;
648 }
649 inline __host__ __device__ uchar4 operator+(uchar4 a, uchar b)
650 {
651  return make_uchar4(a.x + b, a.y + b, a.z + b, a.w + b);
652 }
653 inline __host__ __device__ uchar4 operator+(uchar b, uchar4 a)
654 {
655  return make_uchar4(a.x + b, a.y + b, a.z + b, a.w + b);
656 }
657 inline __host__ __device__ void operator+=(uchar4 &a, uchar b)
658 {
659  a.x += b;
660  a.y += b;
661  a.z += b;
662  a.w += b;
663 }
664 
666 // subtract
668 
669 inline __host__ __device__ float2 operator-(float2 a, float2 b)
670 {
671  return make_float2(a.x - b.x, a.y - b.y);
672 }
673 inline __host__ __device__ void operator-=(float2 &a, float2 b)
674 {
675  a.x -= b.x;
676  a.y -= b.y;
677 }
678 inline __host__ __device__ float2 operator-(float2 a, float b)
679 {
680  return make_float2(a.x - b, a.y - b);
681 }
682 inline __host__ __device__ float2 operator-(float b, float2 a)
683 {
684  return make_float2(b - a.x, b - a.y);
685 }
686 inline __host__ __device__ void operator-=(float2 &a, float b)
687 {
688  a.x -= b;
689  a.y -= b;
690 }
691 
692 inline __host__ __device__ int2 operator-(int2 a, int2 b)
693 {
694  return make_int2(a.x - b.x, a.y - b.y);
695 }
696 inline __host__ __device__ void operator-=(int2 &a, int2 b)
697 {
698  a.x -= b.x;
699  a.y -= b.y;
700 }
701 inline __host__ __device__ int2 operator-(int2 a, int b)
702 {
703  return make_int2(a.x - b, a.y - b);
704 }
705 inline __host__ __device__ int2 operator-(int b, int2 a)
706 {
707  return make_int2(b - a.x, b - a.y);
708 }
709 inline __host__ __device__ void operator-=(int2 &a, int b)
710 {
711  a.x -= b;
712  a.y -= b;
713 }
714 
715 inline __host__ __device__ uint2 operator-(uint2 a, uint2 b)
716 {
717  return make_uint2(a.x - b.x, a.y - b.y);
718 }
719 inline __host__ __device__ void operator-=(uint2 &a, uint2 b)
720 {
721  a.x -= b.x;
722  a.y -= b.y;
723 }
724 inline __host__ __device__ uint2 operator-(uint2 a, uint b)
725 {
726  return make_uint2(a.x - b, a.y - b);
727 }
728 inline __host__ __device__ uint2 operator-(uint b, uint2 a)
729 {
730  return make_uint2(b - a.x, b - a.y);
731 }
732 inline __host__ __device__ void operator-=(uint2 &a, uint b)
733 {
734  a.x -= b;
735  a.y -= b;
736 }
737 
738 inline __host__ __device__ float3 operator-(float3 a, float3 b)
739 {
740  return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
741 }
742 inline __host__ __device__ void operator-=(float3 &a, float3 b)
743 {
744  a.x -= b.x;
745  a.y -= b.y;
746  a.z -= b.z;
747 }
748 inline __host__ __device__ float3 operator-(float3 a, float b)
749 {
750  return make_float3(a.x - b, a.y - b, a.z - b);
751 }
752 inline __host__ __device__ float3 operator-(float b, float3 a)
753 {
754  return make_float3(b - a.x, b - a.y, b - a.z);
755 }
756 inline __host__ __device__ void operator-=(float3 &a, float b)
757 {
758  a.x -= b;
759  a.y -= b;
760  a.z -= b;
761 }
762 
763 inline __host__ __device__ int3 operator-(int3 a, int3 b)
764 {
765  return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
766 }
767 inline __host__ __device__ void operator-=(int3 &a, int3 b)
768 {
769  a.x -= b.x;
770  a.y -= b.y;
771  a.z -= b.z;
772 }
773 inline __host__ __device__ int3 operator-(int3 a, int b)
774 {
775  return make_int3(a.x - b, a.y - b, a.z - b);
776 }
777 inline __host__ __device__ int3 operator-(int b, int3 a)
778 {
779  return make_int3(b - a.x, b - a.y, b - a.z);
780 }
781 inline __host__ __device__ void operator-=(int3 &a, int b)
782 {
783  a.x -= b;
784  a.y -= b;
785  a.z -= b;
786 }
787 
788 inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
789 {
790  return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
791 }
792 inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
793 {
794  a.x -= b.x;
795  a.y -= b.y;
796  a.z -= b.z;
797 }
798 inline __host__ __device__ uint3 operator-(uint3 a, uint b)
799 {
800  return make_uint3(a.x - b, a.y - b, a.z - b);
801 }
802 inline __host__ __device__ uint3 operator-(uint b, uint3 a)
803 {
804  return make_uint3(b - a.x, b - a.y, b - a.z);
805 }
806 inline __host__ __device__ void operator-=(uint3 &a, uint b)
807 {
808  a.x -= b;
809  a.y -= b;
810  a.z -= b;
811 }
812 
813 inline __host__ __device__ float4 operator-(float4 a, float4 b)
814 {
815  return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
816 }
817 inline __host__ __device__ void operator-=(float4 &a, float4 b)
818 {
819  a.x -= b.x;
820  a.y -= b.y;
821  a.z -= b.z;
822  a.w -= b.w;
823 }
824 inline __host__ __device__ float4 operator-(float4 a, float b)
825 {
826  return make_float4(a.x - b, a.y - b, a.z - b, a.w - b);
827 }
828 inline __host__ __device__ void operator-=(float4 &a, float b)
829 {
830  a.x -= b;
831  a.y -= b;
832  a.z -= b;
833  a.w -= b;
834 }
835 
836 inline __host__ __device__ int4 operator-(int4 a, int4 b)
837 {
838  return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
839 }
840 inline __host__ __device__ void operator-=(int4 &a, int4 b)
841 {
842  a.x -= b.x;
843  a.y -= b.y;
844  a.z -= b.z;
845  a.w -= b.w;
846 }
847 inline __host__ __device__ int4 operator-(int4 a, int b)
848 {
849  return make_int4(a.x - b, a.y - b, a.z - b, a.w - b);
850 }
851 inline __host__ __device__ int4 operator-(int b, int4 a)
852 {
853  return make_int4(b - a.x, b - a.y, b - a.z, b - a.w);
854 }
855 inline __host__ __device__ void operator-=(int4 &a, int b)
856 {
857  a.x -= b;
858  a.y -= b;
859  a.z -= b;
860  a.w -= b;
861 }
862 
863 inline __host__ __device__ uint4 operator-(uint4 a, uint4 b)
864 {
865  return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
866 }
867 inline __host__ __device__ void operator-=(uint4 &a, uint4 b)
868 {
869  a.x -= b.x;
870  a.y -= b.y;
871  a.z -= b.z;
872  a.w -= b.w;
873 }
874 inline __host__ __device__ uint4 operator-(uint4 a, uint b)
875 {
876  return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b);
877 }
878 inline __host__ __device__ uint4 operator-(uint b, uint4 a)
879 {
880  return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);
881 }
882 inline __host__ __device__ void operator-=(uint4 &a, uint b)
883 {
884  a.x -= b;
885  a.y -= b;
886  a.z -= b;
887  a.w -= b;
888 }
889 
891 // multiply
893 
894 inline __host__ __device__ float2 operator*(float2 a, float2 b)
895 {
896  return make_float2(a.x * b.x, a.y * b.y);
897 }
898 inline __host__ __device__ void operator*=(float2 &a, float2 b)
899 {
900  a.x *= b.x;
901  a.y *= b.y;
902 }
903 inline __host__ __device__ float2 operator*(float2 a, float b)
904 {
905  return make_float2(a.x * b, a.y * b);
906 }
907 inline __host__ __device__ float2 operator*(float b, float2 a)
908 {
909  return make_float2(b * a.x, b * a.y);
910 }
911 inline __host__ __device__ void operator*=(float2 &a, float b)
912 {
913  a.x *= b;
914  a.y *= b;
915 }
916 
917 inline __host__ __device__ int2 operator*(int2 a, int2 b)
918 {
919  return make_int2(a.x * b.x, a.y * b.y);
920 }
921 inline __host__ __device__ void operator*=(int2 &a, int2 b)
922 {
923  a.x *= b.x;
924  a.y *= b.y;
925 }
926 inline __host__ __device__ int2 operator*(int2 a, int b)
927 {
928  return make_int2(a.x * b, a.y * b);
929 }
930 inline __host__ __device__ int2 operator*(int b, int2 a)
931 {
932  return make_int2(b * a.x, b * a.y);
933 }
934 inline __host__ __device__ void operator*=(int2 &a, int b)
935 {
936  a.x *= b;
937  a.y *= b;
938 }
939 
940 inline __host__ __device__ uint2 operator*(uint2 a, uint2 b)
941 {
942  return make_uint2(a.x * b.x, a.y * b.y);
943 }
944 inline __host__ __device__ void operator*=(uint2 &a, uint2 b)
945 {
946  a.x *= b.x;
947  a.y *= b.y;
948 }
949 inline __host__ __device__ uint2 operator*(uint2 a, uint b)
950 {
951  return make_uint2(a.x * b, a.y * b);
952 }
953 inline __host__ __device__ uint2 operator*(uint b, uint2 a)
954 {
955  return make_uint2(b * a.x, b * a.y);
956 }
957 inline __host__ __device__ void operator*=(uint2 &a, uint b)
958 {
959  a.x *= b;
960  a.y *= b;
961 }
962 
963 inline __host__ __device__ uchar2 operator*(uchar2 a, uchar2 b)
964 {
965  return make_uchar2(a.x * b.x, a.y * b.y);
966 }
967 inline __host__ __device__ void operator*=(uchar2 &a, uchar2 b)
968 {
969  a.x *= b.x;
970  a.y *= b.y;
971 }
972 inline __host__ __device__ uchar2 operator*(uchar2 a, uchar b)
973 {
974  return make_uchar2(a.x * b, a.y * b);
975 }
976 inline __host__ __device__ uchar2 operator*(uchar b, uchar2 a)
977 {
978  return make_uchar2(b * a.x, b * a.y);
979 }
980 inline __host__ __device__ uchar2 operator*(uchar2 a, float b)
981 {
982  return make_uchar2(a.x * b, a.y * b);
983 }
984 inline __host__ __device__ uchar2 operator*(float b, uchar2 a)
985 {
986  return make_uchar2(b * a.x, b * a.y);
987 }
988 inline __host__ __device__ void operator*=(uchar2 &a, uchar b)
989 {
990  a.x *= b;
991  a.y *= b;
992 }
993 inline __host__ __device__ void operator*=(uchar2 &a, float b)
994 {
995  a.x *= b;
996  a.y *= b;
997 }
998 
999 inline __host__ __device__ float3 operator*(float3 a, float3 b)
1000 {
1001  return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
1002 }
1003 inline __host__ __device__ void operator*=(float3 &a, float3 b)
1004 {
1005  a.x *= b.x;
1006  a.y *= b.y;
1007  a.z *= b.z;
1008 }
1009 inline __host__ __device__ float3 operator*(float3 a, float b)
1010 {
1011  return make_float3(a.x * b, a.y * b, a.z * b);
1012 }
1013 inline __host__ __device__ float3 operator*(float b, float3 a)
1014 {
1015  return make_float3(b * a.x, b * a.y, b * a.z);
1016 }
1017 inline __host__ __device__ void operator*=(float3 &a, float b)
1018 {
1019  a.x *= b;
1020  a.y *= b;
1021  a.z *= b;
1022 }
1023 
1024 inline __host__ __device__ int3 operator*(int3 a, int3 b)
1025 {
1026  return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
1027 }
1028 inline __host__ __device__ void operator*=(int3 &a, int3 b)
1029 {
1030  a.x *= b.x;
1031  a.y *= b.y;
1032  a.z *= b.z;
1033 }
1034 inline __host__ __device__ int3 operator*(int3 a, int b)
1035 {
1036  return make_int3(a.x * b, a.y * b, a.z * b);
1037 }
1038 inline __host__ __device__ int3 operator*(int b, int3 a)
1039 {
1040  return make_int3(b * a.x, b * a.y, b * a.z);
1041 }
1042 inline __host__ __device__ void operator*=(int3 &a, int b)
1043 {
1044  a.x *= b;
1045  a.y *= b;
1046  a.z *= b;
1047 }
1048 
1049 inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
1050 {
1051  return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
1052 }
1053 inline __host__ __device__ void operator*=(uint3 &a, uint3 b)
1054 {
1055  a.x *= b.x;
1056  a.y *= b.y;
1057  a.z *= b.z;
1058 }
1059 inline __host__ __device__ uint3 operator*(uint3 a, uint b)
1060 {
1061  return make_uint3(a.x * b, a.y * b, a.z * b);
1062 }
1063 inline __host__ __device__ uint3 operator*(uint b, uint3 a)
1064 {
1065  return make_uint3(b * a.x, b * a.y, b * a.z);
1066 }
1067 inline __host__ __device__ void operator*=(uint3 &a, uint b)
1068 {
1069  a.x *= b;
1070  a.y *= b;
1071  a.z *= b;
1072 }
1073 
1074 inline __host__ __device__ uchar3 operator*(uchar3 a, uchar3 b)
1075 {
1076  return make_uchar3(a.x * b.x, a.y * b.y, a.z * b.z);
1077 }
1078 inline __host__ __device__ void operator*=(uchar3 &a, uchar3 b)
1079 {
1080  a.x *= b.x;
1081  a.y *= b.y;
1082  a.z *= b.z;
1083 }
1084 inline __host__ __device__ uchar3 operator*(uchar3 a, uchar b)
1085 {
1086  return make_uchar3(a.x * b, a.y * b, a.z * b);
1087 }
1088 inline __host__ __device__ uchar3 operator*(uchar b, uchar3 a)
1089 {
1090  return make_uchar3(b * a.x, b * a.y, b * a.z);
1091 }
1092 inline __host__ __device__ uchar3 operator*(uchar3 a, float b)
1093 {
1094  return make_uchar3(a.x * b, a.y * b, a.z * b);
1095 }
1096 inline __host__ __device__ uchar3 operator*(float b, uchar3 a)
1097 {
1098  return make_uchar3(b * a.x, b * a.y, b * a.z);
1099 }
1100 inline __host__ __device__ void operator*=(uchar3 &a, uchar b)
1101 {
1102  a.x *= b;
1103  a.y *= b;
1104  a.z *= b;
1105 }
1106 inline __host__ __device__ void operator*=(uchar3 &a, float b)
1107 {
1108  a.x *= b;
1109  a.y *= b;
1110  a.z *= b;
1111 }
1112 
1113 inline __host__ __device__ float4 operator*(float4 a, float4 b)
1114 {
1115  return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
1116 }
1117 inline __host__ __device__ void operator*=(float4 &a, float4 b)
1118 {
1119  a.x *= b.x;
1120  a.y *= b.y;
1121  a.z *= b.z;
1122  a.w *= b.w;
1123 }
1124 inline __host__ __device__ float4 operator*(float4 a, float b)
1125 {
1126  return make_float4(a.x * b, a.y * b, a.z * b, a.w * b);
1127 }
1128 inline __host__ __device__ float4 operator*(float b, float4 a)
1129 {
1130  return make_float4(b * a.x, b * a.y, b * a.z, b * a.w);
1131 }
1132 inline __host__ __device__ void operator*=(float4 &a, float b)
1133 {
1134  a.x *= b;
1135  a.y *= b;
1136  a.z *= b;
1137  a.w *= b;
1138 }
1139 
1140 inline __host__ __device__ int4 operator*(int4 a, int4 b)
1141 {
1142  return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
1143 }
1144 inline __host__ __device__ void operator*=(int4 &a, int4 b)
1145 {
1146  a.x *= b.x;
1147  a.y *= b.y;
1148  a.z *= b.z;
1149  a.w *= b.w;
1150 }
1151 inline __host__ __device__ int4 operator*(int4 a, int b)
1152 {
1153  return make_int4(a.x * b, a.y * b, a.z * b, a.w * b);
1154 }
1155 inline __host__ __device__ int4 operator*(int b, int4 a)
1156 {
1157  return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);
1158 }
1159 inline __host__ __device__ void operator*=(int4 &a, int b)
1160 {
1161  a.x *= b;
1162  a.y *= b;
1163  a.z *= b;
1164  a.w *= b;
1165 }
1166 
1167 inline __host__ __device__ uint4 operator*(uint4 a, uint4 b)
1168 {
1169  return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
1170 }
1171 inline __host__ __device__ void operator*=(uint4 &a, uint4 b)
1172 {
1173  a.x *= b.x;
1174  a.y *= b.y;
1175  a.z *= b.z;
1176  a.w *= b.w;
1177 }
1178 inline __host__ __device__ uint4 operator*(uint4 a, uint b)
1179 {
1180  return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b);
1181 }
1182 inline __host__ __device__ uint4 operator*(uint b, uint4 a)
1183 {
1184  return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);
1185 }
1186 inline __host__ __device__ void operator*=(uint4 &a, uint b)
1187 {
1188  a.x *= b;
1189  a.y *= b;
1190  a.z *= b;
1191  a.w *= b;
1192 }
1193 
1194 inline __host__ __device__ uchar4 operator*(uchar4 a, uchar4 b)
1195 {
1196  return make_uchar4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
1197 }
1198 inline __host__ __device__ void operator*=(uchar4 &a, uchar4 b)
1199 {
1200  a.x *= b.x;
1201  a.y *= b.y;
1202  a.z *= b.z;
1203  a.w *= b.w;
1204 }
1205 inline __host__ __device__ uchar4 operator*(uchar4 a, uchar b)
1206 {
1207  return make_uchar4(a.x * b, a.y * b, a.z * b, a.w * b);
1208 }
1209 inline __host__ __device__ uchar4 operator*(uchar b, uchar4 a)
1210 {
1211  return make_uchar4(b * a.x, b * a.y, b * a.z, b * a.w);
1212 }
1213 inline __host__ __device__ uchar4 operator*(uchar4 a, float b)
1214 {
1215  return make_uchar4(a.x * b, a.y * b, a.z * b, a.w * b);
1216 }
1217 inline __host__ __device__ uchar4 operator*(float b, uchar4 a)
1218 {
1219  return make_uchar4(b * a.x, b * a.y, b * a.z, b * a.w);
1220 }
1221 inline __host__ __device__ void operator*=(uchar4 &a, uchar b)
1222 {
1223  a.x *= b;
1224  a.y *= b;
1225  a.z *= b;
1226  a.w *= b;
1227 }
1228 inline __host__ __device__ void operator*=(uchar4 &a, float b)
1229 {
1230  a.x *= b;
1231  a.y *= b;
1232  a.z *= b;
1233  a.w *= b;
1234 }
1235 
1237 // divide
1239 
// Component-wise quotient a / b.
inline __host__ __device__ float2 operator/(float2 a, float2 b)
{
    float2 q;
    q.x = a.x / b.x;
    q.y = a.y / b.y;
    return q;
}
// In-place component-wise division of a by b.
inline __host__ __device__ void operator/=(float2 &a, float2 b)
{
    a = a / b;
}
// Divide both components of a by the scalar b.
inline __host__ __device__ float2 operator/(float2 a, float b)
{
    float2 q;
    q.x = a.x / b;
    q.y = a.y / b;
    return q;
}
// In-place division of both components of a by the scalar b.
inline __host__ __device__ void operator/=(float2 &a, float b)
{
    a = a / b;
}
// Divide the scalar b by each component of a.
inline __host__ __device__ float2 operator/(float b, float2 a)
{
    float2 q;
    q.x = b / a.x;
    q.y = b / a.y;
    return q;
}
1262 
// Component-wise quotient a / b.
inline __host__ __device__ float3 operator/(float3 a, float3 b)
{
    float3 q;
    q.x = a.x / b.x;
    q.y = a.y / b.y;
    q.z = a.z / b.z;
    return q;
}
// In-place component-wise division of a by b.
inline __host__ __device__ void operator/=(float3 &a, float3 b)
{
    a = a / b;
}
// Divide all three components of a by the scalar b.
inline __host__ __device__ float3 operator/(float3 a, float b)
{
    float3 q;
    q.x = a.x / b;
    q.y = a.y / b;
    q.z = a.z / b;
    return q;
}
// In-place division of all three components of a by the scalar b.
inline __host__ __device__ void operator/=(float3 &a, float b)
{
    a = a / b;
}
// Divide the scalar b by each component of a.
inline __host__ __device__ float3 operator/(float b, float3 a)
{
    float3 q;
    q.x = b / a.x;
    q.y = b / a.y;
    q.z = b / a.z;
    return q;
}
1287 
// Component-wise quotient a / b.
inline __host__ __device__ float4 operator/(float4 a, float4 b)
{
    float4 q;
    q.x = a.x / b.x;
    q.y = a.y / b.y;
    q.z = a.z / b.z;
    q.w = a.w / b.w;
    return q;
}
// In-place component-wise division of a by b.
inline __host__ __device__ void operator/=(float4 &a, float4 b)
{
    a = a / b;
}
// Divide all four components of a by the scalar b.
inline __host__ __device__ float4 operator/(float4 a, float b)
{
    float4 q;
    q.x = a.x / b;
    q.y = a.y / b;
    q.z = a.z / b;
    q.w = a.w / b;
    return q;
}
// In-place division of all four components of a by the scalar b.
inline __host__ __device__ void operator/=(float4 &a, float b)
{
    a = a / b;
}
// Divide the scalar b by each component of a.
inline __host__ __device__ float4 operator/(float b, float4 a)
{
    float4 q;
    q.x = b / a.x;
    q.y = b / a.y;
    q.z = b / a.z;
    q.w = b / a.w;
    return q;
}
1314 
1316 // min
1318 
// Component-wise minimum of two float2 values (scalar fminf per component).
inline __host__ __device__ float2 fminf(float2 a, float2 b)
{
    float2 lo;
    lo.x = fminf(a.x, b.x);
    lo.y = fminf(a.y, b.y);
    return lo;
}
// Component-wise minimum of two float3 values.
inline __host__ __device__ float3 fminf(float3 a, float3 b)
{
    float3 lo;
    lo.x = fminf(a.x, b.x);
    lo.y = fminf(a.y, b.y);
    lo.z = fminf(a.z, b.z);
    return lo;
}
// Component-wise minimum of two float4 values.
inline __host__ __device__ float4 fminf(float4 a, float4 b)
{
    float4 lo;
    lo.x = fminf(a.x, b.x);
    lo.y = fminf(a.y, b.y);
    lo.z = fminf(a.z, b.z);
    lo.w = fminf(a.w, b.w);
    return lo;
}
1331 
// Component-wise minimum of two int2 values (scalar min per component).
inline __host__ __device__ int2 min(int2 a, int2 b)
{
    int2 lo;
    lo.x = min(a.x, b.x);
    lo.y = min(a.y, b.y);
    return lo;
}
// Component-wise minimum of two int3 values.
inline __host__ __device__ int3 min(int3 a, int3 b)
{
    int3 lo;
    lo.x = min(a.x, b.x);
    lo.y = min(a.y, b.y);
    lo.z = min(a.z, b.z);
    return lo;
}
// Component-wise minimum of two int4 values.
inline __host__ __device__ int4 min(int4 a, int4 b)
{
    int4 lo;
    lo.x = min(a.x, b.x);
    lo.y = min(a.y, b.y);
    lo.z = min(a.z, b.z);
    lo.w = min(a.w, b.w);
    return lo;
}
1344 
// Component-wise minimum of two uint2 values.
//
// The comparison is done directly on the unsigned components instead of
// calling the scalar min(): the host-side fallback declared near the top of
// this header is min(int, int), and routing unsigned values above INT_MAX
// through it would compare them as signed. The result is the same as an
// unsigned min() wherever one is available.
inline __host__ __device__ uint2 min(uint2 a, uint2 b)
{
    return make_uint2(a.x < b.x ? a.x : b.x,
                      a.y < b.y ? a.y : b.y);
}
// Component-wise minimum of two uint3 values (unsigned comparison, see above).
inline __host__ __device__ uint3 min(uint3 a, uint3 b)
{
    return make_uint3(a.x < b.x ? a.x : b.x,
                      a.y < b.y ? a.y : b.y,
                      a.z < b.z ? a.z : b.z);
}
// Component-wise minimum of two uint4 values (unsigned comparison, see above).
inline __host__ __device__ uint4 min(uint4 a, uint4 b)
{
    return make_uint4(a.x < b.x ? a.x : b.x,
                      a.y < b.y ? a.y : b.y,
                      a.z < b.z ? a.z : b.z,
                      a.w < b.w ? a.w : b.w);
}
1357 
1359 // max
1361 
// Component-wise maximum of two float2 values (scalar fmaxf per component).
inline __host__ __device__ float2 fmaxf(float2 a, float2 b)
{
    float2 hi;
    hi.x = fmaxf(a.x, b.x);
    hi.y = fmaxf(a.y, b.y);
    return hi;
}
// Component-wise maximum of two float3 values.
inline __host__ __device__ float3 fmaxf(float3 a, float3 b)
{
    float3 hi;
    hi.x = fmaxf(a.x, b.x);
    hi.y = fmaxf(a.y, b.y);
    hi.z = fmaxf(a.z, b.z);
    return hi;
}
// Component-wise maximum of two float4 values.
inline __host__ __device__ float4 fmaxf(float4 a, float4 b)
{
    float4 hi;
    hi.x = fmaxf(a.x, b.x);
    hi.y = fmaxf(a.y, b.y);
    hi.z = fmaxf(a.z, b.z);
    hi.w = fmaxf(a.w, b.w);
    return hi;
}
1374 
// Component-wise maximum of two int2 values (scalar max per component).
inline __host__ __device__ int2 max(int2 a, int2 b)
{
    int2 hi;
    hi.x = max(a.x, b.x);
    hi.y = max(a.y, b.y);
    return hi;
}
// Component-wise maximum of two int3 values.
inline __host__ __device__ int3 max(int3 a, int3 b)
{
    int3 hi;
    hi.x = max(a.x, b.x);
    hi.y = max(a.y, b.y);
    hi.z = max(a.z, b.z);
    return hi;
}
// Component-wise maximum of two int4 values.
inline __host__ __device__ int4 max(int4 a, int4 b)
{
    int4 hi;
    hi.x = max(a.x, b.x);
    hi.y = max(a.y, b.y);
    hi.z = max(a.z, b.z);
    hi.w = max(a.w, b.w);
    return hi;
}
1387 
// Component-wise maximum of two uint2 values.
//
// The comparison is done directly on the unsigned components instead of
// calling the scalar max(): the host-side fallback declared near the top of
// this header is max(int, int), and routing unsigned values above INT_MAX
// through it would compare them as signed. The result is the same as an
// unsigned max() wherever one is available.
inline __host__ __device__ uint2 max(uint2 a, uint2 b)
{
    return make_uint2(a.x > b.x ? a.x : b.x,
                      a.y > b.y ? a.y : b.y);
}
// Component-wise maximum of two uint3 values (unsigned comparison, see above).
inline __host__ __device__ uint3 max(uint3 a, uint3 b)
{
    return make_uint3(a.x > b.x ? a.x : b.x,
                      a.y > b.y ? a.y : b.y,
                      a.z > b.z ? a.z : b.z);
}
// Component-wise maximum of two uint4 values (unsigned comparison, see above).
inline __host__ __device__ uint4 max(uint4 a, uint4 b)
{
    return make_uint4(a.x > b.x ? a.x : b.x,
                      a.y > b.y ? a.y : b.y,
                      a.z > b.z ? a.z : b.z,
                      a.w > b.w ? a.w : b.w);
}
1400 
1402 // lerp
1403 // - linear interpolation between a and b, based on value t in [0, 1] range
1405 
// Scalar linear interpolation: yields a when t == 0 and moves toward b as t
// grows, computed as a + t * (b - a).
inline __device__ __host__ float lerp(float a, float b, float t)
{
    const float span = b - a;
    return a + t * span;
}
// Linear interpolation between two float2 values with a shared scalar weight t:
// a + t * (b - a), using this header's vector operators.
inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
{
    const float2 span = b - a;
    return a + t * span;
}
// Linear interpolation between two float3 values with a shared scalar weight t.
inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
{
    const float3 span = b - a;
    return a + t * span;
}
// Linear interpolation between two float4 values with a shared scalar weight t.
inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
{
    const float4 span = b - a;
    return a + t * span;
}
1422 
1424 // clamp
1425 // - clamp the value v to be in the range [a, b]
1427 
// Clamp f into [a, b]: the value is first capped at b, then raised to at
// least a (so a wins if the bounds are given in the wrong order).
inline __device__ __host__ float clamp(float f, float a, float b)
{
    const float capped = fminf(f, b);
    return fmaxf(a, capped);
}
// Integer clamp of f into [a, b]; same cap-then-raise order as the float form.
inline __device__ __host__ int clamp(int f, int a, int b)
{
    const int capped = min(f, b);
    return max(a, capped);
}
// Unsigned clamp of f into [a, b]; same cap-then-raise order as above.
//
// The comparisons are written out instead of calling min()/max(): the
// host-side fallbacks declared near the top of this header take int, so
// unsigned values above INT_MAX would be compared as signed there. The result
// is the same as unsigned min()/max() wherever those are available.
inline __device__ __host__ uint clamp(uint f, uint a, uint b)
{
    const uint capped = (f < b) ? f : b;
    return (a > capped) ? a : capped;
}
1440 
// Clamp each component of v into the scalar range [a, b].
inline __device__ __host__ float2 clamp(float2 v, float a, float b)
{
    float2 r;
    r.x = clamp(v.x, a, b);
    r.y = clamp(v.y, a, b);
    return r;
}
// Clamp each component of v into the matching per-component range [a, b].
inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
{
    float2 r;
    r.x = clamp(v.x, a.x, b.x);
    r.y = clamp(v.y, a.y, b.y);
    return r;
}
// Clamp each component of v into the scalar range [a, b].
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
    float3 r;
    r.x = clamp(v.x, a, b);
    r.y = clamp(v.y, a, b);
    r.z = clamp(v.z, a, b);
    return r;
}
// Clamp each component of v into the matching per-component range [a, b].
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
    float3 r;
    r.x = clamp(v.x, a.x, b.x);
    r.y = clamp(v.y, a.y, b.y);
    r.z = clamp(v.z, a.z, b.z);
    return r;
}
// Clamp each component of v into the scalar range [a, b].
inline __device__ __host__ float4 clamp(float4 v, float a, float b)
{
    float4 r;
    r.x = clamp(v.x, a, b);
    r.y = clamp(v.y, a, b);
    r.z = clamp(v.z, a, b);
    r.w = clamp(v.w, a, b);
    return r;
}
// Clamp each component of v into the matching per-component range [a, b].
inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
{
    float4 r;
    r.x = clamp(v.x, a.x, b.x);
    r.y = clamp(v.y, a.y, b.y);
    r.z = clamp(v.z, a.z, b.z);
    r.w = clamp(v.w, a.w, b.w);
    return r;
}
1465 
// Clamp each component of v into the scalar range [a, b].
inline __device__ __host__ int2 clamp(int2 v, int a, int b)
{
    int2 r;
    r.x = clamp(v.x, a, b);
    r.y = clamp(v.y, a, b);
    return r;
}
// Clamp each component of v into the matching per-component range [a, b].
inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b)
{
    int2 r;
    r.x = clamp(v.x, a.x, b.x);
    r.y = clamp(v.y, a.y, b.y);
    return r;
}
// Clamp each component of v into the scalar range [a, b].
inline __device__ __host__ int3 clamp(int3 v, int a, int b)
{
    int3 r;
    r.x = clamp(v.x, a, b);
    r.y = clamp(v.y, a, b);
    r.z = clamp(v.z, a, b);
    return r;
}
// Clamp each component of v into the matching per-component range [a, b].
inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
{
    int3 r;
    r.x = clamp(v.x, a.x, b.x);
    r.y = clamp(v.y, a.y, b.y);
    r.z = clamp(v.z, a.z, b.z);
    return r;
}
// Clamp each component of v into the scalar range [a, b].
inline __device__ __host__ int4 clamp(int4 v, int a, int b)
{
    int4 r;
    r.x = clamp(v.x, a, b);
    r.y = clamp(v.y, a, b);
    r.z = clamp(v.z, a, b);
    r.w = clamp(v.w, a, b);
    return r;
}
// Clamp each component of v into the matching per-component range [a, b].
inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)
{
    int4 r;
    r.x = clamp(v.x, a.x, b.x);
    r.y = clamp(v.y, a.y, b.y);
    r.z = clamp(v.z, a.z, b.z);
    r.w = clamp(v.w, a.w, b.w);
    return r;
}
1490 
// Clamp each component of v into the scalar range [a, b].
inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b)
{
    uint2 r;
    r.x = clamp(v.x, a, b);
    r.y = clamp(v.y, a, b);
    return r;
}
// Clamp each component of v into the matching per-component range [a, b].
inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b)
{
    uint2 r;
    r.x = clamp(v.x, a.x, b.x);
    r.y = clamp(v.y, a.y, b.y);
    return r;
}
// Clamp each component of v into the scalar range [a, b].
inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
{
    uint3 r;
    r.x = clamp(v.x, a, b);
    r.y = clamp(v.y, a, b);
    r.z = clamp(v.z, a, b);
    return r;
}
// Clamp each component of v into the matching per-component range [a, b].
inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
{
    uint3 r;
    r.x = clamp(v.x, a.x, b.x);
    r.y = clamp(v.y, a.y, b.y);
    r.z = clamp(v.z, a.z, b.z);
    return r;
}
// Clamp each component of v into the scalar range [a, b].
inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b)
{
    uint4 r;
    r.x = clamp(v.x, a, b);
    r.y = clamp(v.y, a, b);
    r.z = clamp(v.z, a, b);
    r.w = clamp(v.w, a, b);
    return r;
}
// Clamp each component of v into the matching per-component range [a, b].
inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)
{
    uint4 r;
    r.x = clamp(v.x, a.x, b.x);
    r.y = clamp(v.y, a.y, b.y);
    r.z = clamp(v.z, a.z, b.z);
    r.w = clamp(v.w, a.w, b.w);
    return r;
}
1515 
1517 // dot product
1519 
// Dot product of two float2 values: sum of component products, x term first.
inline __host__ __device__ float dot(float2 a, float2 b)
{
    float sum = a.x * b.x;
    sum += a.y * b.y;
    return sum;
}
// Dot product of two float3 values, accumulated in x, y, z order.
inline __host__ __device__ float dot(float3 a, float3 b)
{
    float sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    return sum;
}
// Dot product of two float4 values, accumulated in x, y, z, w order.
inline __host__ __device__ float dot(float4 a, float4 b)
{
    float sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    sum += a.w * b.w;
    return sum;
}
1532 
// Integer dot product of two int2 values (sum of component products).
inline __host__ __device__ int dot(int2 a, int2 b)
{
    int sum = a.x * b.x;
    sum += a.y * b.y;
    return sum;
}
// Integer dot product of two int3 values.
inline __host__ __device__ int dot(int3 a, int3 b)
{
    int sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    return sum;
}
// Integer dot product of two int4 values.
inline __host__ __device__ int dot(int4 a, int4 b)
{
    int sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    sum += a.w * b.w;
    return sum;
}
1545 
// Unsigned dot product of two uint2 values (sum of component products).
inline __host__ __device__ uint dot(uint2 a, uint2 b)
{
    uint sum = a.x * b.x;
    sum += a.y * b.y;
    return sum;
}
// Unsigned dot product of two uint3 values.
inline __host__ __device__ uint dot(uint3 a, uint3 b)
{
    uint sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    return sum;
}
// Unsigned dot product of two uint4 values.
inline __host__ __device__ uint dot(uint4 a, uint4 b)
{
    uint sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    sum += a.w * b.w;
    return sum;
}
1558 
1560 // length
1562 
// Euclidean length of a float2: square root of the vector dotted with itself.
inline __host__ __device__ float length(float2 v)
{
    const float lenSq = dot(v, v);
    return sqrtf(lenSq);
}
// Euclidean length of a float3.
inline __host__ __device__ float length(float3 v)
{
    const float lenSq = dot(v, v);
    return sqrtf(lenSq);
}
// Euclidean length of a float4.
inline __host__ __device__ float length(float4 v)
{
    const float lenSq = dot(v, v);
    return sqrtf(lenSq);
}
1575 
1577 // normalize
1579 
// Scale v by the reciprocal square root of dot(v, v).
// No special handling is done for a zero-length input.
inline __host__ __device__ float2 normalize(float2 v)
{
    return v * rsqrtf(dot(v, v));
}
// Scale v by the reciprocal square root of dot(v, v) (float3 form).
inline __host__ __device__ float3 normalize(float3 v)
{
    return v * rsqrtf(dot(v, v));
}
// Scale v by the reciprocal square root of dot(v, v) (float4 form).
inline __host__ __device__ float4 normalize(float4 v)
{
    return v * rsqrtf(dot(v, v));
}
1595 
1597 // floor
1599 
// Apply the scalar floorf to each component of a float2.
inline __host__ __device__ float2 floorf(float2 v)
{
    float2 r;
    r.x = floorf(v.x);
    r.y = floorf(v.y);
    return r;
}
// Apply the scalar floorf to each component of a float3.
inline __host__ __device__ float3 floorf(float3 v)
{
    float3 r;
    r.x = floorf(v.x);
    r.y = floorf(v.y);
    r.z = floorf(v.z);
    return r;
}
// Apply the scalar floorf to each component of a float4.
inline __host__ __device__ float4 floorf(float4 v)
{
    float4 r;
    r.x = floorf(v.x);
    r.y = floorf(v.y);
    r.z = floorf(v.z);
    r.w = floorf(v.w);
    return r;
}
1612 
1614 // frac - returns the fractional portion of a scalar or each vector component
1616 
// Fractional part of v, defined as v minus floorf(v); for negative inputs
// the result is therefore measured up from the next lower integer.
inline __host__ __device__ float fracf(float v)
{
    const float whole = floorf(v);
    return v - whole;
}
// Fractional part of each component of a float2 (scalar fracf per component).
inline __host__ __device__ float2 fracf(float2 v)
{
    float2 r;
    r.x = fracf(v.x);
    r.y = fracf(v.y);
    return r;
}
// Fractional part of each component of a float3.
inline __host__ __device__ float3 fracf(float3 v)
{
    float3 r;
    r.x = fracf(v.x);
    r.y = fracf(v.y);
    r.z = fracf(v.z);
    return r;
}
// Fractional part of each component of a float4.
inline __host__ __device__ float4 fracf(float4 v)
{
    float4 r;
    r.x = fracf(v.x);
    r.y = fracf(v.y);
    r.z = fracf(v.z);
    r.w = fracf(v.w);
    return r;
}
1633 
1635 // fmod
1637 
// Component-wise floating-point remainder of a / b (scalar fmodf per component).
inline __host__ __device__ float2 fmodf(float2 a, float2 b)
{
    float2 r;
    r.x = fmodf(a.x, b.x);
    r.y = fmodf(a.y, b.y);
    return r;
}
// Component-wise floating-point remainder for float3.
inline __host__ __device__ float3 fmodf(float3 a, float3 b)
{
    float3 r;
    r.x = fmodf(a.x, b.x);
    r.y = fmodf(a.y, b.y);
    r.z = fmodf(a.z, b.z);
    return r;
}
// Component-wise floating-point remainder for float4.
inline __host__ __device__ float4 fmodf(float4 a, float4 b)
{
    float4 r;
    r.x = fmodf(a.x, b.x);
    r.y = fmodf(a.y, b.y);
    r.z = fmodf(a.z, b.z);
    r.w = fmodf(a.w, b.w);
    return r;
}
1650 
1652 // absolute value
1654 
// Component-wise absolute value of a float2.
//
// Uses the single-precision fabsf on each component, matching the other
// f-suffixed math calls in this header (sqrtf, floorf, fmodf) and avoiding a
// round trip through double where only the double fabs is visible.
inline __host__ __device__ float2 fabs(float2 v)
{
    return make_float2(fabsf(v.x), fabsf(v.y));
}
// Component-wise absolute value of a float3 (single-precision fabsf).
inline __host__ __device__ float3 fabs(float3 v)
{
    return make_float3(fabsf(v.x), fabsf(v.y), fabsf(v.z));
}
// Component-wise absolute value of a float4 (single-precision fabsf).
inline __host__ __device__ float4 fabs(float4 v)
{
    return make_float4(fabsf(v.x), fabsf(v.y), fabsf(v.z), fabsf(v.w));
}
1667 
// Component-wise absolute value of an int2 (scalar abs per component).
inline __host__ __device__ int2 abs(int2 v)
{
    int2 r;
    r.x = abs(v.x);
    r.y = abs(v.y);
    return r;
}
// Component-wise absolute value of an int3.
inline __host__ __device__ int3 abs(int3 v)
{
    int3 r;
    r.x = abs(v.x);
    r.y = abs(v.y);
    r.z = abs(v.z);
    return r;
}
// Component-wise absolute value of an int4.
inline __host__ __device__ int4 abs(int4 v)
{
    int4 r;
    r.x = abs(v.x);
    r.y = abs(v.y);
    r.z = abs(v.z);
    r.w = abs(v.w);
    return r;
}
1680 
1682 // reflect
1683 // - returns reflection of incident ray I around surface normal N
1684 // - N should be normalized, reflected vector's length is equal to length of I
1686 
// Reflect the incident vector i about the normal n: i - 2 * n * dot(n, i).
// n is used as given; it is not normalized here.
inline __host__ __device__ float3 reflect(float3 i, float3 n)
{
    const float3 twiceN = 2.0f * n;
    const float  proj   = dot(n, i);
    return i - twiceN * proj;
}
1691 
1693 // cross product
1695 
// Cross product a x b of two float3 vectors.
inline __host__ __device__ float3 cross(float3 a, float3 b)
{
    float3 c;
    c.x = a.y*b.z - a.z*b.y;
    c.y = a.z*b.x - a.x*b.z;
    c.z = a.x*b.y - a.y*b.x;
    return c;
}
1700 
1702 // smoothstep
1703 // - returns 0 if x < a
1704 // - returns 1 if x > b
1705 // - otherwise returns smooth interpolation between 0 and 1 based on x
1707 
// Scalar smoothstep: map x to t = (x - a) / (b - a), clamp t to [0, 1], and
// return the cubic t*t*(3 - 2t). The case a == b is not special-cased.
inline __device__ __host__ float smoothstep(float a, float b, float x)
{
    const float t   = clamp((x - a) / (b - a), 0.0f, 1.0f);
    const float tSq = t*t;
    return tSq*(3.0f - (2.0f*t));
}
// Per-component smoothstep for float2: t = clamp((x - a) / (b - a), 0, 1),
// result = t*t*(3 - 2t), evaluated with this header's vector operators.
inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x)
{
    const float2 t    = clamp((x - a) / (b - a), 0.0f, 1.0f);
    const float2 tSq  = t*t;
    const float2 poly = make_float2(3.0f) - (make_float2(2.0f)*t);
    return tSq*poly;
}
// Per-component smoothstep for float3.
inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x)
{
    const float3 t    = clamp((x - a) / (b - a), 0.0f, 1.0f);
    const float3 tSq  = t*t;
    const float3 poly = make_float3(3.0f) - (make_float3(2.0f)*t);
    return tSq*poly;
}
// Per-component smoothstep for float4.
inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x)
{
    const float4 t    = clamp((x - a) / (b - a), 0.0f, 1.0f);
    const float4 tSq  = t*t;
    const float4 poly = make_float4(3.0f) - (make_float4(2.0f)*t);
    return tSq*poly;
}
1728 
1730 
1731 #endif
1732 
make_uint3
__host__ __device__ uint3 make_uint3(uint s)
Definition: cudaMath.h:186
make_int2
__host__ __device__ int2 make_int2(int s)
Definition: cudaMath.h:98
floorf
__host__ __device__ float2 floorf(float2 v)
Definition: cudaMath.h:1600
ushort
unsigned short ushort
Definition: cudaMath.h:38
make_uchar3
__host__ __device__ uchar3 make_uchar3(uchar s)
Definition: cudaMath.h:207
min
int min(int a, int b)
Definition: cudaMath.h:66
operator+=
__host__ __device__ void operator+=(float2 &a, float2 b)
Definition: cudaMath.h:394
make_float3
__host__ __device__ float3 make_float3(float s)
Definition: cudaMath.h:128
uint
unsigned int uint
Definition: cudaMath.h:36
make_uint4
__host__ __device__ uint4 make_uint4(uint s)
Definition: cudaMath.h:303
fracf
__host__ __device__ float fracf(float v)
Definition: cudaMath.h:1617
clamp
__device__ __host__ float clamp(float f, float a, float b)
Definition: cudaMath.h:1428
make_uint2
__host__ __device__ uint2 make_uint2(uint s)
Definition: cudaMath.h:115
dot
__host__ __device__ float dot(float2 a, float2 b)
Definition: cudaMath.h:1520
operator-=
__host__ __device__ void operator-=(float2 &a, float2 b)
Definition: cudaMath.h:673
rsqrtf
float rsqrtf(float x)
Definition: cudaMath.h:71
reflect
__host__ __device__ float3 reflect(float3 i, float3 n)
Definition: cudaMath.h:1687
lerp
__device__ __host__ float lerp(float a, float b, float t)
Definition: cudaMath.h:1406
operator/=
__host__ __device__ void operator/=(float2 &a, float2 b)
Definition: cudaMath.h:1244
max
int max(int a, int b)
Definition: cudaMath.h:61
cross
__host__ __device__ float3 cross(float3 a, float3 b)
Definition: cudaMath.h:1696
fminf
float fminf(float a, float b)
Definition: cudaMath.h:51
fabs
__host__ __device__ float2 fabs(float2 v)
Definition: cudaMath.h:1655
make_int4
__host__ __device__ int4 make_int4(int s)
Definition: cudaMath.h:281
operator*
__host__ __device__ float2 operator*(float2 a, float2 b)
Definition: cudaMath.h:894
fmodf
__host__ __device__ float2 fmodf(float2 a, float2 b)
Definition: cudaMath.h:1638
make_float4
__host__ __device__ float4 make_float4(float s)
Definition: cudaMath.h:248
length
__host__ __device__ float length(float2 v)
Definition: cudaMath.h:1563
make_float2
__host__ __device__ float2 make_float2(float s)
Definition: cudaMath.h:81
operator+
__host__ __device__ float2 operator+(float2 a, float2 b)
Definition: cudaMath.h:390
uchar
unsigned char uchar
Definition: cudaMath.h:37
operator-
__host__ __device__ float2 operator-(float2 &a)
Definition: cudaMath.h:361
make_uchar4
__host__ __device__ uchar4 make_uchar4(uchar s)
Definition: cudaMath.h:320
smoothstep
__device__ __host__ float smoothstep(float a, float b, float x)
Definition: cudaMath.h:1708
operator/
__host__ __device__ float2 operator/(float2 a, float2 b)
Definition: cudaMath.h:1240
fmaxf
float fmaxf(float a, float b)
Definition: cudaMath.h:56
normalize
__host__ __device__ float2 normalize(float2 v)
Definition: cudaMath.h:1580
abs
__host__ __device__ int2 abs(int2 v)
Definition: cudaMath.h:1668
make_int3
__host__ __device__ int3 make_int3(int s)
Definition: cudaMath.h:165
operator*=
__host__ __device__ void operator*=(float2 &a, float2 b)
Definition: cudaMath.h:898