Jetson Inference
DNN Vision Library
cudaMath.h
Go to the documentation of this file.
1 
12 /*
13  * This file implements common mathematical operations on vector types
14  * (float3, float4 etc.) since these are not provided as standard by CUDA.
15  *
16  * The syntax is modeled on the Cg standard library.
17  *
18  * This is part of the Helper library includes
19  *
20  * Thanks to Linh Hah for additions and fixes.
21  */
22 
23 #ifndef __CUDA_HELPER_MATH_H_
24 #define __CUDA_HELPER_MATH_H_
25 
26 #include "cuda_runtime.h"
27 
33 
35 
36 typedef unsigned int uint;
37 typedef unsigned char uchar;
38 typedef unsigned short ushort;
39 
40 #ifndef EXIT_WAIVED
41 #define EXIT_WAIVED 2
42 #endif
43 
44 #ifndef __CUDACC__
45 #include <math.h>
46 
48 // host implementations of CUDA functions
50 
// Host fallback: smaller of two floats (returns b when a < b is false).
inline float fminf(float a, float b)
{
    if (a < b)
        return a;
    return b;
}

// Host fallback: larger of two floats (returns b when a > b is false).
inline float fmaxf(float a, float b)
{
    if (a > b)
        return a;
    return b;
}

// Host fallback: larger of two ints.
inline int max(int a, int b)
{
    if (a > b)
        return a;
    return b;
}

// Host fallback: smaller of two ints.
inline int min(int a, int b)
{
    if (a < b)
        return a;
    return b;
}

// Host fallback: reciprocal square root, computed as 1 / sqrtf(x).
inline float rsqrtf(float x)
{
    const float root = sqrtf(x);
    return 1.0f / root;
}
75 #endif
76 
78 // constructors
80 
81 inline __host__ __device__ float2 make_float2(float s)
82 {
83  return make_float2(s, s);
84 }
85 inline __host__ __device__ float2 make_float2(float3 a)
86 {
87  return make_float2(a.x, a.y);
88 }
89 inline __host__ __device__ float2 make_float2(int2 a)
90 {
91  return make_float2(float(a.x), float(a.y));
92 }
93 inline __host__ __device__ float2 make_float2(uint2 a)
94 {
95  return make_float2(float(a.x), float(a.y));
96 }
97 
98 inline __host__ __device__ int2 make_int2(int s)
99 {
100  return make_int2(s, s);
101 }
102 inline __host__ __device__ int2 make_int2(int3 a)
103 {
104  return make_int2(a.x, a.y);
105 }
106 inline __host__ __device__ int2 make_int2(uint2 a)
107 {
108  return make_int2(int(a.x), int(a.y));
109 }
110 inline __host__ __device__ int2 make_int2(float2 a)
111 {
112  return make_int2(int(a.x), int(a.y));
113 }
114 
115 inline __host__ __device__ uint2 make_uint2(uint s)
116 {
117  return make_uint2(s, s);
118 }
119 inline __host__ __device__ uint2 make_uint2(uint3 a)
120 {
121  return make_uint2(a.x, a.y);
122 }
123 inline __host__ __device__ uint2 make_uint2(int2 a)
124 {
125  return make_uint2(uint(a.x), uint(a.y));
126 }
127 
128 inline __host__ __device__ float3 make_float3(float s)
129 {
130  return make_float3(s, s, s);
131 }
132 inline __host__ __device__ float3 make_float3(float2 a)
133 {
134  return make_float3(a.x, a.y, 0.0f);
135 }
136 inline __host__ __device__ float3 make_float3(float2 a, float s)
137 {
138  return make_float3(a.x, a.y, s);
139 }
140 inline __host__ __device__ float3 make_float3(float3 a)
141 {
142  return make_float3(a.x, a.y, a.z);
143 }
144 inline __host__ __device__ float3 make_float3(float4 a)
145 {
146  return make_float3(a.x, a.y, a.z);
147 }
148 inline __host__ __device__ float3 make_float3(int3 a)
149 {
150  return make_float3(float(a.x), float(a.y), float(a.z));
151 }
152 inline __host__ __device__ float3 make_float3(uint3 a)
153 {
154  return make_float3(float(a.x), float(a.y), float(a.z));
155 }
156 inline __host__ __device__ float3 make_float3(uchar3 a)
157 {
158  return make_float3(float(a.x), float(a.y), float(a.z));
159 }
160 inline __host__ __device__ float3 make_float3(uchar4 a)
161 {
162  return make_float3(float(a.x), float(a.y), float(a.z));
163 }
164 
165 inline __host__ __device__ int3 make_int3(int s)
166 {
167  return make_int3(s, s, s);
168 }
169 inline __host__ __device__ int3 make_int3(int2 a)
170 {
171  return make_int3(a.x, a.y, 0);
172 }
173 inline __host__ __device__ int3 make_int3(int2 a, int s)
174 {
175  return make_int3(a.x, a.y, s);
176 }
177 inline __host__ __device__ int3 make_int3(uint3 a)
178 {
179  return make_int3(int(a.x), int(a.y), int(a.z));
180 }
181 inline __host__ __device__ int3 make_int3(float3 a)
182 {
183  return make_int3(int(a.x), int(a.y), int(a.z));
184 }
185 
186 inline __host__ __device__ uint3 make_uint3(uint s)
187 {
188  return make_uint3(s, s, s);
189 }
190 inline __host__ __device__ uint3 make_uint3(uint2 a)
191 {
192  return make_uint3(a.x, a.y, 0);
193 }
194 inline __host__ __device__ uint3 make_uint3(uint2 a, uint s)
195 {
196  return make_uint3(a.x, a.y, s);
197 }
198 inline __host__ __device__ uint3 make_uint3(uint4 a)
199 {
200  return make_uint3(a.x, a.y, a.z);
201 }
202 inline __host__ __device__ uint3 make_uint3(int3 a)
203 {
204  return make_uint3(uint(a.x), uint(a.y), uint(a.z));
205 }
206 
207 inline __host__ __device__ uchar3 make_uchar3(uchar s)
208 {
209  return make_uchar3(s, s, s);
210 }
211 inline __host__ __device__ uchar3 make_uchar3(uint s)
212 {
213  return make_uchar3(s, s, s);
214 }
215 inline __host__ __device__ uchar3 make_uchar3(uint2 a)
216 {
217  return make_uchar3(a.x, a.y, 0);
218 }
219 inline __host__ __device__ uchar3 make_uchar3(uint2 a, uint s)
220 {
221  return make_uchar3(a.x, a.y, s);
222 }
223 inline __host__ __device__ uchar3 make_uchar3(uint4 a)
224 {
225  return make_uchar3(a.x, a.y, a.z);
226 }
227 inline __host__ __device__ uchar3 make_uchar3(uchar3 a)
228 {
229  return make_uchar3(a.x, a.y, a.z);
230 }
231 inline __host__ __device__ uchar3 make_uchar3(uchar4 a)
232 {
233  return make_uchar3(a.x, a.y, a.z);
234 }
235 inline __host__ __device__ uchar3 make_uchar3(int3 a)
236 {
237  return make_uchar3(uchar(a.x), uchar(a.y), uchar(a.z));
238 }
239 inline __host__ __device__ uchar3 make_uchar3(float3 a)
240 {
241  return make_uchar3(a.x, a.y, a.z);
242 }
243 inline __host__ __device__ uchar3 make_uchar3(float4 a)
244 {
245  return make_uchar3(a.x, a.y, a.z);
246 }
247 
248 inline __host__ __device__ float4 make_float4(float s)
249 {
250  return make_float4(s, s, s, s);
251 }
252 inline __host__ __device__ float4 make_float4(float3 a)
253 {
254  return make_float4(a.x, a.y, a.z, 0.0f);
255 }
256 inline __host__ __device__ float4 make_float4(float4 a)
257 {
258  return make_float4(a.x, a.y, a.z, a.w);
259 }
260 inline __host__ __device__ float4 make_float4(float3 a, float w)
261 {
262  return make_float4(a.x, a.y, a.z, w);
263 }
264 inline __host__ __device__ float4 make_float4(int4 a)
265 {
266  return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
267 }
268 inline __host__ __device__ float4 make_float4(uint4 a)
269 {
270  return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
271 }
272 inline __host__ __device__ float4 make_float4(uchar3 a)
273 {
274  return make_float4(float(a.x), float(a.y), float(a.z), 0.0f);
275 }
276 inline __host__ __device__ float4 make_float4(uchar4 a)
277 {
278  return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
279 }
280 
281 inline __host__ __device__ int4 make_int4(int s)
282 {
283  return make_int4(s, s, s, s);
284 }
285 inline __host__ __device__ int4 make_int4(int3 a)
286 {
287  return make_int4(a.x, a.y, a.z, 0);
288 }
289 inline __host__ __device__ int4 make_int4(int3 a, int w)
290 {
291  return make_int4(a.x, a.y, a.z, w);
292 }
293 inline __host__ __device__ int4 make_int4(uint4 a)
294 {
295  return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
296 }
297 inline __host__ __device__ int4 make_int4(float4 a)
298 {
299  return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
300 }
301 
302 
303 inline __host__ __device__ uint4 make_uint4(uint s)
304 {
305  return make_uint4(s, s, s, s);
306 }
307 inline __host__ __device__ uint4 make_uint4(uint3 a)
308 {
309  return make_uint4(a.x, a.y, a.z, 0);
310 }
311 inline __host__ __device__ uint4 make_uint4(uint3 a, uint w)
312 {
313  return make_uint4(a.x, a.y, a.z, w);
314 }
315 inline __host__ __device__ uint4 make_uint4(int4 a)
316 {
317  return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w));
318 }
319 
320 inline __host__ __device__ uchar4 make_uchar4(uchar s)
321 {
322  return make_uchar4(s, s, s, s);
323 }
324 inline __host__ __device__ uchar4 make_uchar4(uint s)
325 {
326  return make_uchar4(s, s, s, s);
327 }
328 inline __host__ __device__ uchar4 make_uchar4(uint3 a)
329 {
330  return make_uchar4(a.x, a.y, a.z, 0);
331 }
332 inline __host__ __device__ uchar4 make_uchar4(uchar3 a)
333 {
334  return make_uchar4(a.x, a.y, a.z, 0);
335 }
336 inline __host__ __device__ uchar4 make_uchar4(uchar4 a)
337 {
338  return make_uchar4(a.x, a.y, a.z, a.w);
339 }
340 inline __host__ __device__ uchar4 make_uchar4(uint3 a, uint w)
341 {
342  return make_uchar4(a.x, a.y, a.z, w);
343 }
344 inline __host__ __device__ uchar4 make_uchar4(int4 a)
345 {
346  return make_uchar4(uchar(a.x), uchar(a.y), uchar(a.z), uchar(a.w));
347 }
348 inline __host__ __device__ uchar4 make_uchar4(float3 a)
349 {
350  return make_uchar4(a.x, a.y, a.z, 0);
351 }
352 inline __host__ __device__ uchar4 make_uchar4(float4 a)
353 {
354  return make_uchar4(a.x, a.y, a.z, a.w);
355 }
356 
358 // negate
360 
361 inline __host__ __device__ float2 operator-(float2 &a)
362 {
363  return make_float2(-a.x, -a.y);
364 }
365 inline __host__ __device__ int2 operator-(int2 &a)
366 {
367  return make_int2(-a.x, -a.y);
368 }
369 inline __host__ __device__ float3 operator-(float3 &a)
370 {
371  return make_float3(-a.x, -a.y, -a.z);
372 }
373 inline __host__ __device__ int3 operator-(int3 &a)
374 {
375  return make_int3(-a.x, -a.y, -a.z);
376 }
377 inline __host__ __device__ float4 operator-(float4 &a)
378 {
379  return make_float4(-a.x, -a.y, -a.z, -a.w);
380 }
381 inline __host__ __device__ int4 operator-(int4 &a)
382 {
383  return make_int4(-a.x, -a.y, -a.z, -a.w);
384 }
385 
387 // addition
389 
390 inline __host__ __device__ float2 operator+(float2 a, float2 b)
391 {
392  return make_float2(a.x + b.x, a.y + b.y);
393 }
394 inline __host__ __device__ void operator+=(float2 &a, float2 b)
395 {
396  a.x += b.x;
397  a.y += b.y;
398 }
399 inline __host__ __device__ float2 operator+(float2 a, float b)
400 {
401  return make_float2(a.x + b, a.y + b);
402 }
403 inline __host__ __device__ float2 operator+(float b, float2 a)
404 {
405  return make_float2(a.x + b, a.y + b);
406 }
407 inline __host__ __device__ void operator+=(float2 &a, float b)
408 {
409  a.x += b;
410  a.y += b;
411 }
412 
413 inline __host__ __device__ int2 operator+(int2 a, int2 b)
414 {
415  return make_int2(a.x + b.x, a.y + b.y);
416 }
417 inline __host__ __device__ void operator+=(int2 &a, int2 b)
418 {
419  a.x += b.x;
420  a.y += b.y;
421 }
422 inline __host__ __device__ int2 operator+(int2 a, int b)
423 {
424  return make_int2(a.x + b, a.y + b);
425 }
426 inline __host__ __device__ int2 operator+(int b, int2 a)
427 {
428  return make_int2(a.x + b, a.y + b);
429 }
430 inline __host__ __device__ void operator+=(int2 &a, int b)
431 {
432  a.x += b;
433  a.y += b;
434 }
435 
436 inline __host__ __device__ uint2 operator+(uint2 a, uint2 b)
437 {
438  return make_uint2(a.x + b.x, a.y + b.y);
439 }
440 inline __host__ __device__ void operator+=(uint2 &a, uint2 b)
441 {
442  a.x += b.x;
443  a.y += b.y;
444 }
445 inline __host__ __device__ uint2 operator+(uint2 a, uint b)
446 {
447  return make_uint2(a.x + b, a.y + b);
448 }
449 inline __host__ __device__ uint2 operator+(uint b, uint2 a)
450 {
451  return make_uint2(a.x + b, a.y + b);
452 }
453 inline __host__ __device__ void operator+=(uint2 &a, uint b)
454 {
455  a.x += b;
456  a.y += b;
457 }
458 
459 
460 inline __host__ __device__ float3 operator+(float3 a, float3 b)
461 {
462  return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
463 }
464 inline __host__ __device__ void operator+=(float3 &a, float3 b)
465 {
466  a.x += b.x;
467  a.y += b.y;
468  a.z += b.z;
469 }
470 inline __host__ __device__ float3 operator+(float3 a, float b)
471 {
472  return make_float3(a.x + b, a.y + b, a.z + b);
473 }
474 inline __host__ __device__ void operator+=(float3 &a, float b)
475 {
476  a.x += b;
477  a.y += b;
478  a.z += b;
479 }
480 
481 inline __host__ __device__ int3 operator+(int3 a, int3 b)
482 {
483  return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
484 }
485 inline __host__ __device__ void operator+=(int3 &a, int3 b)
486 {
487  a.x += b.x;
488  a.y += b.y;
489  a.z += b.z;
490 }
491 inline __host__ __device__ int3 operator+(int3 a, int b)
492 {
493  return make_int3(a.x + b, a.y + b, a.z + b);
494 }
495 inline __host__ __device__ void operator+=(int3 &a, int b)
496 {
497  a.x += b;
498  a.y += b;
499  a.z += b;
500 }
501 
502 inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
503 {
504  return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
505 }
506 inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
507 {
508  a.x += b.x;
509  a.y += b.y;
510  a.z += b.z;
511 }
512 inline __host__ __device__ uint3 operator+(uint3 a, uint b)
513 {
514  return make_uint3(a.x + b, a.y + b, a.z + b);
515 }
516 inline __host__ __device__ void operator+=(uint3 &a, uint b)
517 {
518  a.x += b;
519  a.y += b;
520  a.z += b;
521 }
522 
523 inline __host__ __device__ int3 operator+(int b, int3 a)
524 {
525  return make_int3(a.x + b, a.y + b, a.z + b);
526 }
527 inline __host__ __device__ uint3 operator+(uint b, uint3 a)
528 {
529  return make_uint3(a.x + b, a.y + b, a.z + b);
530 }
531 inline __host__ __device__ float3 operator+(float b, float3 a)
532 {
533  return make_float3(a.x + b, a.y + b, a.z + b);
534 }
535 
536 inline __host__ __device__ float4 operator+(float4 a, float4 b)
537 {
538  return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
539 }
540 inline __host__ __device__ void operator+=(float4 &a, float4 b)
541 {
542  a.x += b.x;
543  a.y += b.y;
544  a.z += b.z;
545  a.w += b.w;
546 }
547 inline __host__ __device__ float4 operator+(float4 a, float b)
548 {
549  return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
550 }
551 inline __host__ __device__ float4 operator+(float b, float4 a)
552 {
553  return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
554 }
555 inline __host__ __device__ void operator+=(float4 &a, float b)
556 {
557  a.x += b;
558  a.y += b;
559  a.z += b;
560  a.w += b;
561 }
562 
563 inline __host__ __device__ int4 operator+(int4 a, int4 b)
564 {
565  return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
566 }
567 inline __host__ __device__ void operator+=(int4 &a, int4 b)
568 {
569  a.x += b.x;
570  a.y += b.y;
571  a.z += b.z;
572  a.w += b.w;
573 }
574 inline __host__ __device__ int4 operator+(int4 a, int b)
575 {
576  return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
577 }
578 inline __host__ __device__ int4 operator+(int b, int4 a)
579 {
580  return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
581 }
582 inline __host__ __device__ void operator+=(int4 &a, int b)
583 {
584  a.x += b;
585  a.y += b;
586  a.z += b;
587  a.w += b;
588 }
589 
590 inline __host__ __device__ uint4 operator+(uint4 a, uint4 b)
591 {
592  return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
593 }
594 inline __host__ __device__ void operator+=(uint4 &a, uint4 b)
595 {
596  a.x += b.x;
597  a.y += b.y;
598  a.z += b.z;
599  a.w += b.w;
600 }
601 inline __host__ __device__ uint4 operator+(uint4 a, uint b)
602 {
603  return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
604 }
605 inline __host__ __device__ uint4 operator+(uint b, uint4 a)
606 {
607  return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
608 }
609 inline __host__ __device__ void operator+=(uint4 &a, uint b)
610 {
611  a.x += b;
612  a.y += b;
613  a.z += b;
614  a.w += b;
615 }
616 
618 // subtract
620 
621 inline __host__ __device__ float2 operator-(float2 a, float2 b)
622 {
623  return make_float2(a.x - b.x, a.y - b.y);
624 }
625 inline __host__ __device__ void operator-=(float2 &a, float2 b)
626 {
627  a.x -= b.x;
628  a.y -= b.y;
629 }
630 inline __host__ __device__ float2 operator-(float2 a, float b)
631 {
632  return make_float2(a.x - b, a.y - b);
633 }
634 inline __host__ __device__ float2 operator-(float b, float2 a)
635 {
636  return make_float2(b - a.x, b - a.y);
637 }
638 inline __host__ __device__ void operator-=(float2 &a, float b)
639 {
640  a.x -= b;
641  a.y -= b;
642 }
643 
644 inline __host__ __device__ int2 operator-(int2 a, int2 b)
645 {
646  return make_int2(a.x - b.x, a.y - b.y);
647 }
648 inline __host__ __device__ void operator-=(int2 &a, int2 b)
649 {
650  a.x -= b.x;
651  a.y -= b.y;
652 }
653 inline __host__ __device__ int2 operator-(int2 a, int b)
654 {
655  return make_int2(a.x - b, a.y - b);
656 }
657 inline __host__ __device__ int2 operator-(int b, int2 a)
658 {
659  return make_int2(b - a.x, b - a.y);
660 }
661 inline __host__ __device__ void operator-=(int2 &a, int b)
662 {
663  a.x -= b;
664  a.y -= b;
665 }
666 
667 inline __host__ __device__ uint2 operator-(uint2 a, uint2 b)
668 {
669  return make_uint2(a.x - b.x, a.y - b.y);
670 }
671 inline __host__ __device__ void operator-=(uint2 &a, uint2 b)
672 {
673  a.x -= b.x;
674  a.y -= b.y;
675 }
676 inline __host__ __device__ uint2 operator-(uint2 a, uint b)
677 {
678  return make_uint2(a.x - b, a.y - b);
679 }
680 inline __host__ __device__ uint2 operator-(uint b, uint2 a)
681 {
682  return make_uint2(b - a.x, b - a.y);
683 }
684 inline __host__ __device__ void operator-=(uint2 &a, uint b)
685 {
686  a.x -= b;
687  a.y -= b;
688 }
689 
690 inline __host__ __device__ float3 operator-(float3 a, float3 b)
691 {
692  return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
693 }
694 inline __host__ __device__ void operator-=(float3 &a, float3 b)
695 {
696  a.x -= b.x;
697  a.y -= b.y;
698  a.z -= b.z;
699 }
700 inline __host__ __device__ float3 operator-(float3 a, float b)
701 {
702  return make_float3(a.x - b, a.y - b, a.z - b);
703 }
704 inline __host__ __device__ float3 operator-(float b, float3 a)
705 {
706  return make_float3(b - a.x, b - a.y, b - a.z);
707 }
708 inline __host__ __device__ void operator-=(float3 &a, float b)
709 {
710  a.x -= b;
711  a.y -= b;
712  a.z -= b;
713 }
714 
715 inline __host__ __device__ int3 operator-(int3 a, int3 b)
716 {
717  return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
718 }
719 inline __host__ __device__ void operator-=(int3 &a, int3 b)
720 {
721  a.x -= b.x;
722  a.y -= b.y;
723  a.z -= b.z;
724 }
725 inline __host__ __device__ int3 operator-(int3 a, int b)
726 {
727  return make_int3(a.x - b, a.y - b, a.z - b);
728 }
729 inline __host__ __device__ int3 operator-(int b, int3 a)
730 {
731  return make_int3(b - a.x, b - a.y, b - a.z);
732 }
733 inline __host__ __device__ void operator-=(int3 &a, int b)
734 {
735  a.x -= b;
736  a.y -= b;
737  a.z -= b;
738 }
739 
740 inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
741 {
742  return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
743 }
744 inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
745 {
746  a.x -= b.x;
747  a.y -= b.y;
748  a.z -= b.z;
749 }
750 inline __host__ __device__ uint3 operator-(uint3 a, uint b)
751 {
752  return make_uint3(a.x - b, a.y - b, a.z - b);
753 }
754 inline __host__ __device__ uint3 operator-(uint b, uint3 a)
755 {
756  return make_uint3(b - a.x, b - a.y, b - a.z);
757 }
758 inline __host__ __device__ void operator-=(uint3 &a, uint b)
759 {
760  a.x -= b;
761  a.y -= b;
762  a.z -= b;
763 }
764 
765 inline __host__ __device__ float4 operator-(float4 a, float4 b)
766 {
767  return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
768 }
769 inline __host__ __device__ void operator-=(float4 &a, float4 b)
770 {
771  a.x -= b.x;
772  a.y -= b.y;
773  a.z -= b.z;
774  a.w -= b.w;
775 }
776 inline __host__ __device__ float4 operator-(float4 a, float b)
777 {
778  return make_float4(a.x - b, a.y - b, a.z - b, a.w - b);
779 }
780 inline __host__ __device__ void operator-=(float4 &a, float b)
781 {
782  a.x -= b;
783  a.y -= b;
784  a.z -= b;
785  a.w -= b;
786 }
787 
788 inline __host__ __device__ int4 operator-(int4 a, int4 b)
789 {
790  return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
791 }
792 inline __host__ __device__ void operator-=(int4 &a, int4 b)
793 {
794  a.x -= b.x;
795  a.y -= b.y;
796  a.z -= b.z;
797  a.w -= b.w;
798 }
799 inline __host__ __device__ int4 operator-(int4 a, int b)
800 {
801  return make_int4(a.x - b, a.y - b, a.z - b, a.w - b);
802 }
803 inline __host__ __device__ int4 operator-(int b, int4 a)
804 {
805  return make_int4(b - a.x, b - a.y, b - a.z, b - a.w);
806 }
807 inline __host__ __device__ void operator-=(int4 &a, int b)
808 {
809  a.x -= b;
810  a.y -= b;
811  a.z -= b;
812  a.w -= b;
813 }
814 
815 inline __host__ __device__ uint4 operator-(uint4 a, uint4 b)
816 {
817  return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
818 }
819 inline __host__ __device__ void operator-=(uint4 &a, uint4 b)
820 {
821  a.x -= b.x;
822  a.y -= b.y;
823  a.z -= b.z;
824  a.w -= b.w;
825 }
826 inline __host__ __device__ uint4 operator-(uint4 a, uint b)
827 {
828  return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b);
829 }
830 inline __host__ __device__ uint4 operator-(uint b, uint4 a)
831 {
832  return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);
833 }
834 inline __host__ __device__ void operator-=(uint4 &a, uint b)
835 {
836  a.x -= b;
837  a.y -= b;
838  a.z -= b;
839  a.w -= b;
840 }
841 
843 // multiply
845 
846 inline __host__ __device__ float2 operator*(float2 a, float2 b)
847 {
848  return make_float2(a.x * b.x, a.y * b.y);
849 }
850 inline __host__ __device__ void operator*=(float2 &a, float2 b)
851 {
852  a.x *= b.x;
853  a.y *= b.y;
854 }
855 inline __host__ __device__ float2 operator*(float2 a, float b)
856 {
857  return make_float2(a.x * b, a.y * b);
858 }
859 inline __host__ __device__ float2 operator*(float b, float2 a)
860 {
861  return make_float2(b * a.x, b * a.y);
862 }
863 inline __host__ __device__ void operator*=(float2 &a, float b)
864 {
865  a.x *= b;
866  a.y *= b;
867 }
868 
869 inline __host__ __device__ int2 operator*(int2 a, int2 b)
870 {
871  return make_int2(a.x * b.x, a.y * b.y);
872 }
873 inline __host__ __device__ void operator*=(int2 &a, int2 b)
874 {
875  a.x *= b.x;
876  a.y *= b.y;
877 }
878 inline __host__ __device__ int2 operator*(int2 a, int b)
879 {
880  return make_int2(a.x * b, a.y * b);
881 }
882 inline __host__ __device__ int2 operator*(int b, int2 a)
883 {
884  return make_int2(b * a.x, b * a.y);
885 }
886 inline __host__ __device__ void operator*=(int2 &a, int b)
887 {
888  a.x *= b;
889  a.y *= b;
890 }
891 
892 inline __host__ __device__ uint2 operator*(uint2 a, uint2 b)
893 {
894  return make_uint2(a.x * b.x, a.y * b.y);
895 }
896 inline __host__ __device__ void operator*=(uint2 &a, uint2 b)
897 {
898  a.x *= b.x;
899  a.y *= b.y;
900 }
901 inline __host__ __device__ uint2 operator*(uint2 a, uint b)
902 {
903  return make_uint2(a.x * b, a.y * b);
904 }
905 inline __host__ __device__ uint2 operator*(uint b, uint2 a)
906 {
907  return make_uint2(b * a.x, b * a.y);
908 }
909 inline __host__ __device__ void operator*=(uint2 &a, uint b)
910 {
911  a.x *= b;
912  a.y *= b;
913 }
914 
915 inline __host__ __device__ float3 operator*(float3 a, float3 b)
916 {
917  return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
918 }
919 inline __host__ __device__ void operator*=(float3 &a, float3 b)
920 {
921  a.x *= b.x;
922  a.y *= b.y;
923  a.z *= b.z;
924 }
925 inline __host__ __device__ float3 operator*(float3 a, float b)
926 {
927  return make_float3(a.x * b, a.y * b, a.z * b);
928 }
929 inline __host__ __device__ float3 operator*(float b, float3 a)
930 {
931  return make_float3(b * a.x, b * a.y, b * a.z);
932 }
933 inline __host__ __device__ void operator*=(float3 &a, float b)
934 {
935  a.x *= b;
936  a.y *= b;
937  a.z *= b;
938 }
939 
940 inline __host__ __device__ int3 operator*(int3 a, int3 b)
941 {
942  return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
943 }
944 inline __host__ __device__ void operator*=(int3 &a, int3 b)
945 {
946  a.x *= b.x;
947  a.y *= b.y;
948  a.z *= b.z;
949 }
950 inline __host__ __device__ int3 operator*(int3 a, int b)
951 {
952  return make_int3(a.x * b, a.y * b, a.z * b);
953 }
954 inline __host__ __device__ int3 operator*(int b, int3 a)
955 {
956  return make_int3(b * a.x, b * a.y, b * a.z);
957 }
958 inline __host__ __device__ void operator*=(int3 &a, int b)
959 {
960  a.x *= b;
961  a.y *= b;
962  a.z *= b;
963 }
964 
965 inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
966 {
967  return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
968 }
969 inline __host__ __device__ void operator*=(uint3 &a, uint3 b)
970 {
971  a.x *= b.x;
972  a.y *= b.y;
973  a.z *= b.z;
974 }
975 inline __host__ __device__ uint3 operator*(uint3 a, uint b)
976 {
977  return make_uint3(a.x * b, a.y * b, a.z * b);
978 }
979 inline __host__ __device__ uint3 operator*(uint b, uint3 a)
980 {
981  return make_uint3(b * a.x, b * a.y, b * a.z);
982 }
983 inline __host__ __device__ void operator*=(uint3 &a, uint b)
984 {
985  a.x *= b;
986  a.y *= b;
987  a.z *= b;
988 }
989 
990 inline __host__ __device__ float4 operator*(float4 a, float4 b)
991 {
992  return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
993 }
994 inline __host__ __device__ void operator*=(float4 &a, float4 b)
995 {
996  a.x *= b.x;
997  a.y *= b.y;
998  a.z *= b.z;
999  a.w *= b.w;
1000 }
1001 inline __host__ __device__ float4 operator*(float4 a, float b)
1002 {
1003  return make_float4(a.x * b, a.y * b, a.z * b, a.w * b);
1004 }
1005 inline __host__ __device__ float4 operator*(float b, float4 a)
1006 {
1007  return make_float4(b * a.x, b * a.y, b * a.z, b * a.w);
1008 }
1009 inline __host__ __device__ void operator*=(float4 &a, float b)
1010 {
1011  a.x *= b;
1012  a.y *= b;
1013  a.z *= b;
1014  a.w *= b;
1015 }
1016 
1017 inline __host__ __device__ int4 operator*(int4 a, int4 b)
1018 {
1019  return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
1020 }
1021 inline __host__ __device__ void operator*=(int4 &a, int4 b)
1022 {
1023  a.x *= b.x;
1024  a.y *= b.y;
1025  a.z *= b.z;
1026  a.w *= b.w;
1027 }
1028 inline __host__ __device__ int4 operator*(int4 a, int b)
1029 {
1030  return make_int4(a.x * b, a.y * b, a.z * b, a.w * b);
1031 }
1032 inline __host__ __device__ int4 operator*(int b, int4 a)
1033 {
1034  return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);
1035 }
1036 inline __host__ __device__ void operator*=(int4 &a, int b)
1037 {
1038  a.x *= b;
1039  a.y *= b;
1040  a.z *= b;
1041  a.w *= b;
1042 }
1043 
1044 inline __host__ __device__ uint4 operator*(uint4 a, uint4 b)
1045 {
1046  return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
1047 }
1048 inline __host__ __device__ void operator*=(uint4 &a, uint4 b)
1049 {
1050  a.x *= b.x;
1051  a.y *= b.y;
1052  a.z *= b.z;
1053  a.w *= b.w;
1054 }
1055 inline __host__ __device__ uint4 operator*(uint4 a, uint b)
1056 {
1057  return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b);
1058 }
1059 inline __host__ __device__ uint4 operator*(uint b, uint4 a)
1060 {
1061  return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);
1062 }
1063 inline __host__ __device__ void operator*=(uint4 &a, uint b)
1064 {
1065  a.x *= b;
1066  a.y *= b;
1067  a.z *= b;
1068  a.w *= b;
1069 }
1070 
1072 // divide
1074 
1075 inline __host__ __device__ float2 operator/(float2 a, float2 b)
1076 {
1077  return make_float2(a.x / b.x, a.y / b.y);
1078 }
1079 inline __host__ __device__ void operator/=(float2 &a, float2 b)
1080 {
1081  a.x /= b.x;
1082  a.y /= b.y;
1083 }
1084 inline __host__ __device__ float2 operator/(float2 a, float b)
1085 {
1086  return make_float2(a.x / b, a.y / b);
1087 }
1088 inline __host__ __device__ void operator/=(float2 &a, float b)
1089 {
1090  a.x /= b;
1091  a.y /= b;
1092 }
1093 inline __host__ __device__ float2 operator/(float b, float2 a)
1094 {
1095  return make_float2(b / a.x, b / a.y);
1096 }
1097 
1098 inline __host__ __device__ float3 operator/(float3 a, float3 b)
1099 {
1100  return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
1101 }
1102 inline __host__ __device__ void operator/=(float3 &a, float3 b)
1103 {
1104  a.x /= b.x;
1105  a.y /= b.y;
1106  a.z /= b.z;
1107 }
1108 inline __host__ __device__ float3 operator/(float3 a, float b)
1109 {
1110  return make_float3(a.x / b, a.y / b, a.z / b);
1111 }
1112 inline __host__ __device__ void operator/=(float3 &a, float b)
1113 {
1114  a.x /= b;
1115  a.y /= b;
1116  a.z /= b;
1117 }
1118 inline __host__ __device__ float3 operator/(float b, float3 a)
1119 {
1120  return make_float3(b / a.x, b / a.y, b / a.z);
1121 }
1122 
1123 inline __host__ __device__ float4 operator/(float4 a, float4 b)
1124 {
1125  return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
1126 }
1127 inline __host__ __device__ void operator/=(float4 &a, float4 b)
1128 {
1129  a.x /= b.x;
1130  a.y /= b.y;
1131  a.z /= b.z;
1132  a.w /= b.w;
1133 }
1134 inline __host__ __device__ float4 operator/(float4 a, float b)
1135 {
1136  return make_float4(a.x / b, a.y / b, a.z / b, a.w / b);
1137 }
1138 inline __host__ __device__ void operator/=(float4 &a, float b)
1139 {
1140  a.x /= b;
1141  a.y /= b;
1142  a.z /= b;
1143  a.w /= b;
1144 }
1145 inline __host__ __device__ float4 operator/(float b, float4 a)
1146 {
1147  return make_float4(b / a.x, b / a.y, b / a.z, b / a.w);
1148 }
1149 
1151 // min
1153 
1154 inline __host__ __device__ float2 fminf(float2 a, float2 b)
1155 {
1156  return make_float2(fminf(a.x,b.x), fminf(a.y,b.y));
1157 }
1158 inline __host__ __device__ float3 fminf(float3 a, float3 b)
1159 {
1160  return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
1161 }
1162 inline __host__ __device__ float4 fminf(float4 a, float4 b)
1163 {
1164  return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
1165 }
1166 
1167 inline __host__ __device__ int2 min(int2 a, int2 b)
1168 {
1169  return make_int2(min(a.x,b.x), min(a.y,b.y));
1170 }
1171 inline __host__ __device__ int3 min(int3 a, int3 b)
1172 {
1173  return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
1174 }
1175 inline __host__ __device__ int4 min(int4 a, int4 b)
1176 {
1177  return make_int4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w));
1178 }
1179 
1180 inline __host__ __device__ uint2 min(uint2 a, uint2 b)
1181 {
1182  return make_uint2(min(a.x,b.x), min(a.y,b.y));
1183 }
1184 inline __host__ __device__ uint3 min(uint3 a, uint3 b)
1185 {
1186  return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
1187 }
1188 inline __host__ __device__ uint4 min(uint4 a, uint4 b)
1189 {
1190  return make_uint4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w));
1191 }
1192 
1194 // max
1196 
1197 inline __host__ __device__ float2 fmaxf(float2 a, float2 b)
1198 {
1199  return make_float2(fmaxf(a.x,b.x), fmaxf(a.y,b.y));
1200 }
1201 inline __host__ __device__ float3 fmaxf(float3 a, float3 b)
1202 {
1203  return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
1204 }
1205 inline __host__ __device__ float4 fmaxf(float4 a, float4 b)
1206 {
1207  return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
1208 }
1209 
1210 inline __host__ __device__ int2 max(int2 a, int2 b)
1211 {
1212  return make_int2(max(a.x,b.x), max(a.y,b.y));
1213 }
1214 inline __host__ __device__ int3 max(int3 a, int3 b)
1215 {
1216  return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
1217 }
1218 inline __host__ __device__ int4 max(int4 a, int4 b)
1219 {
1220  return make_int4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w));
1221 }
1222 
1223 inline __host__ __device__ uint2 max(uint2 a, uint2 b)
1224 {
1225  return make_uint2(max(a.x,b.x), max(a.y,b.y));
1226 }
1227 inline __host__ __device__ uint3 max(uint3 a, uint3 b)
1228 {
1229  return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
1230 }
1231 inline __host__ __device__ uint4 max(uint4 a, uint4 b)
1232 {
1233  return make_uint4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w));
1234 }
1235 
1237 // lerp
1238 // - linear interpolation between a and b, based on value t in [0, 1] range
1240 
1241 inline __device__ __host__ float lerp(float a, float b, float t)
1242 {
1243  return a + t*(b-a);
1244 }
/**
 * Linear interpolation between two float2 vectors with a scalar weight.
 * Evaluates a + t*(b - a) using the vector operators; t is not clamped.
 */
inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
{
    float2 span = b - a;
    return a + t * span;
}

/**
 * Linear interpolation between two float3 vectors with a scalar weight.
 * Evaluates a + t*(b - a) using the vector operators; t is not clamped.
 */
inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
{
    float3 span = b - a;
    return a + t * span;
}

/**
 * Linear interpolation between two float4 vectors with a scalar weight.
 * Evaluates a + t*(b - a) using the vector operators; t is not clamped.
 */
inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
{
    float4 span = b - a;
    return a + t * span;
}
1257 
1259 // clamp
1260 // - clamp the value v to be in the range [a, b]
1262 
/**
 * Clamp a float to the range [a, b].
 *
 * The value is first limited from above by b, then from below by a, so when
 * a > b the result is a.
 *
 * @param f value to clamp
 * @param a lower bound
 * @param b upper bound
 */
inline __device__ __host__ float clamp(float f, float a, float b)
{
    const float capped = fminf(f, b);
    return fmaxf(a, capped);
}
/**
 * Clamp an int to the range [a, b].
 *
 * The value is first limited from above by b, then from below by a, so when
 * a > b the result is a.
 *
 * @param f value to clamp
 * @param a lower bound
 * @param b upper bound
 */
inline __device__ __host__ int clamp(int f, int a, int b)
{
    const int capped = min(f, b);
    return max(a, capped);
}
/**
 * Clamp an unsigned int to the range [a, b].
 *
 * The value is first limited from above by b, then from below by a, so when
 * a > b the result is a.
 *
 * The comparisons are written out on the unsigned values instead of calling
 * the scalar min()/max(): when this header is compiled without __CUDACC__ the
 * only scalar fallbacks it declares take int, which would treat values above
 * INT_MAX as negative and clamp them incorrectly.
 *
 * @param f value to clamp
 * @param a lower bound
 * @param b upper bound
 */
inline __device__ __host__ uint clamp(uint f, uint a, uint b)
{
    const uint capped = (f < b) ? f : b;
    return (a > capped) ? a : capped;
}
1275 
/** Clamp every component of a float2 to the scalar range [a, b]. */
inline __device__ __host__ float2 clamp(float2 v, float a, float b)
{
    const float cx = clamp(v.x, a, b);
    const float cy = clamp(v.y, a, b);
    return make_float2(cx, cy);
}

/** Clamp each component of a float2 to the matching component range [a, b]. */
inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
{
    const float cx = clamp(v.x, a.x, b.x);
    const float cy = clamp(v.y, a.y, b.y);
    return make_float2(cx, cy);
}

/** Clamp every component of a float3 to the scalar range [a, b]. */
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
    const float cx = clamp(v.x, a, b);
    const float cy = clamp(v.y, a, b);
    const float cz = clamp(v.z, a, b);
    return make_float3(cx, cy, cz);
}

/** Clamp each component of a float3 to the matching component range [a, b]. */
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
    const float cx = clamp(v.x, a.x, b.x);
    const float cy = clamp(v.y, a.y, b.y);
    const float cz = clamp(v.z, a.z, b.z);
    return make_float3(cx, cy, cz);
}

/** Clamp every component of a float4 to the scalar range [a, b]. */
inline __device__ __host__ float4 clamp(float4 v, float a, float b)
{
    const float cx = clamp(v.x, a, b);
    const float cy = clamp(v.y, a, b);
    const float cz = clamp(v.z, a, b);
    const float cw = clamp(v.w, a, b);
    return make_float4(cx, cy, cz, cw);
}

/** Clamp each component of a float4 to the matching component range [a, b]. */
inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
{
    const float cx = clamp(v.x, a.x, b.x);
    const float cy = clamp(v.y, a.y, b.y);
    const float cz = clamp(v.z, a.z, b.z);
    const float cw = clamp(v.w, a.w, b.w);
    return make_float4(cx, cy, cz, cw);
}
1300 
/** Clamp every component of an int2 to the scalar range [a, b]. */
inline __device__ __host__ int2 clamp(int2 v, int a, int b)
{
    const int cx = clamp(v.x, a, b);
    const int cy = clamp(v.y, a, b);
    return make_int2(cx, cy);
}

/** Clamp each component of an int2 to the matching component range [a, b]. */
inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b)
{
    const int cx = clamp(v.x, a.x, b.x);
    const int cy = clamp(v.y, a.y, b.y);
    return make_int2(cx, cy);
}

/** Clamp every component of an int3 to the scalar range [a, b]. */
inline __device__ __host__ int3 clamp(int3 v, int a, int b)
{
    const int cx = clamp(v.x, a, b);
    const int cy = clamp(v.y, a, b);
    const int cz = clamp(v.z, a, b);
    return make_int3(cx, cy, cz);
}

/** Clamp each component of an int3 to the matching component range [a, b]. */
inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
{
    const int cx = clamp(v.x, a.x, b.x);
    const int cy = clamp(v.y, a.y, b.y);
    const int cz = clamp(v.z, a.z, b.z);
    return make_int3(cx, cy, cz);
}

/** Clamp every component of an int4 to the scalar range [a, b]. */
inline __device__ __host__ int4 clamp(int4 v, int a, int b)
{
    const int cx = clamp(v.x, a, b);
    const int cy = clamp(v.y, a, b);
    const int cz = clamp(v.z, a, b);
    const int cw = clamp(v.w, a, b);
    return make_int4(cx, cy, cz, cw);
}

/** Clamp each component of an int4 to the matching component range [a, b]. */
inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)
{
    const int cx = clamp(v.x, a.x, b.x);
    const int cy = clamp(v.y, a.y, b.y);
    const int cz = clamp(v.z, a.z, b.z);
    const int cw = clamp(v.w, a.w, b.w);
    return make_int4(cx, cy, cz, cw);
}
1325 
/** Clamp every component of a uint2 to the scalar range [a, b]. */
inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b)
{
    const uint cx = clamp(v.x, a, b);
    const uint cy = clamp(v.y, a, b);
    return make_uint2(cx, cy);
}

/** Clamp each component of a uint2 to the matching component range [a, b]. */
inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b)
{
    const uint cx = clamp(v.x, a.x, b.x);
    const uint cy = clamp(v.y, a.y, b.y);
    return make_uint2(cx, cy);
}

/** Clamp every component of a uint3 to the scalar range [a, b]. */
inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
{
    const uint cx = clamp(v.x, a, b);
    const uint cy = clamp(v.y, a, b);
    const uint cz = clamp(v.z, a, b);
    return make_uint3(cx, cy, cz);
}

/** Clamp each component of a uint3 to the matching component range [a, b]. */
inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
{
    const uint cx = clamp(v.x, a.x, b.x);
    const uint cy = clamp(v.y, a.y, b.y);
    const uint cz = clamp(v.z, a.z, b.z);
    return make_uint3(cx, cy, cz);
}

/** Clamp every component of a uint4 to the scalar range [a, b]. */
inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b)
{
    const uint cx = clamp(v.x, a, b);
    const uint cy = clamp(v.y, a, b);
    const uint cz = clamp(v.z, a, b);
    const uint cw = clamp(v.w, a, b);
    return make_uint4(cx, cy, cz, cw);
}

/** Clamp each component of a uint4 to the matching component range [a, b]. */
inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)
{
    const uint cx = clamp(v.x, a.x, b.x);
    const uint cy = clamp(v.y, a.y, b.y);
    const uint cz = clamp(v.z, a.z, b.z);
    const uint cw = clamp(v.w, a.w, b.w);
    return make_uint4(cx, cy, cz, cw);
}
1350 
1352 // dot product
1354 
/** Dot product of two float2 vectors: sum of the products of matching components. */
inline __host__ __device__ float dot(float2 a, float2 b)
{
    float sum = a.x * b.x;
    sum += a.y * b.y;
    return sum;
}

/** Dot product of two float3 vectors, accumulated in x, y, z order. */
inline __host__ __device__ float dot(float3 a, float3 b)
{
    float sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    return sum;
}

/** Dot product of two float4 vectors, accumulated in x, y, z, w order. */
inline __host__ __device__ float dot(float4 a, float4 b)
{
    float sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    sum += a.w * b.w;
    return sum;
}
1367 
/** Dot product of two int2 vectors: sum of the products of matching components. */
inline __host__ __device__ int dot(int2 a, int2 b)
{
    int sum = a.x * b.x;
    sum += a.y * b.y;
    return sum;
}

/** Dot product of two int3 vectors, accumulated in x, y, z order. */
inline __host__ __device__ int dot(int3 a, int3 b)
{
    int sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    return sum;
}

/** Dot product of two int4 vectors, accumulated in x, y, z, w order. */
inline __host__ __device__ int dot(int4 a, int4 b)
{
    int sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    sum += a.w * b.w;
    return sum;
}
1380 
/** Dot product of two uint2 vectors: sum of the products of matching components. */
inline __host__ __device__ uint dot(uint2 a, uint2 b)
{
    uint sum = a.x * b.x;
    sum += a.y * b.y;
    return sum;
}

/** Dot product of two uint3 vectors, accumulated in x, y, z order. */
inline __host__ __device__ uint dot(uint3 a, uint3 b)
{
    uint sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    return sum;
}

/** Dot product of two uint4 vectors, accumulated in x, y, z, w order. */
inline __host__ __device__ uint dot(uint4 a, uint4 b)
{
    uint sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    sum += a.w * b.w;
    return sum;
}
1393 
1395 // length
1397 
/** Length of a float2: the square root of the vector's dot product with itself. */
inline __host__ __device__ float length(float2 v)
{
    const float squared = dot(v, v);
    return sqrtf(squared);
}

/** Length of a float3: the square root of the vector's dot product with itself. */
inline __host__ __device__ float length(float3 v)
{
    const float squared = dot(v, v);
    return sqrtf(squared);
}

/** Length of a float4: the square root of the vector's dot product with itself. */
inline __host__ __device__ float length(float4 v)
{
    const float squared = dot(v, v);
    return sqrtf(squared);
}
1410 
1412 // normalize
1414 
/**
 * Scale a float2 by the reciprocal square root of its squared length.
 * A zero-length input is not special-cased.
 */
inline __host__ __device__ float2 normalize(float2 v)
{
    return v * rsqrtf(dot(v, v));
}

/**
 * Scale a float3 by the reciprocal square root of its squared length.
 * A zero-length input is not special-cased.
 */
inline __host__ __device__ float3 normalize(float3 v)
{
    return v * rsqrtf(dot(v, v));
}

/**
 * Scale a float4 by the reciprocal square root of its squared length.
 * A zero-length input is not special-cased.
 */
inline __host__ __device__ float4 normalize(float4 v)
{
    return v * rsqrtf(dot(v, v));
}
1430 
1432 // floor
1434 
/** Component-wise floor of a float2 (scalar floorf per component). */
inline __host__ __device__ float2 floorf(float2 v)
{
    const float fx = floorf(v.x);
    const float fy = floorf(v.y);
    return make_float2(fx, fy);
}

/** Component-wise floor of a float3 (scalar floorf per component). */
inline __host__ __device__ float3 floorf(float3 v)
{
    const float fx = floorf(v.x);
    const float fy = floorf(v.y);
    const float fz = floorf(v.z);
    return make_float3(fx, fy, fz);
}

/** Component-wise floor of a float4 (scalar floorf per component). */
inline __host__ __device__ float4 floorf(float4 v)
{
    const float fx = floorf(v.x);
    const float fy = floorf(v.y);
    const float fz = floorf(v.z);
    const float fw = floorf(v.w);
    return make_float4(fx, fy, fz, fw);
}
1447 
1449 // frac - returns the fractional portion of a scalar or each vector component
1451 
/**
 * Fractional part of a scalar, computed as v - floorf(v).
 * For a negative input such as -0.25 this yields 0.75, since floorf rounds down.
 */
inline __host__ __device__ float fracf(float v)
{
    const float whole = floorf(v);
    return v - whole;
}

/** Fractional part of each component of a float2 (scalar fracf per component). */
inline __host__ __device__ float2 fracf(float2 v)
{
    const float fx = fracf(v.x);
    const float fy = fracf(v.y);
    return make_float2(fx, fy);
}

/** Fractional part of each component of a float3 (scalar fracf per component). */
inline __host__ __device__ float3 fracf(float3 v)
{
    const float fx = fracf(v.x);
    const float fy = fracf(v.y);
    const float fz = fracf(v.z);
    return make_float3(fx, fy, fz);
}

/** Fractional part of each component of a float4 (scalar fracf per component). */
inline __host__ __device__ float4 fracf(float4 v)
{
    const float fx = fracf(v.x);
    const float fy = fracf(v.y);
    const float fz = fracf(v.z);
    const float fw = fracf(v.w);
    return make_float4(fx, fy, fz, fw);
}
1468 
1470 // fmod
1472 
/** Component-wise floating-point remainder of a float2 pair (scalar fmodf per component). */
inline __host__ __device__ float2 fmodf(float2 a, float2 b)
{
    const float rx = fmodf(a.x, b.x);
    const float ry = fmodf(a.y, b.y);
    return make_float2(rx, ry);
}

/** Component-wise floating-point remainder of a float3 pair (scalar fmodf per component). */
inline __host__ __device__ float3 fmodf(float3 a, float3 b)
{
    const float rx = fmodf(a.x, b.x);
    const float ry = fmodf(a.y, b.y);
    const float rz = fmodf(a.z, b.z);
    return make_float3(rx, ry, rz);
}

/** Component-wise floating-point remainder of a float4 pair (scalar fmodf per component). */
inline __host__ __device__ float4 fmodf(float4 a, float4 b)
{
    const float rx = fmodf(a.x, b.x);
    const float ry = fmodf(a.y, b.y);
    const float rz = fmodf(a.z, b.z);
    const float rw = fmodf(a.w, b.w);
    return make_float4(rx, ry, rz, rw);
}
1485 
1487 // absolute value
1489 
/**
 * Component-wise absolute value of a float2.
 *
 * Calls the single-precision fabsf() on each component so the computation
 * stays in float regardless of which fabs() overloads are visible, matching
 * the f-suffixed math calls used elsewhere in this header.
 */
inline __host__ __device__ float2 fabs(float2 v)
{
    return make_float2(fabsf(v.x), fabsf(v.y));
}

/** Component-wise absolute value of a float3 (single-precision fabsf per component). */
inline __host__ __device__ float3 fabs(float3 v)
{
    return make_float3(fabsf(v.x), fabsf(v.y), fabsf(v.z));
}

/** Component-wise absolute value of a float4 (single-precision fabsf per component). */
inline __host__ __device__ float4 fabs(float4 v)
{
    return make_float4(fabsf(v.x), fabsf(v.y), fabsf(v.z), fabsf(v.w));
}
1502 
/** Component-wise absolute value of an int2 (scalar abs per component). */
inline __host__ __device__ int2 abs(int2 v)
{
    const int ax = abs(v.x);
    const int ay = abs(v.y);
    return make_int2(ax, ay);
}

/** Component-wise absolute value of an int3 (scalar abs per component). */
inline __host__ __device__ int3 abs(int3 v)
{
    const int ax = abs(v.x);
    const int ay = abs(v.y);
    const int az = abs(v.z);
    return make_int3(ax, ay, az);
}

/** Component-wise absolute value of an int4 (scalar abs per component). */
inline __host__ __device__ int4 abs(int4 v)
{
    const int ax = abs(v.x);
    const int ay = abs(v.y);
    const int az = abs(v.z);
    const int aw = abs(v.w);
    return make_int4(ax, ay, az, aw);
}
1515 
1517 // reflect
1518 // - returns reflection of incident ray I around surface normal N
1519 // - N should be normalized, reflected vector's length is equal to length of I
1521 
/**
 * Reflect the incident vector i about the normal n.
 *
 * Evaluates i - (2*n) * dot(n, i). The normal is used as given and is not
 * normalized here.
 *
 * @param i incident vector
 * @param n surface normal
 */
inline __host__ __device__ float3 reflect(float3 i, float3 n)
{
    float3 doubledNormal = 2.0f * n;
    const float projection = dot(n, i);
    return i - doubledNormal * projection;
}
1526 
1528 // cross product
1530 
/**
 * Cross product a x b of two float3 vectors.
 * Each result component is built from the other two components of a and b.
 */
inline __host__ __device__ float3 cross(float3 a, float3 b)
{
    const float cx = a.y*b.z - a.z*b.y;
    const float cy = a.z*b.x - a.x*b.z;
    const float cz = a.x*b.y - a.y*b.x;
    return make_float3(cx, cy, cz);
}
1535 
1537 // smoothstep
1538 // - returns 0 if x < a
1539 // - returns 1 if x > b
1540 // - otherwise returns smooth interpolation between 0 and 1 based on x
1542 
/**
 * Smooth Hermite step between the edges a and b, evaluated at x.
 *
 * The normalized position (x - a) / (b - a) is clamped to [0, 1] and then
 * passed through t*t*(3 - 2*t). The case a == b is not guarded, so the
 * division is by zero there.
 */
inline __device__ __host__ float smoothstep(float a, float b, float x)
{
    const float t = clamp((x - a) / (b - a), 0.0f, 1.0f);
    const float weight = 3.0f - (2.0f*t);
    return t*t*weight;
}

/** Component-wise smoothstep for float2 edges and position; same formula as the scalar overload. */
inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x)
{
    float2 t = clamp((x - a) / (b - a), 0.0f, 1.0f);
    float2 weight = make_float2(3.0f) - (make_float2(2.0f)*t);
    return t*t*weight;
}

/** Component-wise smoothstep for float3 edges and position; same formula as the scalar overload. */
inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x)
{
    float3 t = clamp((x - a) / (b - a), 0.0f, 1.0f);
    float3 weight = make_float3(3.0f) - (make_float3(2.0f)*t);
    return t*t*weight;
}

/** Component-wise smoothstep for float4 edges and position; same formula as the scalar overload. */
inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x)
{
    float4 t = clamp((x - a) / (b - a), 0.0f, 1.0f);
    float4 weight = make_float4(3.0f) - (make_float4(2.0f)*t);
    return t*t*weight;
}
1563 
1565 
1566 #endif
1567 
int max(int a, int b)
Definition: cudaMath.h:61
float fmaxf(float a, float b)
Definition: cudaMath.h:56
__host__ __device__ float2 operator*(float2 a, float2 b)
Definition: cudaMath.h:846
__host__ __device__ uint4 make_uint4(uint s)
Definition: cudaMath.h:303
__host__ __device__ float2 make_float2(float s)
Definition: cudaMath.h:81
__host__ __device__ uint3 make_uint3(uint s)
Definition: cudaMath.h:186
__host__ __device__ float fracf(float v)
Definition: cudaMath.h:1452
__host__ __device__ float dot(float2 a, float2 b)
Definition: cudaMath.h:1355
__host__ __device__ float2 fmodf(float2 a, float2 b)
Definition: cudaMath.h:1473
__host__ __device__ float2 operator/(float2 a, float2 b)
Definition: cudaMath.h:1075
float fminf(float a, float b)
Definition: cudaMath.h:51
__host__ __device__ float2 operator-(float2 &a)
Definition: cudaMath.h:361
__device__ __host__ float smoothstep(float a, float b, float x)
Definition: cudaMath.h:1543
int min(int a, int b)
Definition: cudaMath.h:66
__host__ __device__ float2 normalize(float2 v)
Definition: cudaMath.h:1415
__host__ __device__ void operator*=(float2 &a, float2 b)
Definition: cudaMath.h:850
__host__ __device__ float3 make_float3(float s)
Definition: cudaMath.h:128
__host__ __device__ int3 make_int3(int s)
Definition: cudaMath.h:165
__device__ __host__ float clamp(float f, float a, float b)
Definition: cudaMath.h:1263
__host__ __device__ uint2 make_uint2(uint s)
Definition: cudaMath.h:115
__host__ __device__ float2 operator+(float2 a, float2 b)
Definition: cudaMath.h:390
__host__ __device__ int2 make_int2(int s)
Definition: cudaMath.h:98
__host__ __device__ void operator/=(float2 &a, float2 b)
Definition: cudaMath.h:1079
__device__ __host__ float lerp(float a, float b, float t)
Definition: cudaMath.h:1241
__host__ __device__ uchar4 make_uchar4(uchar s)
Definition: cudaMath.h:320
__host__ __device__ uchar3 make_uchar3(uchar s)
Definition: cudaMath.h:207
__host__ __device__ void operator+=(float2 &a, float2 b)
Definition: cudaMath.h:394
unsigned int uint
Definition: cudaMath.h:36
__host__ __device__ int2 abs(int2 v)
Definition: cudaMath.h:1503
__host__ __device__ float4 make_float4(float s)
Definition: cudaMath.h:248
unsigned short ushort
Definition: cudaMath.h:38
float rsqrtf(float x)
Definition: cudaMath.h:71
__host__ __device__ float length(float2 v)
Definition: cudaMath.h:1398
unsigned char uchar
Definition: cudaMath.h:37
__host__ __device__ float3 reflect(float3 i, float3 n)
Definition: cudaMath.h:1522
__host__ __device__ float2 floorf(float2 v)
Definition: cudaMath.h:1435
__host__ __device__ void operator-=(float2 &a, float2 b)
Definition: cudaMath.h:625
__host__ __device__ float2 fabs(float2 v)
Definition: cudaMath.h:1490
__host__ __device__ float3 cross(float3 a, float3 b)
Definition: cudaMath.h:1531
__host__ __device__ int4 make_int4(int s)
Definition: cudaMath.h:281