-
Notifications
You must be signed in to change notification settings - Fork 10
/
ml.hlsli
3094 lines (2427 loc) · 105 KB
/
ml.hlsli
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// © 2021 NVIDIA Corporation
// Include guard for this header
#ifndef ML_HLSLI
#define ML_HLSLI
// Marker placed on parameters that are expected to be compile-time constants ( expands to nothing )
#define compiletime
// Language-dependent helpers: the first branch is taken when the file is NOT compiled as C++
#ifndef __cplusplus
#define ML_Unused( x )
#define ML_INLINE
// Output and in-out parameter qualifiers
#define ML_OUT( T ) out T
#define ML_INOUT( T ) inout T
#else
// NOTE(review): ML_INLINE and ML_Unused are not defined in this branch - presumably a C++ host header provides them, confirm
// In C++ output and in-out parameters are passed by reference
#define ML_OUT( T ) T&
#define ML_INOUT( T ) T&
#endif
//===========================================================================================================================
// Settings
//===========================================================================================================================
// 1 - "Geometry::GetScreenUv" keeps the Y direction of clip space, 0 - it flips Y when converting clip space to UV
#define ML_WINDOW_ORIGIN_OGL 0
// Math
// Default "mode" arguments of "Math::Sign", "Math::Sqrt", "Math::Rsqrt" and "Math::PositiveRcp"
#define ML_SIGN_DEFAULT ML_SIGN_FAST
#define ML_SQRT_DEFAULT ML_SQRT_SAFE
#define ML_RSQRT_DEFAULT ML_POSITIVE_RSQRT_ACCURATE_SAFE
#define ML_POSITIVE_RCP_DEFAULT ML_POSITIVE_RCP_ACCURATE_SAFE
// Lower clamp applied to the input by the "safe" modes of "Math::Rsqrt" and "Math::PositiveRcp"
#define ML_SMALL_EPS 1e-15f
// General purpose epsilon
#define ML_EPS 1e-6f
// BRDF
// NOTE(review): the BRDF, text and RNG settings below are consumed by code outside of this part of the file
#define ML_SPECULAR_DOMINANT_DIRECTION_DEFAULT ML_SPECULAR_DOMINANT_DIRECTION_APPROX
#define ML_RF0_DIELECTRICS 0.04f
#define ML_GTR_GAMMA 1.5f
#define ML_VNDF_VERSION 3
// Text
#define ML_TEXT_DIGIT_FORMAT 10000
#define ML_TEXT_WITH_NICE_ONE_PIXEL_BACKGROUND 0
// Other
#define ML_RNG_NEXT_MODE ML_RNG_HASH
#define ML_RNG_FLOAT01_MODE ML_RNG_MANTISSA_BITS
#define ML_BAYER_DEFAULT ML_BAYER_REVERSEBITS
#ifdef ML_NAMESPACE
namespace ml
{
#endif
//=======================================================================================================================
// MATH
//=======================================================================================================================
namespace Math
{
// Pi
// Returns "x * PI" ( e.g. "Pi( 2.0f )" is a full turn in radians ). Computed as "radians( x * 180 )",
// i.e. "x" half-turns expressed in degrees and converted to radians.
// The macro argument is parenthesized, so a compound argument like "_Pi( a + b )" is scaled as a whole
#define _Pi( x ) radians( ( x ) * 180.0f )

ML_INLINE float Pi( float x )
{
    return _Pi( x );
}

ML_INLINE float2 Pi( float2 x )
{
    return _Pi( x );
}

ML_INLINE float3 Pi( float3 x )
{
    return _Pi( x );
}

ML_INLINE float4 Pi( float4 x )
{
    return _Pi( x );
}
// Radians to degrees
// Computes "x * 180 / PI" per component.
// The macro argument is parenthesized, so a compound argument like "_RadToDeg( a + b )" is converted as a whole
#define _RadToDeg( x ) ( ( x ) * 180.0f / Pi( 1.0f ) )

ML_INLINE float RadToDeg( float x )
{
    return _RadToDeg( x );
}

ML_INLINE float2 RadToDeg( float2 x )
{
    return _RadToDeg( x );
}

ML_INLINE float3 RadToDeg( float3 x )
{
    return _RadToDeg( x );
}

ML_INLINE float4 RadToDeg( float4 x )
{
    return _RadToDeg( x );
}
// Degrees to radians
// Computes "x * PI / 180" per component.
// The macro argument is parenthesized, so a compound argument like "_DegToRad( a + b )" is converted as a whole
#define _DegToRad( x ) ( ( x ) * Pi( 1.0f ) / 180.0f )

ML_INLINE float DegToRad( float x )
{
    return _DegToRad( x );
}

ML_INLINE float2 DegToRad( float2 x )
{
    return _DegToRad( x );
}

ML_INLINE float3 DegToRad( float3 x )
{
    return _DegToRad( x );
}

ML_INLINE float4 DegToRad( float4 x )
{
    return _DegToRad( x );
}
// LinearStep
// Maps "x" from the range [a; b] to [0; 1] linearly and saturates the result: "saturate( ( x - a ) / ( b - a ) )".
// There is no protection against "a == b".
// All macro arguments are parenthesized, so compound arguments ( e.g. "_LinearStep( p - q, b, x )" ) keep their meaning
#define _LinearStep( a, b, x ) saturate( ( ( x ) - ( a ) ) / ( ( b ) - ( a ) ) )

ML_INLINE float LinearStep( float a, float b, float x )
{
    return _LinearStep( a, b, x );
}

ML_INLINE float2 LinearStep( float2 a, float2 b, float2 x )
{
    return _LinearStep( a, b, x );
}

ML_INLINE float3 LinearStep( float3 a, float3 b, float3 x )
{
    return _LinearStep( a, b, x );
}

ML_INLINE float4 LinearStep( float4 a, float4 b, float4 x )
{
    return _LinearStep( a, b, x );
}
// SmoothStep
// "_SmoothStep01" is the cubic "x^2 * ( 3 - 2x )" and expects "x" to be in range [0; 1] already.
// The macro argument is parenthesized, so a compound argument is substituted as a whole. The argument is
// evaluated several times, so the functions below saturate into a variable first and pass a plain identifier
#define _SmoothStep01( x ) ( ( x ) * ( x ) * ( 3.0f - ( x ) * 2.0f ) )

// SmoothStep01: saturates "x" and applies the cubic
ML_INLINE float SmoothStep01( float x )
{
    x = saturate( x );

    return _SmoothStep01( x );
}

ML_INLINE float2 SmoothStep01( float2 x )
{
    x = saturate( x );

    return _SmoothStep01( x );
}

ML_INLINE float3 SmoothStep01( float3 x )
{
    x = saturate( x );

    return _SmoothStep01( x );
}

ML_INLINE float4 SmoothStep01( float4 x )
{
    x = saturate( x );

    return _SmoothStep01( x );
}

// SmoothStep: remaps "x" from [a; b] to [0; 1] ( saturated, see "_LinearStep" ) and applies the cubic
ML_INLINE float SmoothStep( float a, float b, float x )
{
    x = _LinearStep( a, b, x );

    return _SmoothStep01( x );
}

ML_INLINE float2 SmoothStep( float2 a, float2 b, float2 x )
{
    x = _LinearStep( a, b, x );

    return _SmoothStep01( x );
}

ML_INLINE float3 SmoothStep( float3 a, float3 b, float3 x )
{
    x = _LinearStep( a, b, x );

    return _SmoothStep01( x );
}

ML_INLINE float4 SmoothStep( float4 a, float4 b, float4 x )
{
    x = _LinearStep( a, b, x );

    return _SmoothStep01( x );
}
// SmootherStep
// https://en.wikipedia.org/wiki/Smoothstep
// "_SmootherStep01" is the quintic "x^3 * ( x * ( 6x - 15 ) + 10 )" and expects "x" to be in range [0; 1] already.
// The macro argument is parenthesized, so a compound argument is substituted as a whole
#define _SmootherStep01( x ) ( ( x ) * ( x ) * ( x ) * ( ( x ) * ( ( x ) * 6.0f - 15.0f ) + 10.0f ) )

// Remaps "x" from [a; b] to [0; 1] ( saturated, see "_LinearStep" ) and applies the quintic
ML_INLINE float SmootherStep( float a, float b, float x )
{
    x = _LinearStep( a, b, x );

    return _SmootherStep01( x );
}

ML_INLINE float2 SmootherStep( float2 a, float2 b, float2 x )
{
    x = _LinearStep( a, b, x );

    return _SmootherStep01( x );
}

ML_INLINE float3 SmootherStep( float3 a, float3 b, float3 x )
{
    x = _LinearStep( a, b, x );

    return _SmootherStep01( x );
}

ML_INLINE float4 SmootherStep( float4 a, float4 b, float4 x )
{
    x = _LinearStep( a, b, x );

    return _SmootherStep01( x );
}
// Sign
// Modes:
//   ML_SIGN_BUILTIN - forwards to the "sign" intrinsic
//   ML_SIGN_FAST    - "step"-based variant "step( 0, x ) * 2 - 1", which yields only -1 or +1 ( 0 maps to +1 )
#define ML_SIGN_BUILTIN 0
#define ML_SIGN_FAST 1
#define _Sign( x ) ( step( 0.0f, x ) * 2.0f - 1.0f )

ML_INLINE float Sign( float x, compiletime const uint mode = ML_SIGN_DEFAULT )
{
    if( mode == ML_SIGN_FAST )
        return _Sign( x );

    return sign( x );
}

ML_INLINE float2 Sign( float2 x, compiletime const uint mode = ML_SIGN_DEFAULT )
{
    if( mode == ML_SIGN_FAST )
        return _Sign( x );

    return sign( x );
}

ML_INLINE float3 Sign( float3 x, compiletime const uint mode = ML_SIGN_DEFAULT )
{
    if( mode == ML_SIGN_FAST )
        return _Sign( x );

    return sign( x );
}

ML_INLINE float4 Sign( float4 x, compiletime const uint mode = ML_SIGN_DEFAULT )
{
    if( mode == ML_SIGN_FAST )
        return _Sign( x );

    return sign( x );
}
// Pow
// Raises the absolute value of "x" to the power "y", i.e. the sign of "x" is discarded before calling "pow".
// The exponent is either shared by all components ( scalar "y" ) or given per component
ML_INLINE float Pow( float x, float y )
{
    float magnitude = abs( x );

    return pow( magnitude, y );
}

ML_INLINE float2 Pow( float2 x, float y )
{
    float2 magnitude = abs( x );

    return pow( magnitude, y );
}

ML_INLINE float2 Pow( float2 x, float2 y )
{
    float2 magnitude = abs( x );

    return pow( magnitude, y );
}

ML_INLINE float3 Pow( float3 x, float y )
{
    float3 magnitude = abs( x );

    return pow( magnitude, y );
}

ML_INLINE float3 Pow( float3 x, float3 y )
{
    float3 magnitude = abs( x );

    return pow( magnitude, y );
}

ML_INLINE float4 Pow( float4 x, float y )
{
    float4 magnitude = abs( x );

    return pow( magnitude, y );
}

ML_INLINE float4 Pow( float4 x, float4 y )
{
    float4 magnitude = abs( x );

    return pow( magnitude, y );
}
// Pow for values in range [0; 1]
// The base is saturated before calling "pow". The exponent is either shared by all components or given per component
ML_INLINE float Pow01( float x, float y )
{
    float base01 = saturate( x );

    return pow( base01, y );
}

ML_INLINE float2 Pow01( float2 x, float y )
{
    float2 base01 = saturate( x );

    return pow( base01, y );
}

ML_INLINE float2 Pow01( float2 x, float2 y )
{
    float2 base01 = saturate( x );

    return pow( base01, y );
}

ML_INLINE float3 Pow01( float3 x, float y )
{
    float3 base01 = saturate( x );

    return pow( base01, y );
}

ML_INLINE float3 Pow01( float3 x, float3 y )
{
    float3 base01 = saturate( x );

    return pow( base01, y );
}

ML_INLINE float4 Pow01( float4 x, float y )
{
    float4 base01 = saturate( x );

    return pow( base01, y );
}

ML_INLINE float4 Pow01( float4 x, float4 y )
{
    float4 base01 = saturate( x );

    return pow( base01, y );
}
// Sqrt
// Modes:
//   ML_SQRT_BUILTIN - plain "sqrt( x )"
//   ML_SQRT_SAFE    - the input is clamped to be non-negative first: "sqrt( max( x, 0 ) )"
#define ML_SQRT_BUILTIN 0
#define ML_SQRT_SAFE 1

ML_INLINE float Sqrt( float x, compiletime const uint mode = ML_SQRT_DEFAULT )
{
    if( mode == ML_SQRT_SAFE )
        x = max( x, 0.0f );

    return sqrt( x );
}

ML_INLINE float2 Sqrt( float2 x, compiletime const uint mode = ML_SQRT_DEFAULT )
{
    if( mode == ML_SQRT_SAFE )
        x = max( x, 0.0f );

    return sqrt( x );
}

ML_INLINE float3 Sqrt( float3 x, compiletime const uint mode = ML_SQRT_DEFAULT )
{
    if( mode == ML_SQRT_SAFE )
        x = max( x, 0.0f );

    return sqrt( x );
}

ML_INLINE float4 Sqrt( float4 x, compiletime const uint mode = ML_SQRT_DEFAULT )
{
    if( mode == ML_SQRT_SAFE )
        x = max( x, 0.0f );

    return sqrt( x );
}
// Sqrt for values in range [0; 1]
// The input is saturated before taking the square root
ML_INLINE float Sqrt01( float x )
{
    float x01 = saturate( x );

    return sqrt( x01 );
}

ML_INLINE float2 Sqrt01( float2 x )
{
    float2 x01 = saturate( x );

    return sqrt( x01 );
}

ML_INLINE float3 Sqrt01( float3 x )
{
    float3 x01 = saturate( x );

    return sqrt( x01 );
}

ML_INLINE float4 Sqrt01( float4 x )
{
    float4 x01 = saturate( x );

    return sqrt( x01 );
}
// 1 / Sqrt
// Modes:
//   ML_POSITIVE_RSQRT_BUILTIN       - "rsqrt( x )"
//   ML_POSITIVE_RSQRT_BUILTIN_SAFE  - "rsqrt( max( x, ML_SMALL_EPS ) )"
//   ML_POSITIVE_RSQRT_ACCURATE      - "1 / sqrt( x )"
//   ML_POSITIVE_RSQRT_ACCURATE_SAFE - "1 / sqrt( max( x, ML_SMALL_EPS ) )"
#define ML_POSITIVE_RSQRT_BUILTIN 0
#define ML_POSITIVE_RSQRT_BUILTIN_SAFE 1
#define ML_POSITIVE_RSQRT_ACCURATE 2
#define ML_POSITIVE_RSQRT_ACCURATE_SAFE 3

ML_INLINE float Rsqrt( float x, compiletime const uint mode = ML_RSQRT_DEFAULT )
{
    // Every mode except the two "unsafe" ones clamps the input from below
    if( mode != ML_POSITIVE_RSQRT_BUILTIN && mode != ML_POSITIVE_RSQRT_ACCURATE )
        x = max( x, ML_SMALL_EPS );

    if( mode > ML_POSITIVE_RSQRT_BUILTIN_SAFE )
        return 1.0f / sqrt( x );

    return rsqrt( x );
}

ML_INLINE float2 Rsqrt( float2 x, compiletime const uint mode = ML_RSQRT_DEFAULT )
{
    if( mode != ML_POSITIVE_RSQRT_BUILTIN && mode != ML_POSITIVE_RSQRT_ACCURATE )
        x = max( x, ML_SMALL_EPS );

    if( mode > ML_POSITIVE_RSQRT_BUILTIN_SAFE )
        return 1.0f / sqrt( x );

    return rsqrt( x );
}

ML_INLINE float3 Rsqrt( float3 x, compiletime const uint mode = ML_RSQRT_DEFAULT )
{
    if( mode != ML_POSITIVE_RSQRT_BUILTIN && mode != ML_POSITIVE_RSQRT_ACCURATE )
        x = max( x, ML_SMALL_EPS );

    if( mode > ML_POSITIVE_RSQRT_BUILTIN_SAFE )
        return 1.0f / sqrt( x );

    return rsqrt( x );
}

ML_INLINE float4 Rsqrt( float4 x, compiletime const uint mode = ML_RSQRT_DEFAULT )
{
    if( mode != ML_POSITIVE_RSQRT_BUILTIN && mode != ML_POSITIVE_RSQRT_ACCURATE )
        x = max( x, ML_SMALL_EPS );

    if( mode > ML_POSITIVE_RSQRT_BUILTIN_SAFE )
        return 1.0f / sqrt( x );

    return rsqrt( x );
}
// Acos(x) (approximate)
// https://www.desmos.com/calculator/x6ut8ros1u
// Computes "sqrt( 2 ) * sqrt( saturate( 1 - x ) )".
// The macro argument is parenthesized, so a compound argument like "_AcosApprox( a - b )" is subtracted as a whole
#define _AcosApprox( x ) ( sqrt( 2.0f ) * sqrt( saturate( 1.0f - ( x ) ) ) )

ML_INLINE float AcosApprox( float x )
{
    return _AcosApprox( x );
}

ML_INLINE float2 AcosApprox( float2 x )
{
    return _AcosApprox( x );
}

ML_INLINE float3 AcosApprox( float3 x )
{
    return _AcosApprox( x );
}

ML_INLINE float4 AcosApprox( float4 x )
{
    return _AcosApprox( x );
}
// Atan(x) (approximate, for x in range [-1; 1])
// https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1628884
// https://www.desmos.com/calculator/0h8hv7kfp6
// Computes "PI / 4 * x - x * ( |x| - 1 ) * ( 0.2447 + 0.0663 * |x| )".
// The macro argument is parenthesized, so a compound argument is substituted as a whole
#define _AtanApprox( x ) ( Math::Pi( 0.25f ) * ( x ) - ( abs( x ) * ( x ) - ( x ) ) * ( 0.2447f + 0.0663f * abs( x ) ) )

ML_INLINE float AtanApprox( float x )
{
    return _AtanApprox( x );
}

ML_INLINE float2 AtanApprox( float2 x )
{
    return _AtanApprox( x );
}

ML_INLINE float3 AtanApprox( float3 x )
{
    return _AtanApprox( x );
}

ML_INLINE float4 AtanApprox( float4 x )
{
    return _AtanApprox( x );
}
// 1 / positive
// Modes:
//   ML_POSITIVE_RCP_BUILTIN       - "rcp( x )"
//   ML_POSITIVE_RCP_BUILTIN_SAFE  - "rcp( max( x, ML_SMALL_EPS ) )"
//   ML_POSITIVE_RCP_ACCURATE      - "1 / x"
//   ML_POSITIVE_RCP_ACCURATE_SAFE - "1 / max( x, ML_SMALL_EPS )"
#define ML_POSITIVE_RCP_BUILTIN 0
#define ML_POSITIVE_RCP_BUILTIN_SAFE 1
#define ML_POSITIVE_RCP_ACCURATE 2
#define ML_POSITIVE_RCP_ACCURATE_SAFE 3

ML_INLINE float PositiveRcp( float x, compiletime const uint mode = ML_POSITIVE_RCP_DEFAULT )
{
    // Every mode except the two "unsafe" ones clamps the input from below
    if( mode != ML_POSITIVE_RCP_BUILTIN && mode != ML_POSITIVE_RCP_ACCURATE )
        x = max( x, ML_SMALL_EPS );

    if( mode > ML_POSITIVE_RCP_BUILTIN_SAFE )
        return 1.0f / x;

    return rcp( x );
}

ML_INLINE float2 PositiveRcp( float2 x, compiletime const uint mode = ML_POSITIVE_RCP_DEFAULT )
{
    if( mode != ML_POSITIVE_RCP_BUILTIN && mode != ML_POSITIVE_RCP_ACCURATE )
        x = max( x, ML_SMALL_EPS );

    if( mode > ML_POSITIVE_RCP_BUILTIN_SAFE )
        return 1.0f / x;

    return rcp( x );
}

ML_INLINE float3 PositiveRcp( float3 x, compiletime const uint mode = ML_POSITIVE_RCP_DEFAULT )
{
    if( mode != ML_POSITIVE_RCP_BUILTIN && mode != ML_POSITIVE_RCP_ACCURATE )
        x = max( x, ML_SMALL_EPS );

    if( mode > ML_POSITIVE_RCP_BUILTIN_SAFE )
        return 1.0f / x;

    return rcp( x );
}

ML_INLINE float4 PositiveRcp( float4 x, compiletime const uint mode = ML_POSITIVE_RCP_DEFAULT )
{
    if( mode != ML_POSITIVE_RCP_BUILTIN && mode != ML_POSITIVE_RCP_ACCURATE )
        x = max( x, ML_SMALL_EPS );

    if( mode > ML_POSITIVE_RCP_BUILTIN_SAFE )
        return 1.0f / x;

    return rcp( x );
}
// LengthSquared
// Squared Euclidean length, computed as the dot product of the vector with itself
ML_INLINE float LengthSquared( float2 v )
{
    float lengthSq = dot( v, v );

    return lengthSq;
}

ML_INLINE float LengthSquared( float3 v )
{
    float lengthSq = dot( v, v );

    return lengthSq;
}

ML_INLINE float LengthSquared( float4 v )
{
    float lengthSq = dot( v, v );

    return lengthSq;
}
// Distance
// Euclidean distance between two points: the length of their difference
ML_INLINE float Distance( float2 a, float2 b )
{
    float2 delta = a - b;

    return length( delta );
}

ML_INLINE float Distance( float3 a, float3 b )
{
    float3 delta = a - b;

    return length( delta );
}
// Manhattan distance
// Sum of the absolute per-component differences ( the sum is done via a dot product with 1 )
ML_INLINE float ManhattanDistance( float2 a, float2 b )
{
    float2 absDelta = abs( a - b );

    return dot( absDelta, 1.0f );
}

ML_INLINE float ManhattanDistance( float3 a, float3 b )
{
    float3 absDelta = abs( a - b );

    return dot( absDelta, 1.0f );
}
// Bit operations
// ReverseBitsN reverses the order of the N lowest bits of "x" by swapping neighbouring bits, then bit pairs,
// then nibbles, then bytes. Bits above the N lowest ones are masked out by the first step

// Reverses the 4 lowest bits
ML_INLINE uint ReverseBits4( uint x )
{
    uint shiftedUp = ( x & 0x5 ) << 1;
    uint shiftedDown = ( x & 0xA ) >> 1;
    x = shiftedUp | shiftedDown;

    shiftedUp = ( x & 0x3 ) << 2;
    shiftedDown = ( x & 0xC ) >> 2;

    return shiftedUp | shiftedDown;
}

// Reverses the 8 lowest bits
ML_INLINE uint ReverseBits8( uint x )
{
    uint shiftedUp = ( x & 0x55 ) << 1;
    uint shiftedDown = ( x & 0xAA ) >> 1;
    x = shiftedUp | shiftedDown;

    shiftedUp = ( x & 0x33 ) << 2;
    shiftedDown = ( x & 0xCC ) >> 2;
    x = shiftedUp | shiftedDown;

    shiftedUp = ( x & 0x0F ) << 4;
    shiftedDown = ( x & 0xF0 ) >> 4;

    return shiftedUp | shiftedDown;
}

// Reverses the 16 lowest bits
ML_INLINE uint ReverseBits16( uint x )
{
    uint shiftedUp = ( x & 0x5555 ) << 1;
    uint shiftedDown = ( x & 0xAAAA ) >> 1;
    x = shiftedUp | shiftedDown;

    shiftedUp = ( x & 0x3333 ) << 2;
    shiftedDown = ( x & 0xCCCC ) >> 2;
    x = shiftedUp | shiftedDown;

    shiftedUp = ( x & 0x0F0F ) << 4;
    shiftedDown = ( x & 0xF0F0 ) >> 4;
    x = shiftedUp | shiftedDown;

    shiftedUp = ( x & 0x00FF ) << 8;
    shiftedDown = ( x & 0xFF00 ) >> 8;

    return shiftedUp | shiftedDown;
}
// Reverses the order of all 32 bits. Shader code uses the "reversebits" intrinsic,
// C++ code swaps the 16-bit halves and then neighbouring bits, bit pairs, nibbles and bytes
ML_INLINE uint ReverseBits32( uint x )
{
#ifdef __cplusplus
    uint reversed = ( x << 16 ) | ( x >> 16 );
    reversed = ( ( reversed & 0x55555555 ) << 1 ) | ( ( reversed & 0xAAAAAAAA ) >> 1 );
    reversed = ( ( reversed & 0x33333333 ) << 2 ) | ( ( reversed & 0xCCCCCCCC ) >> 2 );
    reversed = ( ( reversed & 0x0F0F0F0F ) << 4 ) | ( ( reversed & 0xF0F0F0F0 ) >> 4 );
    reversed = ( ( reversed & 0x00FF00FF ) << 8 ) | ( ( reversed & 0xFF00FF00 ) >> 8 );

    return reversed;
#else
    return reversebits( x );
#endif
}
// Gathers the bits at even positions ( 0, 2, 4, ... ) of "x" into the 16 lowest bits of the result.
// Bits at odd positions are discarded
ML_INLINE uint CompactBits( uint x )
{
    uint packed = x & 0x55555555;

    // Each step halves the gaps between the kept bits
    packed = ( packed ^ ( packed >> 1 ) ) & 0x33333333;
    packed = ( packed ^ ( packed >> 2 ) ) & 0x0F0F0F0F;
    packed = ( packed ^ ( packed >> 4 ) ) & 0x00FF00FF;
    packed = ( packed ^ ( packed >> 8 ) ) & 0x0000FFFF;

    return packed;
}
}
//=======================================================================================================================
// GEOMETRY
//=======================================================================================================================
namespace Geometry
{
// Builds a 2D rotator for "angle" ( radians ), packed as float4( cos, sin, -sin, cos )
ML_INLINE float4 GetRotator( float angle )
{
    float sinA = sin( angle );
    float cosA = cos( angle );

    return float4( cosA, sinA, -sinA, cosA );
}
#ifndef __cplusplus
// Builds a 3x3 rotation matrix for a rotation by "angle" ( radians ) around "axis"
// NOTE(review): the formula presumably expects "axis" to be normalized - confirm against callers
ML_INLINE float3x3 GetRotator( float3 axis, float angle )
{
    float sinA = sin( angle );
    float cosA = cos( angle );
    float oneMinusCos = 1.0f - cosA;

    // Antisymmetric ( sine ) and symmetric ( 1 - cosine ) parts of the off-diagonal elements
    float3 sinAxis = sinA * axis;
    float3 symmetric = oneMinusCos * axis.xyx * axis.yzz;

    float3 diagonal = oneMinusCos * ( axis * axis ) + cosA;
    float3 symMinusSin = symmetric.xyz - sinAxis.zxy;
    float3 symPlusSin = symmetric.zxy + sinAxis.yzx;

    return float3x3
    (
        diagonal.x, symMinusSin.x, symPlusSin.x,
        symPlusSin.y, diagonal.y, symMinusSin.y,
        symMinusSin.z, symPlusSin.z, diagonal.z
    );
}
#endif
// Builds a 2D rotator from a precomputed sine ( "sa" ) and cosine ( "ca" ), packed as float4( cos, sin, -sin, cos )
ML_INLINE float4 GetRotator( float sa, float ca )
{
    float4 rotator = float4( ca, sa, -sa, ca );

    return rotator;
}
// Combines two 2D rotators ( float4 packed 2x2 matrices ) into one: "r1.xyxy * r2.xxzz + r1.zwzw * r2.yyww"
ML_INLINE float4 CombineRotators( float4 r1, float4 r2 )
{
    float4 r1Low = float4( r1.xyxy );
    float4 r1High = float4( r1.zwzw );

    return r1Low * r2.xxzz + r1High * r2.yyww;
}
// Uniformly scales all 4 elements of a rotator
ML_INLINE float4 ScaleRotator( float4 r, float scale )
{
    float4 scaled = r * scale;

    return scaled;
}

// Non-uniform variant: returns float4( scale.x * r.x, scale.x * r.z, scale.y * r.y, scale.y * r.w )
ML_INLINE float4 ScaleRotator( float4 r, float2 scale )
{
    float2 scaledByX = scale.x * r.xz;
    float2 scaledByY = scale.y * r.yw;

    return float4( scaledByX, scaledByY );
}
// Applies a 2D rotator to "v": result = ( v.x * r.x + v.y * r.y, v.x * r.z + v.y * r.w )
ML_INLINE float2 RotateVector( float4 rotator, float2 v )
{
    float2 fromX = v.x * rotator.xz;
    float2 fromY = v.y * rotator.yw;

    return fromX + fromY;
}
#ifndef __cplusplus
// Multiplies "v" by the upper-left 3x3 part of "m"
ML_INLINE float3 RotateVector( float4x4 m, float3 v )
{
    float3x3 rotation = ( float3x3 )m;

    return mul( rotation, v );
}

// Multiplies "v" by "m"
ML_INLINE float3 RotateVector( float3x3 m, float3 v )
{
    float3 rotated = mul( m, v );

    return rotated;
}
#endif
// Applies the transposed 2D rotator to "v": result = ( v.x * r.x + v.y * r.z, v.x * r.y + v.y * r.w )
ML_INLINE float2 RotateVectorInverse( float4 rotator, float2 v )
{
    float2 fromX = v.x * rotator.xy;
    float2 fromY = v.y * rotator.zw;

    return fromX + fromY;
}
#ifndef __cplusplus
// Multiplies "v" by the upper-left 3x3 part of the transposed "m"
ML_INLINE float3 RotateVectorInverse( float4x4 m, float3 v )
{
    float3x3 rotationT = ( float3x3 )transpose( m );

    return mul( rotationT, v );
}

// Multiplies "v" by the transposed "m"
ML_INLINE float3 RotateVectorInverse( float3x3 m, float3 v )
{
    float3x3 rotationT = transpose( m );

    return mul( rotationT, v );
}
#endif
// Transforms the point "p" ( extended with w = 1 ) by "m" and returns the xyz part of the result
ML_INLINE float3 AffineTransform( float4x4 m, float3 p )
{
    float4 transformed = mul( m, float4( p, 1.0f ) );

    return transformed.xyz;
}
#ifndef __cplusplus
// Same for a 3x4 matrix: the product is already a float3
ML_INLINE float3 AffineTransform( float3x4 m, float3 p )
{
    float4 point4 = float4( p, 1.0f );

    return mul( m, point4 );
}
#endif
// Transforms "p" as is ( "w" is provided by the caller ) and returns the xyz part of the result
ML_INLINE float3 AffineTransform( float4x4 m, float4 p )
{
    float4 transformed = mul( m, p );

    return transformed.xyz;
}
// Transforms the point "p" ( extended with w = 1 ) by "m" and returns all 4 components ( no perspective divide )
ML_INLINE float4 ProjectiveTransform( float4x4 m, float3 p )
{
    float4 point4 = float4( p, 1.0f );

    return mul( m, point4 );
}

// Transforms "p" as is ( "w" is provided by the caller ), no perspective divide
ML_INLINE float4 ProjectiveTransform( float4x4 m, float4 p )
{
    float4 transformed = mul( m, p );

    return transformed;
}
// Returns "v" rotated by 90 degrees: ( x, y ) -> ( -y, x ). The length is preserved
ML_INLINE float2 GetPerpendicular( float2 v )
{
    float2 perpendicular = float2( -v.y, v.x );

    return perpendicular;
}
// Returns a normalized vector perpendicular to "N". "N" is expected to be normalized, because the projection
// below removes "N * dot( T, N )" without dividing by the squared length of "N"
ML_INLINE float3 GetPerpendicularVector( float3 N )
{
    // Start from a permuted copy of "N" and remove its component along "N"
    float3 T = float3( N.z, -N.x, N.y );
    T -= N * dot( T, N );

    // For "N" along +-( 1, 1, -1 ) the permuted copy equals "-N", so nothing but rounding noise is left after
    // the projection and "normalize" would return garbage or NaN. In this case use another vector, which is
    // always perpendicular to "N" and is not short here, because "N.x" and "N.y" are far from 0.
    // The threshold is relative to the length of "N", so the check never triggers for regular inputs
    if( dot( T, T ) < ML_EPS * dot( N, N ) )
        T = float3( N.y, -N.x, 0.0f );

    return normalize( T );
}
#ifndef __cplusplus
// Builds a 3x3 basis with rows ( T, B, N ) around the vector "N", using the branchless method from:
// http://marc-b-reynolds.github.io/quaternions/2016/07/06/Orthonormal.html
// NOTE(review): the method presumably expects "N" to be normalized - confirm against callers
ML_INLINE float3x3 GetBasis( float3 N )
{
    // The sign must never be 0: for "N.z = 0" the sum "sz + N.z" would be 0 and the reciprocal below would blow up.
    // So, explicitly request the "fast" sign ( it returns only -1 or +1 ) instead of relying on ML_SIGN_DEFAULT
    float sz = Math::Sign( N.z, ML_SIGN_FAST );
    float a = 1.0f / ( sz + N.z );
    float ya = N.y * a;
    float b = N.x * ya;
    float c = N.x * sz;

    float3 T = float3( c * N.x * a - 1.0f, sz * b, c );
    float3 B = float3( b, N.y * ya - sz, N.y );

    // Note: due to the quaternion formulation, the generated frame is rotated by 180 degrees,
    // s.t. if N = (0, 0, 1), then T = (-1, 0, 0) and B = (0, -1, 0).
    return float3x3( T, B, N );
}
#endif
// Returns barycentric weights of the point "p" relative to the vertices "b" ( .x ) and "c" ( .y ) of the
// triangle ( a, b, c ), i.e. "p = a + x * ( b - a ) + y * ( c - a )" for a point in the triangle plane.
// There is no protection against degenerate triangles ( zero determinant )
ML_INLINE float2 GetBarycentricCoords( float3 p, float3 a, float3 b, float3 c )
{
    // Triangle edges starting at "a" and the vector from "a" to the point
    float3 edgeAB = b - a;
    float3 edgeAC = c - a;
    float3 toPoint = p - a;

    float dotABAB = dot( edgeAB, edgeAB );
    float dotABAC = dot( edgeAB, edgeAC );
    float dotACAC = dot( edgeAC, edgeAC );
    float dotPAB = dot( toPoint, edgeAB );
    float dotPAC = dot( toPoint, edgeAC );

    // Solve the 2x2 linear system via Cramer's rule
    float2 numerators = float2( dotACAC * dotPAB - dotABAC * dotPAC, dotABAB * dotPAC - dotABAC * dotPAB );
    float rcpDeterminant = 1.0f / ( dotABAB * dotACAC - dotABAC * dotABAC );

    return numerators * rcpDeterminant;
}
// Windowed inverse-square falloff: "saturate( 1 - ( dist / Rmax )^4 )^2 / ( dist^2 + 1 )"
// [Brian Karis 2013, "Real Shading in Unreal Engine 4 ( course notes )"]
ML_INLINE float DistanceAttenuation( float dist, float Rmax )
{
    float ratio = dist / Rmax;
    float ratioSq = ratio * ratio;

    // Window function, reaches 0 at "dist = Rmax"
    float window = saturate( 1.0f - ratioSq * ratioSq );
    window *= window;

    return window * Math::PositiveRcp( dist * dist + 1.0f );
}
// Reconstructs a 3-component normal from its XY part. If "isUnorm" is set, XY is first remapped from [0; 1] as
// "min( v * 255 / 127 - 1, 1 )", otherwise it is used as is. Z is "sqrt( saturate( 1 - dot( xy, xy ) ) )", i.e. non-negative
ML_INLINE float3 UnpackLocalNormal( float2 localNormal, bool isUnorm = true )
{
    float2 xy = localNormal;
    if( isUnorm )
        xy = min( localNormal * ( 255.0f / 127.0f ) - 1.0f, 1.0f );

    float3 n;
    n.xy = xy;
    n.z = Math::Sqrt01( 1.0f - Math::LengthSquared( n.xy ) );

    return n;
}
// Unpacks "localNormal" ( as UNORM, see "UnpackLocalNormal" ) and transforms it by the frame built from the
// tangent "T.xyz", the bitangent "cross( N, T.xyz )" scaled by "T.w" and the normal "N". The result is normalized
ML_INLINE float3 TransformLocalNormal( float2 localNormal, float4 T, float3 N )
{
    float3 local = UnpackLocalNormal( localNormal );
    float3 bitangent = cross( N, T.xyz ); // TODO: potentially "normalize" is needed here

    float3 transformed = local.x * T.xyz + T.w * local.y * bitangent + local.z * N;

    return normalize( transformed );
}
// Solid angle of a cone given the cosine of its half angle: "2 * PI * ( 1 - cosHalfAngle )"
ML_INLINE float SolidAngle( float cosHalfAngle )
{
    float oneMinusCos = 1.0f - cosHalfAngle;

    return Math::Pi( 2.0f ) * oneMinusCos;
}
// Reconstructs a view space position from "uv" and "viewZ". XY is "uv * cameraFrustum.zw + cameraFrustum.xy",
// multiplied by "viewZ" for a perspective projection or by "orthoMode" for an orthographic one
// orthoMode = { 0 - perspective, -1 - right handed ortho, 1 - left handed ortho }
ML_INLINE float3 ReconstructViewPosition( float2 uv, float4 cameraFrustum, float viewZ = 1.0, float orthoMode = 0.0f )
{
    // "viewZ" for "orthoMode = 0", "orthoMode" itself for "orthoMode = +-1"
    float xyScale = viewZ * ( 1.0f - abs( orthoMode ) ) + orthoMode;

    float3 viewPos;
    viewPos.xy = uv * cameraFrustum.zw + cameraFrustum.xy;
    viewPos.xy *= xyScale;
    viewPos.z = viewZ;

    return viewPos;
}
// Projects the point "X" by "worldToClip" and converts the result to UV: clip space XY divided by W and
// remapped from [-1; 1] to [0; 1] ( Y is flipped unless ML_WINDOW_ORIGIN_OGL is 1 ).
// If "killBackprojection" is set, points with negative clip space W get a far out-of-screen UV ( 99999 )
ML_INLINE float2 GetScreenUv( float4x4 worldToClip, float3 X, bool killBackprojection = false )
{
    float4 clip = Geometry::ProjectiveTransform( worldToClip, X );
    float2 ndc = float2( clip.xy ) / clip.w;

#if( ML_WINDOW_ORIGIN_OGL == 1 )
    float2 uv = ndc * 0.5f + 0.5f;
#else
    float2 uv = ndc * float2( 0.5f, -0.5f ) + 0.5f;
#endif

    if( killBackprojection && clip.w < 0.0 )
        uv = float2( 99999.0f, 99999.0f );

    return uv;
}
// Motion vector types:
//   ML_SCREEN_MOTION - "motionVector.xy" is a UV space delta, which is added to "uv"
//   ML_WORLD_MOTION  - "motionVector" is added to the position "X" and the result is projected by "worldToClipPrev"
#define ML_SCREEN_MOTION 0
#define ML_WORLD_MOTION 1

// Returns the UV of the point in the previous frame
ML_INLINE float2 GetPrevUvFromMotion( float2 uv, float3 X, float4x4 worldToClipPrev, float3 motionVector, compiletime const uint motionType = ML_WORLD_MOTION )
{
    if( motionType == ML_SCREEN_MOTION )
        return uv + motionVector.xy;

    return GetScreenUv( worldToClipPrev, X + motionVector );
}
#ifndef __cplusplus
// Builds the matrix "I - 2 * n * n^T"
// NOTE(review): this is a reflection about the plane with the normal "n" only if "n" is normalized - confirm against callers
ML_INLINE float3x3 GetMirrorMatrix( float3 n )
{
    float3 row0 = float3( 1.0f - 2.0f * n.x * n.x, 0.0f - 2.0f * n.y * n.x, 0.0f - 2.0f * n.z * n.x );
    float3 row1 = float3( 0.0f - 2.0f * n.x * n.y, 1.0f - 2.0f * n.y * n.y, 0.0f - 2.0f * n.z * n.y );
    float3 row2 = float3( 0.0f - 2.0f * n.x * n.z, 0.0f - 2.0f * n.y * n.z, 1.0f - 2.0f * n.z * n.z );

    return float3x3( row0, row1, row2 );
}
#endif
}
//=======================================================================================================================
// COLOR
//=======================================================================================================================
// https://chrisbrejon.com/cg-cinematography/chapter-1-color-management/
// https://viereck.ch/hue-xy-rgb/
// https://handwiki.org/wiki/Color_spaces_with_RGB_primaries
namespace Color
{
// Weighted sum of the RGB channels with the relative luminance weights
// https://en.wikipedia.org/wiki/Relative_luminance
ML_INLINE float Luminance( float3 x )
{
    const float3 weights = float3( 0.2126f, 0.7152f, 0.0722f );

    return dot( x, weights );
}
// Blends the color towards its luminance: "amount = 0" returns "x" unchanged,
// "amount = 1" returns the luminance in all channels
ML_INLINE float3 Saturation( float3 x, float amount )
{
    float gray = Luminance( x );

    return lerp( x, gray, amount );
}
/*
Gamma ramps and encoding transfer functions
Taken from https://github.com/Microsoft/DirectX-Graphics-Samples/blob/master/MiniEngine/Core/Shaders/ColorSpaceUtility.hlsli
Orthogonal to color space though usually tightly coupled. For instance, sRGB is both a
color space (defined by three basis vectors and a white point) and a gamma ramp. Gamma
ramps are designed to reduce perceptual error when quantizing floats to integers with a
limited number of bits. More variation is needed in darker colors because our eyes are
more sensitive in the dark. The way the curve helps is that it spreads out dark values
across more code words allowing for more variation. Likewise, bright values are merged
together into fewer code words allowing for less variation.
The sRGB curve is not a true gamma ramp but rather a piecewise function comprising a linear
section and a power function. When sRGB-encoded colors are passed to an LCD monitor, they
look correct on screen because the monitor expects the colors to be encoded with sRGB, and it
removes the sRGB curve to linearize the values. When textures are encoded with sRGB, as many
are, the sRGB curve needs to be removed before involving the colors in linear mathematics such
as physically based lighting.
*/
// Pure power-law encoding: "saturate( x ) ^ ( 1 / gamma )"
ML_INLINE float3 ToGamma( float3 x, float gamma = 2.2f )
{
    float exponent = 1.0f / gamma;

    return Math::Pow01( x, exponent );
}

// Pure power-law decoding: "saturate( x ) ^ gamma"
ML_INLINE float3 FromGamma( float3 x, float gamma = 2.2f )
{
    float3 decoded = Math::Pow01( x, gamma );

    return decoded;
}
// "Full RGB": approximately pow( x, 1.0f / 2.2f )
// Piecewise sRGB encoding: a linear segment below the threshold 0.0031308 and a power segment at and above it
ML_INLINE float3 ToSrgb( float3 x )
{
    // x - scale of the power segment, y - exponent, z - offset of the power segment, w - slope of the linear segment
    const float4 k = float4( 1.055f, 0.41666f, -0.055f, 12.92f );

    float3 linearSegment = x * k.w;
    float3 powerSegment = k.x * Math::Pow( x, k.yyy ) + k.z;
    float3 usePower = step( 0.0031308f, x );

    return lerp( linearSegment, powerSegment, usePower );
}
// Piecewise sRGB decoding: a linear segment below the threshold 0.04045 and a power segment at and above it
ML_INLINE float3 FromSrgb( float3 x )
{
    // x - slope of the linear segment, y - scale and z - offset applied before the power, w - exponent
    const float4 k = float4( 1.0f / 12.92f, 1.0f / 1.055f, 0.055f / 1.055f, 1.0f / 0.41666f );

    float3 linearSegment = x * k.x;
    float3 powerSegment = Math::Pow( x * k.y + k.z, k.www );
    float3 usePower = step( 0.04045f, x );

    return lerp( linearSegment, powerSegment, usePower );
}
// "Limited RGB"
// The OETF (opto-electronic transfer function) recommended for content shown on HDTVs.
// This "gamma ramp" may increase contrast as appropriate for viewing in a dark environment.
// Always use this curve with "Limited RGB" as it is used in conjunction with HDTVs.
// Piecewise Rec.709 encoding: a linear segment below the threshold 0.0181 and a power segment at and above it
ML_INLINE float3 ToRec709( float3 x )
{
    // x - scale of the power segment, y - exponent, z - offset of the power segment, w - slope of the linear segment
    const float4 k = float4( 1.0993f, 0.45f, -0.0993f, 4.5f );

    float3 linearSegment = x * k.w;
    float3 powerSegment = k.x * Math::Pow( x, k.yyy ) + k.zzz;
    float3 usePower = step( 0.0181f, x );

    return lerp( linearSegment, powerSegment, usePower );
}
// Piecewise Rec.709 decoding: a linear segment below the threshold 0.08145 and a power segment at and above it
ML_INLINE float3 FromRec709( float3 x )
{
    // x - slope of the linear segment, y - scale and z - offset applied before the power, w - exponent
    const float4 k = float4( 1.0f / 4.5f, 1.0f / 1.0993f, 0.0993f / 1.0993f, 1.0f / 0.45f );

    float3 linearSegment = x * k.x;
    float3 powerSegment = Math::Pow( x * k.y + k.z, k.www );
    float3 usePower = step( 0.08145f, x );

    return lerp( linearSegment, powerSegment, usePower );
}
// This is the new HDR transfer function, also called "PQ" for perceptual quantizer. Note that REC2084
// is only a transfer function, not a color space. It is typically used with the REC2020 color space.
// Computes "( ( c1 + c2 * x^m1 ) / ( 1 + c3 * x^m1 ) )^m2" per channel
ML_INLINE float3 ToRec2084( float3 x )
{
    const float expM1 = 2610.0f / 4096.0f / 4.0f;
    const float expM2 = 2523.0f / 4096.0f * 128.0f;
    const float coefC1 = 3424.0f / 4096.0f;
    const float coefC2 = 2413.0f / 4096.0f * 32.0f;
    const float coefC3 = 2392.0f / 4096.0f * 32.0f;

    float3 powered = pow( x, expM1 );
    float3 ratio = ( coefC1 + coefC2 * powered ) / ( 1.0f + coefC3 * powered );

    return pow( ratio, expM2 );
}
// Inverse of "ToRec2084": "( max( x^( 1 / m2 ) - c1, 0 ) / ( c2 - c3 * x^( 1 / m2 ) ) )^( 1 / m1 )" per channel
ML_INLINE float3 FromRec2084( float3 x )
{
    const float expM1 = 2610.0f / 4096.0f / 4.0f;
    const float expM2 = 2523.0f / 4096.0f * 128.0f;
    const float coefC1 = 3424.0f / 4096.0f;
    const float coefC2 = 2413.0f / 4096.0f * 32.0f;
    const float coefC3 = 2392.0f / 4096.0f * 32.0f;

    float3 powered = pow( x, 1.0f / expM2 );
    float3 ratio = max( powered - coefC1, 0.0f ) / ( coefC2 - coefC3 * powered );

    return pow( ratio, 1.0f / expM1 );
}
/*
Color space conversions:
Taken from https://github.com/Microsoft/DirectX-Graphics-Samples/blob/master/MiniEngine/Core/Shaders/ColorSpaceUtility.hlsli
These assume linear (not gamma-encoded) values. A color space conversion is a change
of basis (like in Linear Algebra). Since a color space is defined by three vectors,
the basis vectors, changing space involves a matrix-vector multiplication. Note that
changing the color space may result in colors that are "out of bounds" because some
color spaces have larger gamuts than others. When converting some colors from a wide
gamut to small gamut, negative values may result, which are inexpressible in that new
color space.
It would be ideal to build a color pipeline which never throws away inexpressible (but
perceivable) colors. This means using a color space that is as wide as possible. The
XYZ color space is the neutral, all-encompassing color space, but it has the unfortunate
property of having negative values (specifically in X and Z). To correct this, a further
transformation can be made to X and Z to make them always positive. They can have their
precision needs reduced by dividing by Y, allowing X and Z to be packed into two UNORM8s.
This color space is called YUV for lack of a better name.
Note: Rec.709 and sRGB share the same color primaries and white point. Their only difference
is the transfer curve used.
*/
// YCoCg
// RGB -> YCoCg: each output channel is a fixed weighted sum of the RGB channels
ML_INLINE float3 RgbToYCoCg( float3 x )
{
    const float3 weightsY = float3( 0.25, 0.5, 0.25 );
    const float3 weightsCo = float3( 0.5, 0.0, -0.5 );
    const float3 weightsCg = float3( -0.25, 0.5, -0.25 );

    return float3( dot( x, weightsY ), dot( x, weightsCo ), dot( x, weightsCg ) );
}
// YCoCg -> RGB ( x.x = Y, x.y = Co, x.z = Cg ): R = Y - Cg + Co, G = Y + Cg, B = Y - Cg - Co
ML_INLINE float3 YCoCgToRgb( float3 x )
{
    float yMinusCg = x.x - x.z;

    return float3( yMinusCg + x.y, x.x + x.z, yMinusCg - x.y );
}
#ifndef __cplusplus
// REC2020
// Multiplies a linear RGB color by a constant 3x3 matrix to convert it to REC2020 primaries
// NOTE(review): "RGB" here presumably means Rec.709 / sRGB primaries - confirm
ML_INLINE float3 RgbToRec2020( float3 x )
{
    static const float3x3 rgbToRec2020 = float3x3
    (
        0.627402, 0.329292, 0.043306,
        0.069095, 0.919544, 0.011360,
        0.016394, 0.088028, 0.895578
    );

    return mul( rgbToRec2020, x );
}

// Multiplies a linear REC2020 color by the constant 3x3 matrix of the opposite conversion
ML_INLINE float3 Rec2020ToRgb( float3 x )
{
    static const float3x3 rec2020ToRgb = float3x3
    (
        1.660496, -0.587656, -0.072840,
        -0.124547, 1.132895, -0.008348,
        -0.018154, -0.100597, 1.118751
    );

    return mul( rec2020ToRgb, x );
}
// DCIP3
// Multiplies a linear RGB color by a constant 3x3 matrix to convert it to DCI-P3 primaries
// NOTE(review): "RGB" here presumably means Rec.709 / sRGB primaries - confirm
ML_INLINE float3 RgbToDcip3( float3 x )
{
    static const float3x3 rgbToDcip3 = float3x3
    (
        0.822458, 0.177542, 0.000000,
        0.033193, 0.966807, 0.000000,
        0.017085, 0.072410, 0.910505
    );

    return mul( rgbToDcip3, x );
}

// Multiplies a linear DCI-P3 color by the constant 3x3 matrix of the opposite conversion
ML_INLINE float3 Dcip3ToRgb( float3 x )
{
    static const float3x3 dcip3ToRgb = float3x3
    (
        1.224947, -0.224947, 0.000000,
        -0.042056, 1.042056, 0.000000,
        -0.019641, -0.078651, 1.098291
    );

    return mul( dcip3ToRgb, x );
}
// CIE XYZ
// Multiplies a linear RGB color by a constant 3x3 matrix to convert it to CIE XYZ.
// The Y row matches the relative luminance weights up to rounding
ML_INLINE float3 RgbToXyz( float3 x )
{
    static const float3x3 rgbToXyz = float3x3
    (
        0.4123907992659595, 0.3575843393838780, 0.1804807884018343,
        0.2126390058715104, 0.7151686787677559, 0.0721923153607337,
        0.0193308187155918, 0.1191947797946259, 0.9505321522496608
    );

    return mul( rgbToXyz, x );
}

// Multiplies a CIE XYZ color by the constant 3x3 matrix of the opposite conversion
ML_INLINE float3 XyzToRgb( float3 x )
{
    static const float3x3 xyzToRgb = float3x3
    (
        3.240969941904522, -1.537383177570094, -0.4986107602930032,
        -0.9692436362808803, 1.875967501507721, 0.04155505740717569,
        0.05563007969699373, -0.2039769588889765, 1.056971514242878
    );

    return mul( xyzToRgb, x );
}
// Encode an RGB color into a 32-bit LogLuv HDR format. The supported luminance range is roughly 10^-6..10^6 in 0.17% steps.
// The log-luminance is encoded with 14 bits and chroma with 9 bits each. This was empirically more accurate than using 8 bit chroma.
// Black (all zeros) is handled exactly.
// Bit layout of the result: [ 31:18 ] - log-luminance, [ 17:9 ] - chroma u, [ 8:0 ] - chroma v
ML_INLINE uint ToLogLuv( float3 x )
{
    float3 xyz = RgbToXyz( x );

    // log2( Y ) over the range [ -20, 20 ) is quantized to 14 bits ( no sign bit ), "log2" returns -inf for "Y = 0"
    // TODO: Fast path that uses the bits from the fp32 representation directly
    float scaledLogY = 409.6f * ( log2( xyz.y ) + 20.0f );
    uint packedLuma = uint( clamp( scaledLogY, 0.0f, 16383.0f ) );

    // Zero luminance code: return early to avoid NaN in the chroma computation.
    // The code is 0 for "Y < 9.55e-7" and is decoded as exactly zero
    if( packedLuma == 0 )
        return 0;

    // Chroma ( u, v ) from chromaticity ( x, y ):
    //   u = 4x / ( -2x + 12y + 3 )
    //   v = 9y / ( -2x + 12y + 3 )
    // with "x = X / ( X + Y + Z )" and "y = Y / ( X + Y + Z )". Multiplying the numerators and the
    // denominator by ( X + Y + Z ) leaves only one division:
    //   u = 4X / ( -2X + 12Y + 3( X + Y + Z ) )
    //   v = 9Y / ( -2X + 12Y + 3( X + Y + Z ) )
    float rcpDenom = 1.0f / ( -2.0f * xyz.x + 12.0f * xyz.y + 3.0f * ( xyz.x + xyz.y + xyz.z ) );
    float2 chroma = float2( 4.0f, 9.0f ) * xyz.xy * rcpDenom;

    // The gamut of perceivable uv values is roughly [0,0.62], so scale by 820 to get 9-bit values
    uint2 packedChroma = uint2( clamp( 820.0f * chroma, 0.0, 511.0f ) );

    return ( packedLuma << 18 ) | ( packedChroma.x << 9 ) | packedChroma.y;
}
// Decode an RGB color stored in a 32-bit LogLuv HDR format ( see "ToLogLuv" for the bit layout ).
// Every quantized value is decoded at the center of its quantization step ( + 0.5 )
ML_INLINE float3 FromLogLuv( uint x )
{
    // Zero log-luminance code means black
    uint packedLuma = x >> 18;
    if( packedLuma == 0 )
        return 0;

    float logY = ( float( packedLuma ) + 0.5f ) / 409.6f - 20.0f;
    float luminance = exp2( logY );

    // Chromaticity ( x, y ) from chroma ( u, v ):
    //   x = 9u / ( 6u - 16v + 12 )
    //   y = 4v / ( 6u - 16v + 12 )
    uint2 packedChroma = uint2( x >> 9, x ) & 0x1ff;
    float2 chroma = ( float2( packedChroma ) + 0.5f ) / 820.0f;
    float rcpDenom = 1.0f / ( 6.0f * chroma.x - 16.0f * chroma.y + 12.0f );
    float2 chromaticity = float2( 9.0f, 4.0f ) * chroma * rcpDenom;

    // Chromaticity to XYZ:
    //   X = Y / y * x
    //   Z = Y / y * ( 1 - x - y )
    float scale = luminance / chromaticity.y;
    float3 xyz = float3( scale * chromaticity.x, luminance, scale * ( 1.0f - chromaticity.x - chromaticity.y ) );

    // Back to RGB, negative ( out-of-gamut ) values are clamped to 0
    return max( XyzToRgb( xyz ), 0.0f );
}
#endif
// HDR compression ( tone mapping )
// Per channel: "color * 0.38317" below the threshold 1.413 and "FromGamma( 1 - exp( -color ) )" at and above it.
// The input is expected to be already multiplied by exposure
ML_INLINE float3 HdrToLinear( float3 colorMulExposure )
{
    float3 linearPart = colorMulExposure * 0.38317f;
    float3 shoulderPart = FromGamma( 1.0f - exp( -colorMulExposure ) );
    float3 useShoulder = step( 1.413f, colorMulExposure );

    return lerp( linearPart, shoulderPart, useShoulder );
}
// Luminance based Reinhard-style compression: "color / ( 1 + Luminance( color ) * exposure )"
ML_INLINE float3 HdrToLinear_Reinhard( float3 color, float exposure = 1.0f )
{
    float denominator = 1.0f + Luminance( color ) * exposure;

    return color * Math::PositiveRcp( denominator );
}
// Rational tone mapping curve, evaluated per channel:
// "( x * ( A * x + C * B ) + D * E ) / ( x * ( A * x + B ) + D * F ) - E / F"
ML_INLINE float3 _UnchartedCurve( float3 x )
{
    const float shoulderStrength = 0.22f; // A
    const float linearStrength = 0.3f;    // B
    const float linearAngle = 0.1f;       // C
    const float toeStrength = 0.2f;       // D
    const float toeNumerator = 0.01f;     // E
    const float toeDenominator = 0.3f;    // F

    float3 numerator = x * ( shoulderStrength * x + linearAngle * linearStrength ) + toeStrength * toeNumerator;
    float3 denominator = x * ( shoulderStrength * x + linearStrength ) + toeStrength * toeDenominator;

    return ( numerator / denominator ) - ( toeNumerator / toeDenominator );
}