// Resources bound to this compute pass. Only the .x channel of t0..t7 is read,
// always through SampleLevel at mip 0 with the second coordinate set to 0.
Texture2D t7 : register(t7);
Texture2D t6 : register(t6);
Texture2D t5 : register(t5);
Texture2D t4 : register(t4);
Texture2D t3 : register(t3);
Texture2D t2 : register(t2);
Texture2D t1 : register(t1);
Texture2D t0 : register(t0);

// Read/write texture objects need an explicit element type; the kernel stores
// a float4 per texel (rgb result, alpha forced to 1.0).
RWTexture3D<float4> _OutputTexture : register(u0);

SamplerState s0_s : register(s0);

cbuffer cb0 : register(b0)
{
  float4 cb0[18];
}

// cmp(x) expands to (x): comparison results are used directly as 0/1 values.
#define cmp

// Rec.709 color grading, no ACES, no tonemapping
//
// One thread per output texel. The thread ID is scaled by cb0[0].y and run
// through a log -> linear decode; when cb0[17].x > 0 the full grading chain is
// applied, otherwise only the decoded value is written.
// NOTE(review): the constants and stage order look like a Unity-style 3D LUT
// baker (LogC decode, white balance, contrast, split toning, channel mixer,
// shadows/midtones/highlights, lift/gamma/gain, HSV curves, YRGB curves) --
// the per-stage labels below are assumptions to be confirmed against the game.
[numthreads(4, 4, 4)]
void main(uint3 vThreadID : SV_DispatchThreadID)
{
  float4 r0,r1,r2,r3,r4,r5,r6,r7;

  r0.xyz = (uint3)vThreadID.xyz;
  r0.w = cmp(0 < cb0[17].x);
  if (r0.w != 0) {
    // Log -> linear decode of the texel coordinate:
    // (exp2((x - 0.386036) * 13.6054821) - 0.047996) * 0.18
    r1.xyz = r0.xyz * cb0[0].yyy + float3(-0.386036009,-0.386036009,-0.386036009);
    r1.xyz = float3(13.6054821,13.6054821,13.6054821) * r1.xyz;
    r1.xyz = exp2(r1.xyz);
    r1.xyz = float3(-0.0479959995,-0.0479959995,-0.0479959995) + r1.xyz;
    r1.xyz = float3(0.179999992,0.179999992,0.179999992) * r1.xyz;

    // 3x3 matrix, per-channel scale by cb0[2].xyz, then a second 3x3 matrix.
    // NOTE(review): presumably linear -> LMS, white balance, LMS -> linear -- confirm.
    r2.x = dot(float3(0.390404999,0.549941003,0.00892631989), r1.xyz);
    r2.y = dot(float3(0.070841603,0.963172019,0.00135775004), r1.xyz);
    r2.z = dot(float3(0.0231081992,0.128021002,0.936245024), r1.xyz);
    r1.xyz = cb0[2].xyz * r2.xyz;
    r2.x = dot(float3(2.85846996,-1.62879002,-0.0248910002), r1.xyz);
    r2.y = dot(float3(-0.210181996,1.15820003,0.000324280991), r1.xyz);
    r2.z = dot(float3(-0.0418119989,-0.118169002,1.06867003), r1.xyz);

    // Back to log space, scale around a fixed pivot by cb0[7].z, decode again.
    // NOTE(review): looks like a contrast control applied in log space -- confirm.
    r1.xyz = r2.xyz * float3(5.55555582,5.55555582,5.55555582) + float3(0.0479959995,0.0479959995,0.0479959995);
    r1.xyz = max(float3(0,0,0), r1.xyz); //TODO: remove and abs etc
    r1.xyz = log2(r1.xyz);
    r1.xyz = r1.xyz * float3(0.0734997839,0.0734997839,0.0734997839) + float3(-0.0275523961,-0.0275523961,-0.0275523961);
    r1.xyz = r1.xyz * cb0[7].zzz + float3(0.0275523961,0.0275523961,0.0275523961);
    r1.xyz = float3(13.6054821,13.6054821,13.6054821) * r1.xyz;
    r1.xyz = exp2(r1.xyz);
    r1.xyz = float3(-0.0479959995,-0.0479959995,-0.0479959995) + r1.xyz;

    // Per-channel multiply by cb0[3].xyz (presumably a color filter -- confirm).
    r1.xyz = cb0[3].xyz * r1.xyz;
    r1.xyz = float3(0.179999992,0.179999992,0.179999992) * r1.xyz;

    // Encode with a 1/2.2 power before the two blends below.
#if 0 // LUMA: fixed negative values support (this seems to break rendering...)
    r1.xyz = pow(abs(r1.xyz), 1.0 / 2.2) * sign(r1.xyz);
#else
    r1.xyz = pow(max(r1.xyz, 0.0), 1.0 / 2.2);
#endif

    // Blend weight: saturate(luminance of the clamped color + cb0[15].w).
    r2.xyz = min(float3(1,1,1), r1.xyz); //TODO: remove min? And test above for follow up nans. Also, is it right that this runs in Rec.709 while the other LUT uses AP1 luminance coeffs?
    r0.w = dot(r2.xyz, float3(0.212672904,0.715152204,0.0721750036));
    r0.w = saturate(cb0[15].w + r0.w);
    r1.w = 1 + -r0.w;

    // r2 = lerp(0.5, cb0[15].xyz, 1 - weight), r3 = lerp(0.5, cb0[16].xyz, weight).
    r2.xyz = float3(-0.5,-0.5,-0.5) + cb0[15].xyz;
    r2.xyz = r1.www * r2.xyz + float3(0.5,0.5,0.5);
    r3.xyz = float3(-0.5,-0.5,-0.5) + cb0[16].xyz;
    r3.xyz = r0.www * r3.xyz + float3(0.5,0.5,0.5);

    // First blend of the color (a) with r2 (b), selected per channel:
    //   b <  0.5: 2ab + a^2 (1 - 2b)
    //   b >= 0.5: sqrt(a) (2b - 1) + 2a (1 - b)
    r4.xyz = r1.xyz + r1.xyz;
    r5.xyz = r1.xyz * r1.xyz;
    r6.xyz = -r2.xyz * float3(2,2,2) + float3(1,1,1);
    r5.xyz = r6.xyz * r5.xyz;
    r5.xyz = r4.xyz * r2.xyz + r5.xyz;
    r1.xyz = sqrt(r1.xyz);
    r6.xyz = r2.xyz * float3(2,2,2) + float3(-1,-1,-1);
    r7.xyz = float3(1,1,1) + -r2.xyz;
    r4.xyz = r7.xyz * r4.xyz;
    r1.xyz = r1.xyz * r6.xyz + r4.xyz;
    r2.xyz = cmp(r2.xyz >= float3(0.5,0.5,0.5));
    r4.xyz = r2.xyz ? float3(1,1,1) : 0;
    r2.xyz = r2.xyz ? float3(0,0,0) : float3(1,1,1);
    r2.xyz = r2.xyz * r5.xyz;
    r1.xyz = r1.xyz * r4.xyz + r2.xyz;

    // Second blend, same formula, with r3 as the blend color.
    r2.xyz = r1.xyz + r1.xyz;
    r4.xyz = r1.xyz * r1.xyz;
    r5.xyz = -r3.xyz * float3(2,2,2) + float3(1,1,1);
    r4.xyz = r5.xyz * r4.xyz;
    r4.xyz = r2.xyz * r3.xyz + r4.xyz;
    r1.xyz = sqrt(r1.xyz);
    r5.xyz = r3.xyz * float3(2,2,2) + float3(-1,-1,-1);
    r6.xyz = float3(1,1,1) + -r3.xyz;
    r2.xyz = r6.xyz * r2.xyz;
    r1.xyz = r1.xyz * r5.xyz + r2.xyz;
    r2.xyz = cmp(r3.xyz >= float3(0.5,0.5,0.5));
    r3.xyz = r2.xyz ? float3(1,1,1) : 0;
    r2.xyz = r2.xyz ? float3(0,0,0) : float3(1,1,1);
    r2.xyz = r2.xyz * r4.xyz;
    r1.xyz = r1.xyz * r3.xyz + r2.xyz;

    // Undo the 1/2.2 encode: exp2(2.2 * log2(|x|)).
    r1.xyz = log2(abs(r1.xyz)); // TODO: this isn't mirrored after pow? In other similar shaders too!
    r1.xyz = float3(2.20000005,2.20000005,2.20000005) * r1.xyz;
    r1.xyz = exp2(r1.xyz);

    // 3x3 mix with rows cb0[4..6].xyz (presumably a channel mixer -- confirm).
    r2.x = dot(r1.xyz, cb0[4].xyz);
    r2.y = dot(r1.xyz, cb0[5].xyz);
    r2.z = dot(r1.xyz, cb0[6].xyz);

    // Three luminance-driven weights from two t*t*(3 - 2t) ramps whose ranges
    // are cb0[14].xy and cb0[14].zw:
    //   r1.x = 1 - ramp0, r0.w = ramp1, r1.y = 1 - r1.x - r0.w.
    // No guard exists for a zero-width range (division by zero below).
    r0.w = dot(r2.xyz, float3(0.212672904,0.715152204,0.0721750036));
    r1.xy = cb0[14].yw + -cb0[14].xz;
    r1.zw = -cb0[14].xz + r0.ww;
    r1.xy = float2(1,1) / r1.xy;
    r1.xy = saturate(r1.zw * r1.xy);
    r1.zw = r1.xy * float2(-2,-2) + float2(3,3);
    r1.xy = r1.xy * r1.xy;
    r0.w = r1.w * r1.y;
    r1.x = -r1.z * r1.x + 1;
    r1.z = 1 + -r1.x;
    r1.y = -r1.w * r1.y + r1.z;

    // Weighted sum of the color tinted by cb0[11], cb0[12] and cb0[13].
    r3.xyz = cb0[11].xyz * r2.xyz;
    r4.xyz = cb0[12].xyz * r2.xyz;
    r1.yzw = r4.xyz * r1.yyy;
    r1.xyz = r3.xyz * r1.xxx + r1.yzw;
    r2.xyz = cb0[13].xyz * r2.xyz;
    r1.xyz = r2.xyz * r0.www + r1.xyz;

    // color * cb0[10] + cb0[8], then a sign-preserving pow with exponent cb0[9].
    r1.xyz = r1.xyz * cb0[10].xyz + cb0[8].xyz;
    r2.xyz = sign(r1.xyz);
    r1.xyz = pow(abs(r1.xyz), cb0[9].xyz);
    r3.xyz = r2.xyz * r1.xyz;

    // Branch-free RGB -> HSV-style conversion of r3.
    // Afterwards: r2.x = hue, r2.z = saturation, r1.x = value (max channel).
    r0.w = cmp(r3.y >= r3.z);
    r0.w = r0.w ? 1.000000 : 0;
    r4.xy = r3.zy;
    r4.zw = float2(-1,0.666666687);
    r1.xy = r2.yz * r1.yz + -r4.xy;
    r1.zw = float2(1,-1);
    r1.xyzw = r0.wwww * r1.xyzw + r4.xyzw;
    r0.w = cmp(r3.x >= r1.x);
    r0.w = r0.w ? 1.000000 : 0;
    r2.xyz = r1.xyw;
    r2.w = r3.x;
    r1.xyw = r2.wyx;
    r1.xyzw = r1.xyzw + -r2.xyzw;
    r1.xyzw = r0.wwww * r1.xyzw + r2.xyzw;
    r0.w = min(r1.w, r1.y);
    r0.w = r1.x + -r0.w;
    r1.y = r1.w + -r1.y;
    r1.w = r0.w * 6 + 9.99999975e-005;
    r1.y = r1.y / r1.w;
    r1.y = r1.z + r1.y;
    r2.x = abs(r1.y);
    r1.y = 9.99999975e-005 + r1.x;
    r2.z = r0.w / r1.y;

    // Saturation multiplier r0.w = 2*t5(hue) * 2*t6(saturation) * t7(luminance).
    // NOTE(review): presumably hue-vs-sat, sat-vs-sat and lum-vs-sat curve
    // textures -- confirm.
    r2.yw = float2(0,0);
    r0.w = t5.SampleLevel(s0_s, r2.xy, 0).x;
    r0.w = saturate(r0.w);
    r0.w = r0.w + r0.w;
    r1.y = t6.SampleLevel(s0_s, r2.zw, 0).x;
    r1.y = saturate(r1.y);
    r1.y = r1.y + r1.y;
    r0.w = r1.y * r0.w;
    r3.x = dot(r3.xyz, float3(0.212672904,0.715152204,0.0721750036));
    r3.yw = float2(0,0);
    r1.y = t7.SampleLevel(s0_s, r3.xy, 0).x;
    r1.y = saturate(r1.y);
    r0.w = r1.y * r0.w;

    // Hue offset: hue + cb0[7].x + t4(hue + cb0[7].x) - 0.5, wrapped into [0, 1].
    r3.z = cb0[7].x + r2.x;
    r1.y = t4.SampleLevel(s0_s, r3.zw, 0).x;
    r1.y = saturate(r1.y);
    r1.y = r1.y + r3.z;
    r1.yzw = float3(-0.5,0.5,-1.5) + r1.yyy;
    r2.x = cmp(r1.y < 0);
    r2.y = cmp(1 < r1.y);
    r1.y = r2.y ? r1.w : r1.y;
    r1.y = r2.x ? r1.z : r1.y;

    // HSV -> RGB with the shifted hue, original saturation (r2.z) and value (r1.x).
    r1.yzw = float3(1,0.666666687,0.333333343) + r1.yyy;
    r1.yzw = frac(r1.yzw);
    r1.yzw = r1.yzw * float3(6,6,6) + float3(-3,-3,-3);
    r1.yzw = saturate(float3(-1,-1,-1) + abs(r1.yzw));
    r1.yzw = float3(-1,-1,-1) + r1.yzw;
    r1.yzw = r2.zzz * r1.yzw + float3(1,1,1);

    // Saturation: lerp from luminance to color by 2 * cb0[7].y * r0.w
    // (the two-component dot of identical swizzles yields the factor 2).
    r2.xyz = r1.xxx * r1.yzw;
    r2.x = dot(r2.xyz, float3(0.212672904,0.715152204,0.0721750036));
    r0.w = dot(cb0[7].yy, r0.ww);
    r1.xyz = r1.xxx * r1.yzw + -r2.xxx;
    r1.xyz = r0.www * r1.xyz + r2.xxx;

    // Compress with x / (1 + max(r, g, b)) and add 0.00390625 (= 1/256) before
    // the lookups. NOTE(review): presumably a half-texel offset for 128-wide
    // curve textures -- confirm.
    r0.w = max(r1.x, r1.y);
    r0.w = max(r0.w, r1.z);
    r0.w = 1 + r0.w;
    r0.w = rcp(r0.w);
    r1.xyz = r1.xyz * r0.www + float3(0.00390625,0.00390625,0.00390625);

    // t0 is applied to all three channels, then t1/t2/t3 to r/g/b respectively.
    r1.w = 0;
    r2.x = t0.SampleLevel(s0_s, r1.xw, 0).x;
    r2.x = saturate(r2.x);
    r2.y = t0.SampleLevel(s0_s, r1.yw, 0).x;
    r2.y = saturate(r2.y);
    r2.z = t0.SampleLevel(s0_s, r1.zw, 0).x;
    r2.z = saturate(r2.z);
    r1.xyz = float3(0.00390625,0.00390625,0.00390625) + r2.xyz;
    r1.w = 0;
    r2.x = t1.SampleLevel(s0_s, r1.xw, 0).x;
    r2.x = saturate(r2.x);
    r2.y = t2.SampleLevel(s0_s, r1.yw, 0).x;
    r2.y = saturate(r2.y);
    r2.z = t3.SampleLevel(s0_s, r1.zw, 0).x;
    r2.z = saturate(r2.z);

    // Expand again with x / (1 - max(r, g, b)); a channel equal to 1 makes the
    // divisor 0, exactly as in the original code.
    r0.w = max(r2.x, r2.y);
    r0.w = max(r0.w, r2.z);
    r0.w = 1 + -r0.w;
    r0.w = rcp(r0.w);
    r1.xyz = r2.xyz * r0.www;
#if 1
    r1.xyz = max(float3(0,0,0), r1.xyz);
#endif
  } else {
    // Grading disabled: only the log -> linear decode of the texel coordinate.
    r0.xyz = r0.xyz * cb0[0].yyy + float3(-0.386036009,-0.386036009,-0.386036009);
    r0.xyz = float3(13.6054821,13.6054821,13.6054821) * r0.xyz;
    r0.xyz = exp2(r0.xyz);
    r0.xyz = float3(-0.0479959995,-0.0479959995,-0.0479959995) + r0.xyz;
    r1.xyz = float3(0.179999992,0.179999992,0.179999992) * r0.xyz;
  }

  r0.xyz = r1.xyz;
#if 0
  r0.xyz = max(float3(0,0,0), r0.xyz);
#endif
  _OutputTexture[vThreadID.xyz] = float4(r0.xyz, 1.0);
}