1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
|
// FMV (RPZA) shader
// by Millennium Cyborg
// Ported to FiniteSingularity's OBS Retro Effects plugin
// Loosely based on the encoding method in ffmpeg's RPZA encoder:
// https://ffmpeg.org/doxygen/trunk/rpzaenc_8c_source.html
// Shader parameters.
// Matrix applied to vertex positions in mainTransform.
uniform float4x4 ViewProj;
// Texture read by sampleSource (the frame being encoded).
uniform texture2d image;
// Scale used to convert between UV (0 to 1) and integer pixel coordinates.
uniform float2 uv_size;
// Texture read by samplePrev; rpzaEncode compares each block against it.
// NOTE(review): presumably the previous output frame supplied by the host -- confirm.
uniform texture2d prev_frame;
// When >= 0.5, rpzaEncode never takes the "keep previous block" option.
uniform float is_keyframe;
// Quantization scale: quantize snaps colours to multiples of 1 / colors_per_channel.
uniform float colors_per_channel;
// Error thresholds. rpzaEncode squares each one and compares it against the
// summed square error of a 4x4 block for the corresponding encoding option.
uniform float rpza_threshold_prev_frame;
uniform float rpza_threshold_solid;
uniform float rpza_threshold_gradient;
// Build-time switches (both disabled by default).
// RPZA_DEBUG_COLORS: rpzaEncode returns a flat red/green/blue colour for the
// solid / gradient / 16-colour options instead of the encoded colour.
// #define RPZA_DEBUG_COLORS
// SRGB_CONV: wrap sampling and output in the sRGB transfer functions below.
// #define SRGB_CONV
#ifdef SRGB_CONV
// Applied to sampled rgb in sampleSource / samplePrev.
#define INPUT_CONV srgb_linear_to_nonlinear
// Applied to the final rgb in mainImageRPZA.
#define OUTPUT_CONV srgb_nonlinear_to_linear
#else
// Expand to nothing, so INPUT_CONV(x) / OUTPUT_CONV(x) reduce to (x).
#define INPUT_CONV
#define OUTPUT_CONV
#endif
// Sampler shared by sampleSource and samplePrev: linear filtering, clamped
// addressing on both axes, LOD range pinned to 0.
// NOTE(review): the sampling helpers describe their reads as nearest-neighbour.
// With Filter = Linear that only holds if uvFromPx lands exactly on texel
// centres (i.e. uv_size matches the texture size) -- confirm.
sampler_state textureSampler{
Filter = Linear;
AddressU = Clamp;
AddressV = Clamp;
MinLOD = 0;
MaxLOD = 0;
};
// Vertex data passed into mainTransform and on to mainImageRPZA.
struct VertData
{
// Vertex position; mainTransform multiplies it by ViewProj.
float4 pos : POSITION;
// Texture coordinate; mainImageRPZA converts it to a pixel coordinate.
float2 uv : TEXCOORD0;
};
// From libobs/data/color.effect
// Can't get the relevant include to work at the moment
// Encode one linear channel value with the sRGB transfer curve:
// a linear segment up to 0.0031308, a 1/2.4 power segment above it.
float srgb_linear_to_nonlinear_channel(float u)
{
    if (u <= 0.0031308)
    {
        return 12.92 * u;
    }
    return (1.055 * pow(u, 1. / 2.4)) - 0.055;
}
// Apply srgb_linear_to_nonlinear_channel to each of r, g and b.
float3 srgb_linear_to_nonlinear(float3 v)
{
    float3 encoded;
    encoded.r = srgb_linear_to_nonlinear_channel(v.r);
    encoded.g = srgb_linear_to_nonlinear_channel(v.g);
    encoded.b = srgb_linear_to_nonlinear_channel(v.b);
    return encoded;
}
// Decode one sRGB-encoded channel value back to linear:
// a linear segment up to 0.04045, a 2.4 power segment above it.
float srgb_nonlinear_to_linear_channel(float u)
{
    if (u <= 0.04045)
    {
        return u / 12.92;
    }
    return pow((u + 0.055) / 1.055, 2.4);
}
// Apply srgb_nonlinear_to_linear_channel to each of r, g and b.
float3 srgb_nonlinear_to_linear(float3 v)
{
    float3 decoded;
    decoded.r = srgb_nonlinear_to_linear_channel(v.r);
    decoded.g = srgb_nonlinear_to_linear_channel(v.g);
    decoded.b = srgb_nonlinear_to_linear_channel(v.b);
    return decoded;
}
// UV: proportion across source texture, 0 to 1
// px: output-scaled pixels, 0 to uv_size-1
// Convert a UV coordinate to the integer pixel coordinate that contains it.
float2 pxFromUv(float2 uv)
{
    float2 scaled = uv * uv_size.xy;
    return floor(scaled);
}
// Convert a pixel coordinate to the UV of that pixel's centre.
float2 uvFromPx(float2 tx)
{
    float2 centre = tx + 0.5;
    return centre / uv_size.xy;
}
// Read `image` at pixel px (sampled at the pixel centre given by uvFromPx)
// and pass the rgb part through INPUT_CONV; alpha is returned as sampled.
float4 sampleSource(float2 px)
{
    float4 texel = image.Sample(textureSampler, uvFromPx(px));
    return float4(INPUT_CONV(texel.rgb), texel.a);
}
// Read `prev_frame` at pixel px (sampled at the pixel centre given by uvFromPx)
// and pass the rgb part through INPUT_CONV; alpha is returned as sampled.
float4 samplePrev(float2 px)
{
    float4 texel = prev_frame.Sample(textureSampler, uvFromPx(px));
    return float4(INPUT_CONV(texel.rgb), texel.a);
}
// Snap a colour down to a multiple of 1 / colors_per_channel.
// `dither` is added in quantization-step units before flooring.
float3 quantize(float3 color, float dither)
{
    float3 scaled = color.rgb * colors_per_channel + dither;
    return floor(scaled) / colors_per_channel;
}
// Floored modulo per component: x - y * floor(x / y).
float2 mod(float2 x, float2 y)
{
    float2 quotient = floor(x / y);
    return x - y * quotient;
}
// Look up the 4x4 ordered-dither threshold for pixel px.
// The pattern repeats every 4 pixels in x and y; the result is tableValue / 16.
float get_bayer(float2 px)
{
    // The table initializer syntax differs between the OpenGL path and the default path.
#ifdef OPENGL
    const int thresholds[16] = int[16](
        0, 8, 2, 10,
        12, 4, 14, 6,
        3, 11, 1, 9,
        15, 7, 13, 5
    );
#else
    int thresholds[16] =
    {
        0, 8, 2, 10,
        12, 4, 14, 6,
        3, 11, 1, 9,
        15, 7, 13, 5
    };
#endif
    // Position inside the repeating 4x4 tile; rows are indexed by y.
    float2 cell = mod(px, float2(4.0, 4.0));
    int cellY = int(cell.y);
    int cellX = int(cell.x);
    return float(thresholds[cellY * 4 + cellX]) / 16.0;
}
// Quantize color.rgb using the ordered-dither threshold for pixel px.
// Alpha is passed through unchanged.
float4 ApplyDitherNoise(float4 color, float2 px)
{
    color.rgb = quantize(color.rgb, get_bayer(px));
    return color;
}
// Sample the source at pixel px, then dither and quantize its rgb.
float4 sampleWithDither(float2 px)
{
    return ApplyDitherNoise(sampleSource(px), px);
}
// Rotate the components of col by `amount` positions (taken modulo 3):
// 0 leaves col unchanged, 1 yields col.gbr, any other remainder yields col.brg.
float3 rotateCol(float3 col, int amount)
{
    int shift = amount % 3;
    if (shift == 0)
    {
        return col;
    }
    else if (shift == 1)
    {
        return col.gbr;
    }
    else
    {
        return col.brg;
    }
}
// Squared distance between two colours, with each per-channel
// absolute difference clamped to [0, 1] before squaring.
float calcSquareError(float3 ref, float3 col)
{
    float3 diff = abs(col - ref);
    float3 clamped = saturate(diff);
    return dot(clamped, clamped);
}
// Calculating per-pixel isn't ideal but it'll do for now
// Fit a two-endpoint colour gradient to the 4x4 block at blockCoord.
// Reads the block's 16 dithered source pixels and outputs:
//   mean     - per-channel average of the 16 samples
//   startCol - gradient endpoint at the primary channel's minimum, clamped to [0, 1]
//   endCol   - gradient endpoint at the primary channel's maximum, clamped to [0, 1]
// If the block is (nearly) constant in the primary channel, both endpoints
// are set to the clamped mean.
void calcGradient(int2 blockCoord, out float3 mean, out float3 startCol, out float3 endCol)
{
// What I think the correct ordering is:
// 1) add dither mask scaled down appropriately (/16 then /32) and offset appropriately (-8/16)
// 2) calc gradient
// 3) find nearest colours in rgb555
// Here I follow the method in the ffmpeg RPZA encoder:
// ( https://ffmpeg.org/doxygen/trunk/rpzaenc_8c_source.html )
// Pick as primary the channel with the largest range of values.
// That channel has a gradient from its min to its max.
// For the other two channels, use a least squares regression.
// Find min/max per channel
float3 minCol = float3(1.0, 1.0, 1.0);
float3 maxCol = float3(0.0, 0.0, 0.0);
float3 sum = float3(0.0, 0.0, 0.0); // r, g, b
float3 sumSq = float3(0.0, 0.0, 0.0); // rr, gg, bb
float3 sumProd = float3(0.0, 0.0, 0.0); // bg, gr, rb (index by (max+cur)%3)
// Accumulate the statistics over the 16 pixels of the block.
for (int bx = 0; bx < 4; ++bx)
{
for (int by = 0; by < 4; ++by)
{
int2 px = blockCoord * 4 + int2(bx, by);
float3 col = sampleWithDither(px).rgb;
minCol = min(minCol, col);
maxCol = max(maxCol, col);
sum += col;
sumSq += col * col;
sumProd += float3(col.b * col.g, col.g * col.r, col.r * col.b);
}
}
// Find max range across channels
// Strict '>' means ties keep the earlier channel, and a block with zero
// range in every channel keeps index 0.
int maxChanIdx = 0;
float maxChanRange = 0;
float3 chanRange = abs(minCol - maxCol);
for (int c = 0; c < 3; ++c)
{
if (chanRange[c] > maxChanRange)
{
maxChanIdx = c;
maxChanRange = chanRange[c];
}
}
// Per-channel mask that is set only for the primary channel.
#ifdef OPENGL
bvec3 maxChanMask = equal(int3(0, 1, 2), int3(maxChanIdx));
#else
float3 maxChanMask = (int3(0, 1, 2) == maxChanIdx);
#endif
// Rotate so that sumProd[c] holds sum(channel c * primary channel) for the
// two non-primary channels; the primary channel's own entry is masked out below.
sumProd = rotateCol(sumProd, maxChanIdx);
// Find slope per channel vs max channel
float3 m = float3(1.0, 1.0, 1.0);
float3 b = float3(0.0, 0.0, 0.0);
// Denominator of the least squares slope: n * sum(x^2) - sum(x)^2 with n = 16.
float d = 16.0f * sumSq[maxChanIdx] - sum[maxChanIdx] * sum[maxChanIdx];
mean = sum / 16.0f;
// Avoid div by 0. All the pixels in the block are the same colour or very close.
if (abs(d) < 0.00001f)
{
startCol = endCol = mean;
}
else
{
// This is a rearrangement of the least squares regression formula
float sumMax = sum[maxChanIdx];
m = (16.0f * sumProd - sumMax * sum) / d;
b = (sum - m * sumMax) / 16.0f;
// Primary channel: use its min/max directly. Other channels: evaluate the
// fitted line at the primary channel's min/max.
#ifdef OPENGL
startCol = mix(m * minCol[maxChanIdx] + b, minCol, maxChanMask);
endCol = mix(m * maxCol[maxChanIdx] + b, maxCol, maxChanMask);
#else
startCol = maxChanMask ? minCol : m * minCol[maxChanIdx] + b;
endCol = maxChanMask ? maxCol : m * maxCol[maxChanIdx] + b;
#endif
}
// The fitted endpoints can fall outside [0, 1]; clamp them.
startCol = saturate(startCol);
endCol = saturate(endCol);
}
// Pick the output colour for the pixel at sourcePx by classifying its 4x4 block.
// The block's summed square error is computed for three candidate encodings
// and the first one under its (squared) threshold wins:
//   0) keep the previous frame's pixel (skipped when is_keyframe >= 0.5)
//   1) one solid colour: the quantized block mean
//   2) the nearest of 4 colours spread along the block's fitted gradient
//   3) fallback: the pixel's own dithered, quantized colour
// Returns rgb with alpha fixed at 1. With RPZA_DEBUG_COLORS defined, options
// 1 / 2 / 3 return flat red / green / blue instead.
float4 rpzaEncode(float2 sourcePx)
{
    int2 blockCoord = int2(floor(sourcePx / 4.0));

    // NOTE: error calcs not going to be as accurate as they could be because
    // they're not taking the quantization into account
    // Compute the gradient by least squares
    float3 mean = float3(0.0, 0.0, 0.0);
    float3 startCol = float3(0.0, 0.0, 0.0);
    float3 endCol = float3(1.0, 1.0, 1.0);
    calcGradient(blockCoord, mean, startCol, endCol);
    mean = quantize(mean, 0.0);
    startCol = quantize(startCol, 0.0);
    endCol = quantize(endCol, 0.0);

    // Build the 4-colour gradient: endpoints plus two evenly spaced steps,
    // each snapped to the quantization grid.
    float3 gradCol[4];
    for (int i = 0; i < 4; ++i)
    {
        float3 p = lerp(startCol, endCol, float(i) / 3.0);
        gradCol[i] = quantize(p, 0.0);
    }

    // Sum the square error of each candidate encoding over the whole block.
    float sumSqErr0 = 0.0f; // vs previous frame
    float sumSqErr1 = 0.0f; // vs solid mean colour
    float sumSqErr4 = 0.0f; // vs nearest gradient colour
    for (int bx = 0; bx < 4; ++bx)
    {
        for (int by = 0; by < 4; ++by)
        {
            int2 px = blockCoord * 4 + int2(bx, by);
            float3 col = sampleWithDither(px).rgb;
            sumSqErr0 += calcSquareError(col, samplePrev(px).rgb);
            sumSqErr1 += calcSquareError(col, mean);

            // Only the smallest error is needed here, not the colour itself.
            float minErr = 9999999.0;
            for (int i = 0; i < 4; ++i)
            {
                float err = calcSquareError(gradCol[i], col);
                if (err < minErr)
                {
                    minErr = err;
                }
            }
            sumSqErr4 += minErr;
        }
    }

    float3 col = sampleWithDither(sourcePx).rgb;

    // option 0: keep previous block
    if (is_keyframe < 0.5f && sumSqErr0 < rpza_threshold_prev_frame * rpza_threshold_prev_frame)
    {
        // Adding a debug col for this doesn't quite work
        return float4(samplePrev(sourcePx).rgb, 1);
    }

    // option 1: single color
    if (sumSqErr1 < rpza_threshold_solid * rpza_threshold_solid)
    {
#ifdef RPZA_DEBUG_COLORS
        return float4(1, 0, 0, 1);
#else
        return float4(mean, 1);
#endif
    }

    // option 2: 4 colors
    if (sumSqErr4 < rpza_threshold_gradient * rpza_threshold_gradient)
    {
        // Find closest color in gradient (first match wins on ties)
        float minErr = 9999999.0;
        float3 minErrCol = float3(1.0, 1.0, 1.0);
        for (int i = 0; i < 4; ++i)
        {
            float3 p = gradCol[i];
            float err = calcSquareError(p, col);
            if (err < minErr)
            {
                minErrCol = p;
                minErr = err;
            }
        }
#ifdef RPZA_DEBUG_COLORS
        return float4(0, 1, 0, 1);
#else
        return float4(minErrCol, 1);
#endif
    }

    // option 3: 16 colors
#ifdef RPZA_DEBUG_COLORS
    return float4(0, 0, 1, 1);
#else
    return float4(col, 1);
#endif
}
// Vertex shader: multiply the position (with w forced to 1) by ViewProj
// and pass the texture coordinate through.
VertData mainTransform(VertData v_in)
{
    VertData v_out;
    v_out.pos = mul(float4(v_in.pos.xyz, 1.0), ViewProj);
    v_out.uv = v_in.uv;
    return v_out;
}
// Pixel shader: output the RPZA-encoded colour for this fragment's pixel.
// rgb comes from rpzaEncode (passed through OUTPUT_CONV); alpha comes from
// the dithered source sample.
float4 mainImageRPZA(VertData v_in) : TARGET
{
    float2 px = pxFromUv(v_in.uv);
    float4 dithered = sampleWithDither(px);
    float3 encoded = rpzaEncode(px).rgb;
    return float4(OUTPUT_CONV(encoded), dithered.a);
}
// Single-pass technique: mainTransform is the vertex shader and
// mainImageRPZA is the pixel shader.
technique DrawRPZA
{
pass
{
vertex_shader = mainTransform(v_in);
pixel_shader = mainImageRPZA(v_in);
}
}
|