tonemap.cl 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. /*
  2. * This file is part of FFmpeg.
  3. *
  4. * FFmpeg is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * FFmpeg is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with FFmpeg; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #define REFERENCE_WHITE 100.0f
  19. extern float3 lrgb2yuv(float3);
  20. extern float lrgb2y(float3);
  21. extern float3 yuv2lrgb(float3);
  22. extern float3 lrgb2lrgb(float3);
  23. extern float get_luma_src(float3);
  24. extern float get_luma_dst(float3);
  25. extern float3 ootf(float3 c, float peak);
  26. extern float3 inverse_ootf(float3 c, float peak);
  27. extern float3 get_chroma_sample(float3, float3, float3, float3);
  /*
   * Result of detect_peak_avg(). Field order matters: the function
   * initialises this struct positionally.
   */
  struct detection_result {
      float peak;     /* peak signal level handed to the tone-mapping curve */
      float average;  /* average signal level, used to derive the brightness slope */
  };
  /*
   * Rational curve shared by hable(): evaluates
   *   (in*(a*in + b*c) + d*e) / (in*(a*in + b) + d*f) - e/f
   * with fixed coefficients. The result is approximately 0 for in == 0.
   */
  float hable_f(float in) {
      const float a = 0.15f;
      const float b = 0.50f;
      const float c = 0.10f;
      const float d = 0.20f;
      const float e = 0.02f;
      const float f = 0.30f;

      const float numerator   = in * (in * a + b * c) + d * e;
      const float denominator = in * (in * a + b) + d * f;

      return numerator / denominator - e / f;
  }
  /*
   * Identity tone curve: returns the signal unchanged.
   * `peak` is accepted only so the signature matches the other curves.
   */
  float direct(float s, float peak) {
      (void)peak;
      return s;
  }
  39. float linear(float s, float peak) {
  40. return s * tone_param / peak;
  41. }
  42. float gamma(float s, float peak) {
  43. float p = s > 0.05f ? s /peak : 0.05f / peak;
  44. float v = powr(p, 1.0f / tone_param);
  45. return s > 0.05f ? v : (s * v /0.05f);
  46. }
  47. float clip(float s, float peak) {
  48. return clamp(s * tone_param, 0.0f, 1.0f);
  49. }
  50. float reinhard(float s, float peak) {
  51. return s / (s + tone_param) * (peak + tone_param) / peak;
  52. }
  /*
   * Hable tone curve: hable_f() of the signal, normalised by hable_f() of the
   * peak so that s == peak maps to 1.
   */
  float hable(float s, float peak) {
      const float mapped_signal = hable_f(s);
      const float mapped_peak   = hable_f(peak);
      return mapped_signal / mapped_peak;
  }
  56. float mobius(float s, float peak) {
  57. float j = tone_param;
  58. float a, b;
  59. if (s <= j)
  60. return s;
  61. a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak);
  62. b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, 1e-6f);
  63. return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b);
  64. }
  65. // detect peak/average signal of a frame, the algorithm was ported from:
  66. // libplacebo (https://github.com/haasn/libplacebo)
  67. struct detection_result
  68. detect_peak_avg(global uint *util_buf, __local uint *sum_wg,
  69. float signal, float peak) {
  70. // layout of the util buffer
  71. //
  72. // Name: : Size (units of 4-bytes)
  73. // average buffer : detection_frames + 1
  74. // peak buffer : detection_frames + 1
  75. // workgroup counter : 1
  76. // total of peak : 1
  77. // total of average : 1
  78. // frame index : 1
  79. // frame number : 1
  80. global uint *avg_buf = util_buf;
  81. global uint *peak_buf = avg_buf + DETECTION_FRAMES + 1;
  82. global uint *counter_wg_p = peak_buf + DETECTION_FRAMES + 1;
  83. global uint *max_total_p = counter_wg_p + 1;
  84. global uint *avg_total_p = max_total_p + 1;
  85. global uint *frame_idx_p = avg_total_p + 1;
  86. global uint *scene_frame_num_p = frame_idx_p + 1;
  87. uint frame_idx = *frame_idx_p;
  88. uint scene_frame_num = *scene_frame_num_p;
  89. size_t lidx = get_local_id(0);
  90. size_t lidy = get_local_id(1);
  91. size_t lsizex = get_local_size(0);
  92. size_t lsizey = get_local_size(1);
  93. uint num_wg = get_num_groups(0) * get_num_groups(1);
  94. size_t group_idx = get_group_id(0);
  95. size_t group_idy = get_group_id(1);
  96. struct detection_result r = {peak, sdr_avg};
  97. if (lidx == 0 && lidy == 0)
  98. *sum_wg = 0;
  99. barrier(CLK_LOCAL_MEM_FENCE);
  100. // update workgroup sum
  101. atomic_add(sum_wg, (uint)(signal * REFERENCE_WHITE));
  102. barrier(CLK_LOCAL_MEM_FENCE);
  103. // update frame peak/avg using work-group-average.
  104. if (lidx == 0 && lidy == 0) {
  105. uint avg_wg = *sum_wg / (lsizex * lsizey);
  106. atomic_max(&peak_buf[frame_idx], avg_wg);
  107. atomic_add(&avg_buf[frame_idx], avg_wg);
  108. }
  109. if (scene_frame_num > 0) {
  110. float peak = (float)*max_total_p / (REFERENCE_WHITE * scene_frame_num);
  111. float avg = (float)*avg_total_p / (REFERENCE_WHITE * scene_frame_num);
  112. r.peak = max(1.0f, peak);
  113. r.average = max(0.25f, avg);
  114. }
  115. if (lidx == 0 && lidy == 0 && atomic_add(counter_wg_p, 1) == num_wg - 1) {
  116. *counter_wg_p = 0;
  117. avg_buf[frame_idx] /= num_wg;
  118. if (scene_threshold > 0.0f) {
  119. uint cur_max = peak_buf[frame_idx];
  120. uint cur_avg = avg_buf[frame_idx];
  121. int diff = (int)(scene_frame_num * cur_avg) - (int)*avg_total_p;
  122. if (abs(diff) > scene_frame_num * scene_threshold * REFERENCE_WHITE) {
  123. for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
  124. avg_buf[i] = 0;
  125. for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
  126. peak_buf[i] = 0;
  127. *avg_total_p = *max_total_p = 0;
  128. *scene_frame_num_p = 0;
  129. avg_buf[frame_idx] = cur_avg;
  130. peak_buf[frame_idx] = cur_max;
  131. }
  132. }
  133. uint next = (frame_idx + 1) % (DETECTION_FRAMES + 1);
  134. // add current frame, subtract next frame
  135. *max_total_p += peak_buf[frame_idx] - peak_buf[next];
  136. *avg_total_p += avg_buf[frame_idx] - avg_buf[next];
  137. // reset next frame
  138. peak_buf[next] = avg_buf[next] = 0;
  139. *frame_idx_p = next;
  140. *scene_frame_num_p = min(*scene_frame_num_p + 1,
  141. (uint)DETECTION_FRAMES);
  142. }
  143. return r;
  144. }
  145. float3 map_one_pixel_rgb(float3 rgb, float peak, float average) {
  146. float sig = max(max(rgb.x, max(rgb.y, rgb.z)), 1e-6f);
  147. // Rescale the variables in order to bring it into a representation where
  148. // 1.0 represents the dst_peak. This is because all of the tone mapping
  149. // algorithms are defined in such a way that they map to the range [0.0, 1.0].
  150. if (target_peak > 1.0f) {
  151. sig *= 1.0f / target_peak;
  152. peak *= 1.0f / target_peak;
  153. }
  154. float sig_old = sig;
  155. // Scale the signal to compensate for differences in the average brightness
  156. float slope = min(1.0f, sdr_avg / average);
  157. sig *= slope;
  158. peak *= slope;
  159. // Desaturate the color using a coefficient dependent on the signal level
  160. if (desat_param > 0.0f) {
  161. float luma = get_luma_dst(rgb);
  162. float coeff = max(sig - 0.18f, 1e-6f) / max(sig, 1e-6f);
  163. coeff = native_powr(coeff, 10.0f / desat_param);
  164. rgb = mix(rgb, (float3)luma, (float3)coeff);
  165. sig = mix(sig, luma * slope, coeff);
  166. }
  167. sig = TONE_FUNC(sig, peak);
  168. sig = min(sig, 1.0f);
  169. rgb *= (sig/sig_old);
  170. return rgb;
  171. }
  172. // map from source space YUV to destination space RGB
  173. float3 map_to_dst_space_from_yuv(float3 yuv, float peak) {
  174. float3 c = yuv2lrgb(yuv);
  175. c = ootf(c, peak);
  176. c = lrgb2lrgb(c);
  177. return c;
  178. }
  179. __kernel void tonemap(__write_only image2d_t dst1,
  180. __read_only image2d_t src1,
  181. __write_only image2d_t dst2,
  182. __read_only image2d_t src2,
  183. global uint *util_buf,
  184. float peak
  185. )
  186. {
  187. __local uint sum_wg;
  188. const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
  189. CLK_ADDRESS_CLAMP_TO_EDGE |
  190. CLK_FILTER_NEAREST);
  191. int xi = get_global_id(0);
  192. int yi = get_global_id(1);
  193. // each work item process four pixels
  194. int x = 2 * xi;
  195. int y = 2 * yi;
  196. float y0 = read_imagef(src1, sampler, (int2)(x, y)).x;
  197. float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x;
  198. float y2 = read_imagef(src1, sampler, (int2)(x, y + 1)).x;
  199. float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x;
  200. float2 uv = read_imagef(src2, sampler, (int2)(xi, yi)).xy;
  201. float3 c0 = map_to_dst_space_from_yuv((float3)(y0, uv.x, uv.y), peak);
  202. float3 c1 = map_to_dst_space_from_yuv((float3)(y1, uv.x, uv.y), peak);
  203. float3 c2 = map_to_dst_space_from_yuv((float3)(y2, uv.x, uv.y), peak);
  204. float3 c3 = map_to_dst_space_from_yuv((float3)(y3, uv.x, uv.y), peak);
  205. float sig0 = max(c0.x, max(c0.y, c0.z));
  206. float sig1 = max(c1.x, max(c1.y, c1.z));
  207. float sig2 = max(c2.x, max(c2.y, c2.z));
  208. float sig3 = max(c3.x, max(c3.y, c3.z));
  209. float sig = max(sig0, max(sig1, max(sig2, sig3)));
  210. struct detection_result r = detect_peak_avg(util_buf, &sum_wg, sig, peak);
  211. float3 c0_old = c0, c1_old = c1, c2_old = c2;
  212. c0 = map_one_pixel_rgb(c0, r.peak, r.average);
  213. c1 = map_one_pixel_rgb(c1, r.peak, r.average);
  214. c2 = map_one_pixel_rgb(c2, r.peak, r.average);
  215. c3 = map_one_pixel_rgb(c3, r.peak, r.average);
  216. c0 = inverse_ootf(c0, target_peak);
  217. c1 = inverse_ootf(c1, target_peak);
  218. c2 = inverse_ootf(c2, target_peak);
  219. c3 = inverse_ootf(c3, target_peak);
  220. y0 = lrgb2y(c0);
  221. y1 = lrgb2y(c1);
  222. y2 = lrgb2y(c2);
  223. y3 = lrgb2y(c3);
  224. float3 chroma_c = get_chroma_sample(c0, c1, c2, c3);
  225. float3 chroma = lrgb2yuv(chroma_c);
  226. if (xi < get_image_width(dst2) && yi < get_image_height(dst2)) {
  227. write_imagef(dst1, (int2)(x, y), (float4)(y0, 0.0f, 0.0f, 1.0f));
  228. write_imagef(dst1, (int2)(x+1, y), (float4)(y1, 0.0f, 0.0f, 1.0f));
  229. write_imagef(dst1, (int2)(x, y+1), (float4)(y2, 0.0f, 0.0f, 1.0f));
  230. write_imagef(dst1, (int2)(x+1, y+1), (float4)(y3, 0.0f, 0.0f, 1.0f));
  231. write_imagef(dst2, (int2)(xi, yi),
  232. (float4)(chroma.y, chroma.z, 0.0f, 1.0f));
  233. }
  234. }