nlmeans.cl 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  /*
   * This file is part of FFmpeg.
   *
   * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
   * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
   * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  18. const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
  19. CLK_ADDRESS_CLAMP_TO_EDGE |
  20. CLK_FILTER_NEAREST);
  21. kernel void horiz_sum(__global uint4 *integral_img,
  22. __read_only image2d_t src,
  23. int width,
  24. int height,
  25. int4 dx,
  26. int4 dy)
  27. {
  28. int y = get_global_id(0);
  29. int work_size = get_global_size(0);
  30. uint4 sum = (uint4)(0);
  31. float4 s2;
  32. for (int i = 0; i < width; i++) {
  33. float s1 = read_imagef(src, sampler, (int2)(i, y)).x;
  34. s2.x = read_imagef(src, sampler, (int2)(i + dx.x, y + dy.x)).x;
  35. s2.y = read_imagef(src, sampler, (int2)(i + dx.y, y + dy.y)).x;
  36. s2.z = read_imagef(src, sampler, (int2)(i + dx.z, y + dy.z)).x;
  37. s2.w = read_imagef(src, sampler, (int2)(i + dx.w, y + dy.w)).x;
  38. sum += convert_uint4((s1 - s2) * (s1 - s2) * 255 * 255);
  39. integral_img[y * width + i] = sum;
  40. }
  41. }
  42. kernel void vert_sum(__global uint4 *integral_img,
  43. __global int *overflow,
  44. int width,
  45. int height)
  46. {
  47. int x = get_global_id(0);
  48. uint4 sum = 0;
  49. for (int i = 0; i < height; i++) {
  50. if (any((uint4)UINT_MAX - integral_img[i * width + x] < sum))
  51. atomic_inc(overflow);
  52. integral_img[i * width + x] += sum;
  53. sum = integral_img[i * width + x];
  54. }
  55. }
  56. kernel void weight_accum(global float *sum, global float *weight,
  57. global uint4 *integral_img, __read_only image2d_t src,
  58. int width, int height, int p, float h,
  59. int4 dx, int4 dy)
  60. {
  61. // w(x) = integral_img(x-p, y-p) +
  62. // integral_img(x+p, y+p) -
  63. // integral_img(x+p, y-p) -
  64. // integral_img(x-p, y+p)
  65. // total_sum[x] += w(x, y) * src(x + dx, y + dy)
  66. // total_weight += w(x, y)
  67. int x = get_global_id(0);
  68. int y = get_global_id(1);
  69. int4 xoff = x + dx;
  70. int4 yoff = y + dy;
  71. uint4 a = 0, b = 0, c = 0, d = 0;
  72. uint4 src_pix = 0;
  73. // out-of-bounding-box?
  74. int oobb = (x - p) < 0 || (y - p) < 0 || (y + p) >= height || (x + p) >= width;
  75. src_pix.x = (int)(255 * read_imagef(src, sampler, (int2)(xoff.x, yoff.x)).x);
  76. src_pix.y = (int)(255 * read_imagef(src, sampler, (int2)(xoff.y, yoff.y)).x);
  77. src_pix.z = (int)(255 * read_imagef(src, sampler, (int2)(xoff.z, yoff.z)).x);
  78. src_pix.w = (int)(255 * read_imagef(src, sampler, (int2)(xoff.w, yoff.w)).x);
  79. if (!oobb) {
  80. a = integral_img[(y - p) * width + x - p];
  81. b = integral_img[(y + p) * width + x - p];
  82. c = integral_img[(y - p) * width + x + p];
  83. d = integral_img[(y + p) * width + x + p];
  84. }
  85. float4 patch_diff = convert_float4(d + a - c - b);
  86. float4 w = native_exp(-patch_diff / (h * h));
  87. float w_sum = w.x + w.y + w.z + w.w;
  88. weight[y * width + x] += w_sum;
  89. sum[y * width + x] += dot(w, convert_float4(src_pix));
  90. }
  91. kernel void average(__write_only image2d_t dst,
  92. __read_only image2d_t src,
  93. global float *sum, global float *weight) {
  94. int x = get_global_id(0);
  95. int y = get_global_id(1);
  96. int2 dim = get_image_dim(dst);
  97. float w = weight[y * dim.x + x];
  98. float s = sum[y * dim.x + x];
  99. float src_pix = read_imagef(src, sampler, (int2)(x, y)).x;
  100. float r = (s + src_pix * 255) / (1.0f + w) / 255.0f;
  101. if (x < dim.x && y < dim.y)
  102. write_imagef(dst, (int2)(x, y), (float4)(r, 0.0f, 0.0f, 1.0f));
  103. }