unsharp.cl 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. /*
  2. * This file is part of FFmpeg.
  3. *
  4. * FFmpeg is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * FFmpeg is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with FFmpeg; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. __kernel void unsharp_global(__write_only image2d_t dst,
  19. __read_only image2d_t src,
  20. int size_x,
  21. int size_y,
  22. float amount,
  23. __constant float *coef_matrix)
  24. {
  25. const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
  26. CLK_FILTER_NEAREST);
  27. int2 loc = (int2)(get_global_id(0), get_global_id(1));
  28. int2 centre = (int2)(size_x / 2, size_y / 2);
  29. float4 val = read_imagef(src, sampler, loc);
  30. float4 sum = 0.0f;
  31. int x, y;
  32. for (y = 0; y < size_y; y++) {
  33. for (x = 0; x < size_x; x++) {
  34. int2 pos = loc + (int2)(x, y) - centre;
  35. sum += coef_matrix[y * size_x + x] *
  36. read_imagef(src, sampler, pos);
  37. }
  38. }
  39. write_imagef(dst, loc, val + (val - sum) * amount);
  40. }
  41. __kernel void unsharp_local(__write_only image2d_t dst,
  42. __read_only image2d_t src,
  43. int size_x,
  44. int size_y,
  45. float amount,
  46. __constant float *coef_x,
  47. __constant float *coef_y)
  48. {
  49. const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
  50. CLK_ADDRESS_CLAMP_TO_EDGE |
  51. CLK_FILTER_NEAREST);
  52. int2 block = (int2)(get_group_id(0), get_group_id(1)) * 16;
  53. int2 pos = (int2)(get_local_id(0), get_local_id(1));
  54. __local float4 tmp[32][32];
  55. int rad_x = size_x / 2;
  56. int rad_y = size_y / 2;
  57. int x, y;
  58. for (y = 0; y <= 1; y++) {
  59. for (x = 0; x <= 1; x++) {
  60. tmp[pos.y + 16 * y][pos.x + 16 * x] =
  61. read_imagef(src, sampler, block + pos + (int2)(16 * x - 8, 16 * y - 8));
  62. }
  63. }
  64. barrier(CLK_LOCAL_MEM_FENCE);
  65. float4 val = tmp[pos.y + 8][pos.x + 8];
  66. float4 horiz[2];
  67. for (y = 0; y <= 1; y++) {
  68. horiz[y] = 0.0f;
  69. for (x = 0; x < size_x; x++)
  70. horiz[y] += coef_x[x] * tmp[pos.y + y * 16][pos.x + 8 + x - rad_x];
  71. }
  72. barrier(CLK_LOCAL_MEM_FENCE);
  73. for (y = 0; y <= 1; y++) {
  74. tmp[pos.y + y * 16][pos.x + 8] = horiz[y];
  75. }
  76. barrier(CLK_LOCAL_MEM_FENCE);
  77. float4 sum = 0.0f;
  78. for (y = 0; y < size_y; y++)
  79. sum += coef_y[y] * tmp[pos.y + 8 + y - rad_y][pos.x + 8];
  80. if (block.x + pos.x < get_image_width(dst) &&
  81. block.y + pos.y < get_image_height(dst))
  82. write_imagef(dst, block + pos, val + (val - sum) * amount);
  83. }