vf_threshold.asm 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. ;*****************************************************************************
  2. ;* x86-optimized functions for threshold filter
  3. ;*
  4. ;* Copyright (C) 2017 Paul B Mahol
  5. ;*
  6. ;* This file is part of FFmpeg.
  7. ;*
  8. ;* FFmpeg is free software; you can redistribute it and/or
  9. ;* modify it under the terms of the GNU Lesser General Public
  10. ;* License as published by the Free Software Foundation; either
  11. ;* version 2.1 of the License, or (at your option) any later version.
  12. ;*
  13. ;* FFmpeg is distributed in the hope that it will be useful,
  14. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. ;* Lesser General Public License for more details.
  17. ;*
  18. ;* You should have received a copy of the GNU Lesser General Public
  19. ;* License along with FFmpeg; if not, write to the Free Software
  20. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. ;*****************************************************************************
  22. %include "libavutil/x86/x86util.asm"
  23. SECTION_RODATA
  ; Sign-bit bias for 8-bit elements: the value 128 (0x80) in each of 16 bytes.
  ; Passed as the third THRESHOLD argument by the 8-bit instantiations.
  24. pb_128: times 16 db 128
  ; Sign-bit bias for 16-bit elements: 8 byte pairs (0, 128), i.e. 0x8000 in
  ; every little-endian word, 16 bytes in total. Passed as the third THRESHOLD
  ; argument by the 16-bit instantiations.
  25. pb_128_0 : times 8 db 0, 128
  26. SECTION .text
  27. ;%1 depth (8 or 16) ; %2 b or w ; %3 constant
  ; THRESHOLD depth, suffix, bias
  ;
  ; Emits one function, threshold%1, that walks h rows of four source planes
  ; (in, threshold, min, max) and writes one destination plane (out).
  ; For every element the result is:
  ;     out = (threshold > in) ? min : max        (compared as unsigned)
  ;
  ;   %1  element depth in bits; for 16 the width is doubled into a byte count
  ;   %2  element-size suffix appended to pcmpgt (b or w)
  ;   %3  label of the 16-byte sign-bit constant that is loaded into m4
  ;
  ; NOTE(review): the inner loop always advances by a full vector (mmsize
  ; bytes) and runs at least once per row, so when the row size in bytes is not
  ; a multiple of mmsize the last step loads and stores past the row end.
  ; Presumably callers provide padded/aligned rows and w > 0 - confirm.
  28. %macro THRESHOLD 3
  29. %if ARCH_X86_64
  ; 64-bit build: cglobal is given the numbers 10, 13, 5 followed by the
  ; argument names (presumably x86inc's args / GPRs / vector-regs fields -
  ; confirm). w and h are named here but loaded explicitly from wm / hm as
  ; 32-bit values; x is used as the per-row loop index.
  30. cglobal threshold%1, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
  31. mov wd, dword wm
  32. mov hd, dword hm
  33. %else
  ; 32-bit build: only the five plane pointers, w and x are named in cglobal.
  ; w is loaded from r10m; the five linesizes and h are aliased to r5mp..r9mp
  ; and r11mp (presumably the arguments' memory slots - confirm) so the shared
  ; code below can use the same names in both builds.
  34. cglobal threshold%1, 5, 7, 5, in, threshold, min, max, out, w, x
  35. mov wd, r10m
  36. %define ilinesizeq r5mp
  37. %define tlinesizeq r6mp
  38. %define flinesizeq r7mp
  39. %define slinesizeq r8mp
  40. %define olinesizeq r9mp
  41. %define hd r11mp
  42. %endif
  ; m4 = sign-bit bias constant; it is never overwritten below.
  43. VBROADCASTI128 m4, [%3]
  44. %if %1 == 16
  45. add wq, wq ; w *= 2 (16 bits instead of 8)
  46. %endif
  ; Move every plane pointer to the end of its first row and negate w, so
  ; that [ptr + x] with x counting from -w up towards 0 walks the row and the
  ; flags of the index increment double as the loop condition.
  47. add inq, wq
  48. add thresholdq, wq
  49. add minq, wq
  50. add maxq, wq
  51. add outq, wq
  52. neg wq
  53. .nextrow:
  ; x = -(row size in bytes): restart at the beginning of the current row.
  54. mov xq, wq
  55. .loop:
  ; Unaligned loads of one vector from each source plane:
  ; m1 = in, m0 = threshold, m2 = min, m3 = max.
  56. movu m1, [inq + xq]
  57. movu m0, [thresholdq + xq]
  58. movu m2, [minq + xq]
  59. movu m3, [maxq + xq]
  ; Flip the sign bit of both compare operands so that the signed pcmpgt
  ; below orders them as unsigned values.
  60. pxor m0, m4
  61. pxor m1, m4
  ; m0 = per-element mask, set where threshold > in.
  62. pcmpgt%2 m0, m1
  ; m3 (max) receives m2 (min) in the elements selected by the m0 mask
  ; (presumably PBLENDVB takes dst, src, mask like pblendvb - confirm).
  63. PBLENDVB m3, m2, m0
  64. movu [outq + xq], m3
  ; Step to the next vector; keep looping while x is still negative.
  65. add xq, mmsize
  66. jl .loop
  ; Next row: the pointers stay offset by the row size, so adding each
  ; plane's linesize leaves them at the end of the following row.
  67. add inq, ilinesizeq
  68. add thresholdq, tlinesizeq
  69. add minq, flinesizeq
  70. add maxq, slinesizeq
  71. add outq, olinesizeq
  ; Count down the remaining rows; continue while the result is > 0.
  72. sub hd, 1
  73. jg .nextrow
  74. RET
  75. %endmacro
  ; XMM (sse4) instantiations: threshold8 with byte compares and the pb_128
  ; bias, threshold16 with word compares and the pb_128_0 bias.
  76. INIT_XMM sse4
  77. THRESHOLD 8, b, pb_128
  78. THRESHOLD 16, w, pb_128_0
  ; YMM (avx2) instantiations of the same two variants; assembled only when
  ; HAVE_AVX2_EXTERNAL is non-zero.
  79. %if HAVE_AVX2_EXTERNAL
  80. INIT_YMM avx2
  81. THRESHOLD 8, b, pb_128
  82. THRESHOLD 16, w, pb_128_0
  83. %endif