; vf_gradfun.asm
  ;******************************************************************************
  ;* x86-optimized functions for gradfun filter
  ;*
  ;* This file is part of FFmpeg.
  ;*
  ;* FFmpeg is free software; you can redistribute it and/or
  ;* modify it under the terms of the GNU Lesser General Public
  ;* License as published by the Free Software Foundation; either
  ;* version 2.1 of the License, or (at your option) any later version.
  ;*
  ;* FFmpeg is distributed in the hope that it will be useful,
  ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  ;* Lesser General Public License for more details.
  ;*
  ;* You should have received a copy of the GNU Lesser General Public
  ;* License along with FFmpeg; if not, write to the Free Software
  ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  ;******************************************************************************
  %include "libavutil/x86/x86util.asm"

  SECTION_RODATA

  ; Word-broadcast constants: eight 16-bit lanes each, i.e. 16 bytes per label.
  pw_7f:  times 8 dw 0x007F       ; loaded into m6 by both gradfun_filter_line variants
  pw_ff:  times 8 dw 0x00FF       ; low-byte mask, loaded into m7 by BLUR_LINE

  SECTION .text
  ;------------------------------------------------------------------------------
  ; FILTER_LINE dither
  ;
  ; Filters one half-register worth of pixels (movh loads/stores) at offset r0.
  ;
  ; In:    %1 = words added to the scaled pixels before the final shift
  ;        r0 = offset applied to r1, r2 and r3 alike
  ;        r1 = destination bytes
  ;        r2 = source bytes
  ;        r3 = 16-bit reference words; each loaded word is duplicated so that
  ;             it serves two adjacent pixels
  ;        m5 = per-lane 16-bit multiplier
  ;        m6 = per-lane words subtracted after the multiply
  ;        m7 = must be all zero (used for byte->word unpack and as the
  ;             upper bound in pminsw)
  ; Clobb: m0, m1, m2
  ;
  ; Per 16-bit lane (all arithmetic wraps at 16 bits unless noted):
  ;     pix   = src << 7
  ;     delta = ref - pix
  ;     m     = min(((|delta| * m5) >> 16) - m6, 0)      ; unsigned high multiply
  ;     m     = low16(m * m)
  ;     out   = sat_u8((pix + %1 + (((delta << 2) * m) >> 16)) >> 7)
  ;                                                      ; signed high multiply,
  ;                                                      ; arithmetic shift
  ;------------------------------------------------------------------------------
  %macro FILTER_LINE 1
      movh        m1, [r3+r0]         ; reference words
      movh        m0, [r2+r0]         ; source bytes
      punpcklwd   m1, m1              ; duplicate every reference word
      punpcklbw   m0, m7              ; zero-extend bytes to words
      psllw       m0, 7               ; pix = src << 7
      psubw       m1, m0              ; delta = ref - pix

      PABSW       m2, m1              ; m = |delta|
      psllw       m1, 2               ; delta <<= 2 (abs value already taken)
      pmulhuw     m2, m5              ; m = (m * m5) >> 16
      psubw       m2, m6              ; m -= m6
      pminsw      m2, m7              ; m = min(m, 0)
      pmullw      m2, m2              ; m = m * m

      paddw       m0, %1              ; pix += dither
      pmulhw      m1, m2              ; delta = (delta * m) >> 16
      paddw       m0, m1              ; pix += delta
      psraw       m0, 7               ; undo the << 7 scaling
      packuswb    m0, m0              ; saturate words to unsigned bytes
      movh   [r1+r0], m0
  %endmacro
  ;------------------------------------------------------------------------------
  ; gradfun_filter_line, MMXEXT version (6 arguments, 6 GPRs)
  ;
  ; NOTE(review): the C prototype is declared outside this file; the roles
  ; below are read off from how each register is used here.
  ;
  ; In:    r0  = running offset; the loop ends once it is no longer negative
  ;        r1  = destination, r2 = source, r3 = reference words (see FILTER_LINE)
  ;        r4d = value whose low 16 bits are broadcast to every lane of m5
  ;        r5  = 16 bytes of dither words: [r5] and [r5+8] are applied to
  ;              alternating groups of 4 pixels
  ; Clobb: m0-m7, r0, flags
  ;
  ; The body always runs at least once. Each FILTER_LINE handles 4 pixels.
  ;------------------------------------------------------------------------------
  INIT_MMX mmxext
  cglobal gradfun_filter_line, 6, 6
      pxor        m7, m7              ; zero register required by FILTER_LINE
      mova        m6, [pw_7f]
      movh        m5, r4d
      pshufw      m5, m5, 0           ; broadcast word 0 to all four lanes
      mova        m3, [r5]            ; dither words, first 8 bytes
      mova        m4, [r5+8]          ; dither words, next 8 bytes
  .next_pair:
      FILTER_LINE m3
      add         r0, 4
      jge .finished                   ; offset reached >= 0 after the first group
      FILTER_LINE m4
      add         r0, 4
      jl .next_pair
  .finished:
      REP_RET
  ;------------------------------------------------------------------------------
  ; gradfun_filter_line, SSSE3 version (6 arguments, 6 GPRs, 8 XMM registers)
  ;
  ; NOTE(review): the C prototype is declared outside this file; the roles
  ; below are read off from how each register is used here.
  ;
  ; In:    r0  = running offset; the loop ends once it is no longer negative
  ;        r1  = destination, r2 = source, r3 = reference words (see FILTER_LINE)
  ;        r4d = value whose low 16 bits are broadcast to every lane of m5
  ;        r5  = 16 bytes of dither words, loaded with mova
  ; Clobb: m0-m2, m4-m7, r0, flags
  ;
  ; The body always runs at least once. Each iteration handles 8 pixels.
  ;------------------------------------------------------------------------------
  INIT_XMM ssse3
  cglobal gradfun_filter_line, 6, 6, 8
      pxor        m7, m7              ; zero register required by FILTER_LINE
      mova        m6, [pw_7f]
      mova        m4, [r5]            ; all eight dither words
      movd        m5, r4d
      pshuflw     m5, m5, 0           ; broadcast word 0 across the low quadword
      punpcklqdq  m5, m5              ; ...and copy it into the high quadword
  .next_block:
      FILTER_LINE m4
      add         r0, 8
      jl .next_block
      REP_RET
  ;------------------------------------------------------------------------------
  ; BLUR_LINE load_insn
  ;
  ; Emits gradfun_blur_line_<load_insn> (6 arguments, 6 GPRs, 8 XMM registers).
  ; %1 is the instruction used to read the two byte rows at r4 and r5, so the
  ; two instantiations differ only in how those rows are loaded.
  ;
  ; NOTE(review): the C prototype is declared outside this file; the roles
  ; below are read off from how each register is used here.
  ;
  ; In:    r0 = running byte offset shared by every pointer; the loop ends
  ;             once it is no longer negative
  ;        r1 = word buffer, read and then overwritten
  ;        r2 = word buffer, read only
  ;        r3 = word output
  ;        r4 = first byte row, r5 = second byte row
  ; Clobb: m0-m3, m7, r0, flags
  ;
  ; Per 16-bit lane i of each 16-byte step (word arithmetic wraps):
  ;     sum   = row4[2i] + row4[2i+1] + row5[2i] + row5[2i+1] + r2[i]
  ;     old   = r1[i]
  ;     r1[i] = sum
  ;     r3[i] = sum - old
  ; The body always runs at least once.
  ;------------------------------------------------------------------------------
  %macro BLUR_LINE 1
  cglobal gradfun_blur_line_%1, 6, 6, 8
      mova        m7, [pw_ff]         ; mask selecting the low byte of each word
  .next_block:
      ; first byte row: m0 = odd bytes, m2 = even bytes, both as words
      %1          m0, [r4+r0]
      mova        m2, m0
      psrlw       m0, 8
      pand        m2, m7
      ; second byte row: m1 = odd bytes, m3 = even bytes, both as words
      %1          m1, [r5+r0]
      mova        m3, m1
      psrlw       m1, 8
      pand        m3, m7
      ; add up the four bytes that belong to each word lane
      paddw       m0, m1
      paddw       m2, m3
      paddw       m0, m2
      mova        m1, [r1+r0]         ; fetch the old value before it is replaced
      paddw       m0, [r2+r0]
      mova   [r1+r0], m0
      psubw       m0, m1              ; new sum minus previous contents of r1
      mova   [r3+r0], m0
      add         r0, 16
      jl .next_block
      REP_RET
  %endmacro

  INIT_XMM sse2
  BLUR_LINE movdqa                    ; byte rows read with movdqa
  BLUR_LINE movdqu                    ; byte rows read with movdqu