vf_noise.c 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. /*
  2. * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
  3. * Copyright (c) 2013 Paul B Mahol
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/attributes.h"
  22. #include "libavutil/x86/cpu.h"
  23. #include "libavutil/x86/asm.h"
  24. #include "libavfilter/vf_noise.h"
  25. #if HAVE_INLINE_ASM
  26. static void line_noise_mmx(uint8_t *dst, const uint8_t *src,
  27. const int8_t *noise, int len, int shift)
  28. {
  29. x86_reg mmx_len= len & (~7);
  30. noise += shift;
  31. __asm__ volatile(
  32. "mov %3, %%"FF_REG_a" \n\t"
  33. "pcmpeqb %%mm7, %%mm7 \n\t"
  34. "psllw $15, %%mm7 \n\t"
  35. "packsswb %%mm7, %%mm7 \n\t"
  36. ".p2align 4 \n\t"
  37. "1: \n\t"
  38. "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
  39. "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
  40. "pxor %%mm7, %%mm0 \n\t"
  41. "paddsb %%mm1, %%mm0 \n\t"
  42. "pxor %%mm7, %%mm0 \n\t"
  43. "movq %%mm0, (%2, %%"FF_REG_a") \n\t"
  44. "add $8, %%"FF_REG_a" \n\t"
  45. " js 1b \n\t"
  46. :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
  47. : "%"FF_REG_a
  48. );
  49. if (mmx_len != len)
  50. ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
  51. }
  52. #if HAVE_6REGS
  53. static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
  54. int len, const int8_t * const *shift)
  55. {
  56. x86_reg mmx_len = len & (~7);
  57. __asm__ volatile(
  58. "mov %5, %%"FF_REG_a" \n\t"
  59. ".p2align 4 \n\t"
  60. "1: \n\t"
  61. "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
  62. "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
  63. "paddb (%2, %%"FF_REG_a"), %%mm1\n\t"
  64. "paddb (%3, %%"FF_REG_a"), %%mm1\n\t"
  65. "movq %%mm0, %%mm2 \n\t"
  66. "movq %%mm1, %%mm3 \n\t"
  67. "punpcklbw %%mm0, %%mm0 \n\t"
  68. "punpckhbw %%mm2, %%mm2 \n\t"
  69. "punpcklbw %%mm1, %%mm1 \n\t"
  70. "punpckhbw %%mm3, %%mm3 \n\t"
  71. "pmulhw %%mm0, %%mm1 \n\t"
  72. "pmulhw %%mm2, %%mm3 \n\t"
  73. "paddw %%mm1, %%mm1 \n\t"
  74. "paddw %%mm3, %%mm3 \n\t"
  75. "paddw %%mm0, %%mm1 \n\t"
  76. "paddw %%mm2, %%mm3 \n\t"
  77. "psrlw $8, %%mm1 \n\t"
  78. "psrlw $8, %%mm3 \n\t"
  79. "packuswb %%mm3, %%mm1 \n\t"
  80. "movq %%mm1, (%4, %%"FF_REG_a") \n\t"
  81. "add $8, %%"FF_REG_a" \n\t"
  82. " js 1b \n\t"
  83. :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len),
  84. "r" (dst+mmx_len), "g" (-mmx_len)
  85. : "%"FF_REG_a
  86. );
  87. if (mmx_len != len){
  88. const int8_t *shift2[3] = { shift[0]+mmx_len, shift[1]+mmx_len, shift[2]+mmx_len };
  89. ff_line_noise_avg_c(dst+mmx_len, src+mmx_len, len-mmx_len, shift2);
  90. }
  91. }
  92. #endif /* HAVE_6REGS */
  93. static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
  94. const int8_t *noise, int len, int shift)
  95. {
  96. x86_reg mmx_len = len & (~7);
  97. noise += shift;
  98. __asm__ volatile(
  99. "mov %3, %%"FF_REG_a" \n\t"
  100. "pcmpeqb %%mm7, %%mm7 \n\t"
  101. "psllw $15, %%mm7 \n\t"
  102. "packsswb %%mm7, %%mm7 \n\t"
  103. ".p2align 4 \n\t"
  104. "1: \n\t"
  105. "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
  106. "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
  107. "pxor %%mm7, %%mm0 \n\t"
  108. "paddsb %%mm1, %%mm0 \n\t"
  109. "pxor %%mm7, %%mm0 \n\t"
  110. "movntq %%mm0, (%2, %%"FF_REG_a") \n\t"
  111. "add $8, %%"FF_REG_a" \n\t"
  112. " js 1b \n\t"
  113. :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
  114. : "%"FF_REG_a
  115. );
  116. if (mmx_len != len)
  117. ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
  118. }
  119. #endif /* HAVE_INLINE_ASM */
  120. av_cold void ff_noise_init_x86(NoiseContext *n)
  121. {
  122. #if HAVE_INLINE_ASM
  123. int cpu_flags = av_get_cpu_flags();
  124. if (INLINE_MMX(cpu_flags)) {
  125. n->line_noise = line_noise_mmx;
  126. #if HAVE_6REGS
  127. n->line_noise_avg = line_noise_avg_mmx;
  128. #endif
  129. }
  130. if (INLINE_MMXEXT(cpu_flags)) {
  131. n->line_noise = line_noise_mmxext;
  132. }
  133. #endif
  134. }