2
0

mpegvideoenc.c 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. /*
  2. * The simplest mpeg encoder (well, it was the simplest!)
  3. * Copyright (c) 2000,2001 Fabrice Bellard
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/attributes.h"
  22. #include "libavutil/cpu.h"
  23. #include "libavutil/x86/asm.h"
  24. #include "libavutil/x86/cpu.h"
  25. #include "libavcodec/avcodec.h"
  26. #include "libavcodec/dct.h"
  27. #include "libavcodec/mpegvideo.h"
  28. /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
  29. DECLARE_ALIGNED(16, static const uint16_t, inv_zigzag_direct16)[64] = {
  30. 1, 2, 6, 7, 15, 16, 28, 29,
  31. 3, 5, 8, 14, 17, 27, 30, 43,
  32. 4, 9, 13, 18, 26, 31, 42, 44,
  33. 10, 12, 19, 25, 32, 41, 45, 54,
  34. 11, 20, 24, 33, 40, 46, 53, 55,
  35. 21, 23, 34, 39, 47, 52, 56, 61,
  36. 22, 35, 38, 48, 51, 57, 60, 62,
  37. 36, 37, 49, 50, 58, 59, 63, 64,
  38. };
  #if HAVE_6REGS

  /*
   * mpegvideoenc_template.c is included once per enabled inline-asm flavour.
   * Before each inclusion the COMPILE_TEMPLATE_* switches select the
   * instruction set and RENAME()/RENAME_FDCT() append the suffix for that
   * flavour.  The template is presumably what defines dct_quantize_<suffix>,
   * which ff_dct_encode_init_x86() refers to.
   */

  #if HAVE_MMX_INLINE
  /* Plain MMX: every extension switch is off. */
  #define COMPILE_TEMPLATE_MMXEXT 0
  #define COMPILE_TEMPLATE_SSE2   0
  #define COMPILE_TEMPLATE_SSSE3  0
  #define RENAME(a)      a ## _mmx
  #define RENAME_FDCT(a) a ## _mmx
  #include "mpegvideoenc_template.c"
  #endif /* HAVE_MMX_INLINE */

  #if HAVE_MMXEXT_INLINE
  /* MMXEXT: only the MMXEXT switch is on. */
  #undef  COMPILE_TEMPLATE_MMXEXT
  #define COMPILE_TEMPLATE_MMXEXT 1
  #undef  COMPILE_TEMPLATE_SSE2
  #define COMPILE_TEMPLATE_SSE2   0
  #undef  COMPILE_TEMPLATE_SSSE3
  #define COMPILE_TEMPLATE_SSSE3  0
  #undef  RENAME
  #define RENAME(a)      a ## _mmxext
  #undef  RENAME_FDCT
  #define RENAME_FDCT(a) a ## _mmxext
  #include "mpegvideoenc_template.c"
  #endif /* HAVE_MMXEXT_INLINE */

  #if HAVE_SSE2_INLINE
  /* SSE2: only the SSE2 switch is on. */
  #undef  COMPILE_TEMPLATE_MMXEXT
  #define COMPILE_TEMPLATE_MMXEXT 0
  #undef  COMPILE_TEMPLATE_SSE2
  #define COMPILE_TEMPLATE_SSE2   1
  #undef  COMPILE_TEMPLATE_SSSE3
  #define COMPILE_TEMPLATE_SSSE3  0
  #undef  RENAME
  #define RENAME(a)      a ## _sse2
  #undef  RENAME_FDCT
  #define RENAME_FDCT(a) a ## _sse2
  #include "mpegvideoenc_template.c"
  #endif /* HAVE_SSE2_INLINE */

  #if HAVE_SSSE3_INLINE
  /*
   * SSSE3: the SSE2 and SSSE3 switches are both on.  RENAME_FDCT() still
   * appends _sse2, so FDCT names keep the SSE2 suffix for this flavour.
   */
  #undef  COMPILE_TEMPLATE_MMXEXT
  #define COMPILE_TEMPLATE_MMXEXT 0
  #undef  COMPILE_TEMPLATE_SSE2
  #define COMPILE_TEMPLATE_SSE2   1
  #undef  COMPILE_TEMPLATE_SSSE3
  #define COMPILE_TEMPLATE_SSSE3  1
  #undef  RENAME
  #define RENAME(a)      a ## _ssse3
  #undef  RENAME_FDCT
  #define RENAME_FDCT(a) a ## _sse2
  #include "mpegvideoenc_template.c"
  #endif /* HAVE_SSSE3_INLINE */

  #endif /* HAVE_6REGS */
  88. #if HAVE_INLINE_ASM
  #if HAVE_MMX_INLINE
  /**
   * MMX DCT denoiser for one block of 64 coefficients.
   *
   * For every coefficient i the absolute value of block[i] is added to
   * s->dct_error_sum[intra][i], and block[i] is replaced by its magnitude
   * reduced by s->dct_offset[intra][i] (unsigned saturation, so it never drops
   * below zero) with the original sign restored.  s->dct_count[intra] is
   * incremented once per call.
   *
   * Each loop iteration handles 8 coefficients: 16 bytes of block, 32 bytes
   * of sum and 16 bytes of offset.  The loop ends once block reaches the
   * initial block + 64.
   *
   * @param s      encoder context; s->mb_intra selects which row of
   *               dct_error_sum, dct_offset and dct_count is used
   * @param block  64 int16_t coefficients, modified in place
   *
   * NOTE(review): no emms is issued here; presumably the caller is expected
   * to restore the FPU state -- confirm against the call sites.
   */
  static void denoise_dct_mmx(MpegEncContext *s, int16_t *block)
  {
      const int intra  = s->mb_intra;
      int      *sum    = s->dct_error_sum[intra];
      uint16_t *offset = s->dct_offset[intra];

      s->dct_count[intra]++;

      __asm__ volatile (
          /* mm7 = 0, used below to zero-extend 16-bit magnitudes */
          "pxor      %%mm7, %%mm7     \n\t"
          "1:                         \n\t"
          /* mm0/mm1 = sign masks: all ones where the coefficient is negative */
          "pxor      %%mm0, %%mm0     \n\t"
          "pxor      %%mm1, %%mm1     \n\t"
          "movq      (%0), %%mm2      \n\t"
          "movq      8(%0), %%mm3     \n\t"
          "pcmpgtw   %%mm2, %%mm0     \n\t"
          "pcmpgtw   %%mm3, %%mm1     \n\t"
          /* mm2/mm3 = |coefficient| (xor with mask, then subtract mask) */
          "pxor      %%mm0, %%mm2     \n\t"
          "pxor      %%mm1, %%mm3     \n\t"
          "psubw     %%mm0, %%mm2     \n\t"
          "psubw     %%mm1, %%mm3     \n\t"
          /* keep a copy of the magnitudes for the error sum */
          "movq      %%mm2, %%mm4     \n\t"
          "movq      %%mm3, %%mm5     \n\t"
          /* subtract the offsets with unsigned saturation */
          "psubusw   (%2), %%mm2      \n\t"
          "psubusw   8(%2), %%mm3     \n\t"
          /* restore the original signs and write the block back */
          "pxor      %%mm0, %%mm2     \n\t"
          "pxor      %%mm1, %%mm3     \n\t"
          "psubw     %%mm0, %%mm2     \n\t"
          "psubw     %%mm1, %%mm3     \n\t"
          "movq      %%mm2, (%0)      \n\t"
          "movq      %%mm3, 8(%0)     \n\t"
          /* widen the saved magnitudes to 32 bits and accumulate into sum */
          "movq      %%mm4, %%mm2     \n\t"
          "movq      %%mm5, %%mm3     \n\t"
          "punpcklwd %%mm7, %%mm4     \n\t"
          "punpckhwd %%mm7, %%mm2     \n\t"
          "punpcklwd %%mm7, %%mm5     \n\t"
          "punpckhwd %%mm7, %%mm3     \n\t"
          "paddd     (%1), %%mm4      \n\t"
          "paddd     8(%1), %%mm2     \n\t"
          "paddd     16(%1), %%mm5    \n\t"
          "paddd     24(%1), %%mm3    \n\t"
          "movq      %%mm4, (%1)      \n\t"
          "movq      %%mm2, 8(%1)     \n\t"
          "movq      %%mm5, 16(%1)    \n\t"
          "movq      %%mm3, 24(%1)    \n\t"
          /* advance all three pointers; loop while block < end */
          "add       $16, %0          \n\t"
          "add       $32, %1          \n\t"
          "add       $16, %2          \n\t"
          "cmp       %3, %0           \n\t"
          " jb       1b               \n\t"
          : "+r" (block), "+r" (sum), "+r" (offset)
          : "r" (block + 64)
          /*
           * The asm stores to block[] and sum[] and loads from offset[]
           * through pointers passed only as register operands, so the
           * compiler has to be told that memory is accessed.
           */
          : "memory"
      );
  }
  #endif /* HAVE_MMX_INLINE */
  #if HAVE_SSE2_INLINE
  /**
   * SSE2 DCT denoiser for one block of 64 coefficients.
   *
   * For every coefficient i the absolute value of block[i] is added to
   * s->dct_error_sum[intra][i], and block[i] is replaced by its magnitude
   * reduced by s->dct_offset[intra][i] (unsigned saturation, so it never drops
   * below zero) with the original sign restored.  s->dct_count[intra] is
   * incremented once per call.
   *
   * Each loop iteration handles 16 coefficients: 32 bytes of block, 64 bytes
   * of sum and 32 bytes of offset.  The loop ends once block reaches the
   * initial block + 64.  All three arrays are accessed with movdqa or with
   * packed memory operands, so they must be 16-byte aligned.
   *
   * @param s      encoder context; s->mb_intra selects which row of
   *               dct_error_sum, dct_offset and dct_count is used
   * @param block  64 int16_t coefficients, modified in place
   */
  static void denoise_dct_sse2(MpegEncContext *s, int16_t *block)
  {
      const int intra  = s->mb_intra;
      int      *sum    = s->dct_error_sum[intra];
      uint16_t *offset = s->dct_offset[intra];

      s->dct_count[intra]++;

      __asm__ volatile (
          /* xmm7 = 0, used below to zero-extend 16-bit magnitudes */
          "pxor      %%xmm7, %%xmm7   \n\t"
          "1:                         \n\t"
          /* xmm0/xmm1 = sign masks: all ones where the coefficient is negative */
          "pxor      %%xmm0, %%xmm0   \n\t"
          "pxor      %%xmm1, %%xmm1   \n\t"
          "movdqa    (%0), %%xmm2     \n\t"
          "movdqa    16(%0), %%xmm3   \n\t"
          "pcmpgtw   %%xmm2, %%xmm0   \n\t"
          "pcmpgtw   %%xmm3, %%xmm1   \n\t"
          /* xmm2/xmm3 = |coefficient| (xor with mask, then subtract mask) */
          "pxor      %%xmm0, %%xmm2   \n\t"
          "pxor      %%xmm1, %%xmm3   \n\t"
          "psubw     %%xmm0, %%xmm2   \n\t"
          "psubw     %%xmm1, %%xmm3   \n\t"
          /* keep a copy of the magnitudes for the error sum */
          "movdqa    %%xmm2, %%xmm4   \n\t"
          "movdqa    %%xmm3, %%xmm5   \n\t"
          /* subtract the offsets with unsigned saturation */
          "psubusw   (%2), %%xmm2     \n\t"
          "psubusw   16(%2), %%xmm3   \n\t"
          /* restore the original signs and write the block back */
          "pxor      %%xmm0, %%xmm2   \n\t"
          "pxor      %%xmm1, %%xmm3   \n\t"
          "psubw     %%xmm0, %%xmm2   \n\t"
          "psubw     %%xmm1, %%xmm3   \n\t"
          "movdqa    %%xmm2, (%0)     \n\t"
          "movdqa    %%xmm3, 16(%0)   \n\t"
          /*
           * Widen the saved magnitudes to 32 bits and accumulate into sum.
           * xmm0 is reused as scratch here; its sign mask is no longer needed.
           */
          "movdqa    %%xmm4, %%xmm6   \n\t"
          "movdqa    %%xmm5, %%xmm0   \n\t"
          "punpcklwd %%xmm7, %%xmm4   \n\t"
          "punpckhwd %%xmm7, %%xmm6   \n\t"
          "punpcklwd %%xmm7, %%xmm5   \n\t"
          "punpckhwd %%xmm7, %%xmm0   \n\t"
          "paddd     (%1), %%xmm4     \n\t"
          "paddd     16(%1), %%xmm6   \n\t"
          "paddd     32(%1), %%xmm5   \n\t"
          "paddd     48(%1), %%xmm0   \n\t"
          "movdqa    %%xmm4, (%1)     \n\t"
          "movdqa    %%xmm6, 16(%1)   \n\t"
          "movdqa    %%xmm5, 32(%1)   \n\t"
          "movdqa    %%xmm0, 48(%1)   \n\t"
          /* advance all three pointers; loop while block < end */
          "add       $32, %0          \n\t"
          "add       $64, %1          \n\t"
          "add       $32, %2          \n\t"
          "cmp       %3, %0           \n\t"
          " jb       1b               \n\t"
          : "+r" (block), "+r" (sum), "+r" (offset)
          : "r" (block + 64)
          /*
           * "memory": the asm stores to block[] and sum[] and loads from
           * offset[] through pointers passed only as register operands.
           * XMM_CLOBBERS() takes a trailing comma so the list stays valid
           * whether or not the XMM register names are emitted.
           */
          : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3",
                         "%xmm4", "%xmm5", "%xmm6", "%xmm7",) "memory"
      );
  }
  #endif /* HAVE_SSE2_INLINE */
  197. #endif /* HAVE_INLINE_ASM */
  198. av_cold void ff_dct_encode_init_x86(MpegEncContext *s)
  199. {
  200. const int dct_algo = s->avctx->dct_algo;
  201. if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
  202. #if HAVE_MMX_INLINE
  203. int cpu_flags = av_get_cpu_flags();
  204. if (INLINE_MMX(cpu_flags)) {
  205. #if HAVE_6REGS
  206. s->dct_quantize = dct_quantize_mmx;
  207. #endif
  208. s->denoise_dct = denoise_dct_mmx;
  209. }
  210. #endif
  211. #if HAVE_6REGS && HAVE_MMXEXT_INLINE
  212. if (INLINE_MMXEXT(cpu_flags))
  213. s->dct_quantize = dct_quantize_mmxext;
  214. #endif
  215. #if HAVE_SSE2_INLINE
  216. if (INLINE_SSE2(cpu_flags)) {
  217. #if HAVE_6REGS
  218. s->dct_quantize = dct_quantize_sse2;
  219. #endif
  220. s->denoise_dct = denoise_dct_sse2;
  221. }
  222. #endif
  223. #if HAVE_6REGS && HAVE_SSSE3_INLINE
  224. if (INLINE_SSSE3(cpu_flags))
  225. s->dct_quantize = dct_quantize_ssse3;
  226. #endif
  227. }
  228. }