cavsdsp.c

/*
 * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
 * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de>
 *
 * MMX-optimized DSP functions, based on H.264 optimizations by
 * Michael Niedermayer and Loren Merritt
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/attributes.h"
#include "libavutil/common.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/cavsdsp.h"
#include "libavcodec/idctdsp.h"
#include "constants.h"
#include "fpel.h"
#include "idctdsp.h"
#include "config.h"

#if HAVE_MMX_EXTERNAL
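
/* The idct8_add wrappers below run the assembly IDCT into an aligned scratch
 * buffer and then fold the residual into the destination with saturation.
 * A scalar sketch of the equivalent behaviour, for reference only (assuming
 * the usual ff_add_pixels_clamped semantics):
 *
 *     int16_t b2[64];
 *     ff_cavs_idct8_mmx(b2, block);
 *     for (int i = 0; i < 8; i++)
 *         for (int j = 0; j < 8; j++)
 *             dst[i * stride + j] =
 *                 av_clip_uint8(dst[i * stride + j] + b2[8 * i + j]);
 */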
void ff_cavs_idct8_mmx(int16_t *out, const int16_t *in);

static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, ptrdiff_t stride)
{
    LOCAL_ALIGNED(16, int16_t, b2, [64]);
    ff_cavs_idct8_mmx(b2, block);
    ff_add_pixels_clamped_mmx(b2, dst, stride);
}

void ff_cavs_idct8_sse2(int16_t *out, const int16_t *in);

static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride)
{
    LOCAL_ALIGNED(16, int16_t, b2, [64]);
    ff_cavs_idct8_sse2(b2, block);
    ff_add_pixels_clamped_sse2(b2, dst, stride);
}

#endif /* HAVE_MMX_EXTERNAL */
#if (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE)

/*****************************************************************************
 *
 * motion compensation
 *
 ****************************************************************************/

/* vertical filter [-1 -2 96 42 -7  0]  */
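/* One QPEL_CAVSV1 step filters packed 16-bit rows A..E and loads the next
 * source row into F.  The small taps are built from shifts instead of extra
 * multiply constants: -2*B as -(B + B) and -7*E as E - (E << 3), giving
 * -A - 2*B + 96*C + 42*D - 7*E, rounded with ff_pw_64 and shifted right by 7
 * before packing back to bytes.  The scaled rows are shifted back afterwards
 * so the register rotation in QPEL_CAVSVNUM keeps feeding correct inputs. */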
#define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
        "movd (%0), "#F"            \n\t"\
        "movq "#C", %%mm6           \n\t"\
        "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
        "movq "#D", %%mm7           \n\t"\
        "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
        "psllw $3, "#E"             \n\t"\
        "psubw "#E", %%mm6          \n\t"\
        "psraw $3, "#E"             \n\t"\
        "paddw %%mm7, %%mm6         \n\t"\
        "paddw "#E", %%mm6          \n\t"\
        "paddw "#B", "#B"           \n\t"\
        "pxor %%mm7, %%mm7          \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, "#F"      \n\t"\
        "psubw "#B", %%mm6          \n\t"\
        "psraw $1, "#B"             \n\t"\
        "psubw "#A", %%mm6          \n\t"\
        "paddw "MANGLE(ADD)", %%mm6 \n\t"\
        "psraw $7, %%mm6            \n\t"\
        "packuswb %%mm6, %%mm6      \n\t"\
        OP(%%mm6, (%1), A, d)            \
        "add %3, %1                 \n\t"

/* vertical filter [ 0 -1  5  5 -1  0]  */
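/* QPEL_CAVSV2 is the plain half-pel case:
 * out = (5*(C + D) - B - E + 4) >> 3, the same 6-tap shape as the
 * horizontal filter in QPEL_CAVS below. */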
#define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
        "movd (%0), "#F"            \n\t"\
        "movq "#C", %%mm6           \n\t"\
        "paddw "#D", %%mm6          \n\t"\
        "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, "#F"      \n\t"\
        "psubw "#B", %%mm6          \n\t"\
        "psubw "#E", %%mm6          \n\t"\
        "paddw "MANGLE(ADD)", %%mm6 \n\t"\
        "psraw $3, %%mm6            \n\t"\
        "packuswb %%mm6, %%mm6      \n\t"\
        OP(%%mm6, (%1), A, d)            \
        "add %3, %1                 \n\t"

/* vertical filter [ 0 -7 42 96 -2 -1]  */
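/* QPEL_CAVSV3 mirrors QPEL_CAVSV1: -7*B + 42*C + 96*D - 2*E - F, again with
 * ff_pw_64 rounding and a final shift right by 7. */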
#define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
        "movd (%0), "#F"            \n\t"\
        "movq "#C", %%mm6           \n\t"\
        "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
        "movq "#D", %%mm7           \n\t"\
        "pmullw "MANGLE(MUL1)", %%mm7\n\t"\
        "psllw $3, "#B"             \n\t"\
        "psubw "#B", %%mm6          \n\t"\
        "psraw $3, "#B"             \n\t"\
        "paddw %%mm7, %%mm6         \n\t"\
        "paddw "#B", %%mm6          \n\t"\
        "paddw "#E", "#E"           \n\t"\
        "pxor %%mm7, %%mm7          \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, "#F"      \n\t"\
        "psubw "#E", %%mm6          \n\t"\
        "psraw $1, "#E"             \n\t"\
        "psubw "#F", %%mm6          \n\t"\
        "paddw "MANGLE(ADD)", %%mm6 \n\t"\
        "psraw $7, %%mm6            \n\t"\
        "packuswb %%mm6, %%mm6      \n\t"\
        OP(%%mm6, (%1), A, d)            \
        "add %3, %1                 \n\t"
#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
    int w= 2;\
    src -= 2*srcStride;\
    \
    while(w--){\
      __asm__ volatile(\
        "pxor %%mm7, %%mm7          \n\t"\
        "movd (%0), %%mm0           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm1           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm2           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm3           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm4           \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, %%mm0     \n\t"\
        "punpcklbw %%mm7, %%mm1     \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpcklbw %%mm7, %%mm3     \n\t"\
        "punpcklbw %%mm7, %%mm4     \n\t"\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
        VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
        VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
        VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
        VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
        \
        : "+a"(src), "+c"(dst)\
        : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
          NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
        : "memory"\
     );\
     if(h==16){\
        __asm__ volatile(\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
            VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
            \
            : "+a"(src), "+c"(dst)\
            : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
              NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
            : "memory"\
        );\
     }\
     src += 4-(h+5)*srcStride;\
     dst += 4-h*dstStride;\
   }
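
/* QPEL_CAVS instantiates, for a given store operation (put or avg), the
 * 8-pixel-wide horizontal half-pel filter
 *     out[i] = (5*(src[i] + src[i+1]) - src[i-1] - src[i+2] + 4) >> 3
 * plus the 8- and 16-row wrappers around the three vertical filters above. */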
#define QPEL_CAVS(OPNAME, OP, MMX)\
static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    int h=8;\
    __asm__ volatile(\
        "pxor %%mm7, %%mm7          \n\t"\
        "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
        "1:                         \n\t"\
        "movq  (%0), %%mm0          \n\t"\
        "movq 1(%0), %%mm2          \n\t"\
        "movq %%mm0, %%mm1          \n\t"\
        "movq %%mm2, %%mm3          \n\t"\
        "punpcklbw %%mm7, %%mm0     \n\t"\
        "punpckhbw %%mm7, %%mm1     \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpckhbw %%mm7, %%mm3     \n\t"\
        "paddw %%mm2, %%mm0         \n\t"\
        "paddw %%mm3, %%mm1         \n\t"\
        "pmullw %%mm6, %%mm0        \n\t"\
        "pmullw %%mm6, %%mm1        \n\t"\
        "movq -1(%0), %%mm2         \n\t"\
        "movq  2(%0), %%mm4         \n\t"\
        "movq %%mm2, %%mm3          \n\t"\
        "movq %%mm4, %%mm5          \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpckhbw %%mm7, %%mm3     \n\t"\
        "punpcklbw %%mm7, %%mm4     \n\t"\
        "punpckhbw %%mm7, %%mm5     \n\t"\
        "paddw %%mm4, %%mm2         \n\t"\
        "paddw %%mm3, %%mm5         \n\t"\
        "psubw %%mm2, %%mm0         \n\t"\
        "psubw %%mm5, %%mm1         \n\t"\
        "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\
        "paddw %%mm5, %%mm0         \n\t"\
        "paddw %%mm5, %%mm1         \n\t"\
        "psraw $3, %%mm0            \n\t"\
        "psraw $3, %%mm1            \n\t"\
        "packuswb %%mm1, %%mm0      \n\t"\
        OP(%%mm0, (%1),%%mm5, q)         \
        "add %3, %0                 \n\t"\
        "add %4, %1                 \n\t"\
        "decl %2                    \n\t"\
        " jnz 1b                    \n\t"\
        : "+a"(src), "+c"(dst), "+m"(h)\
        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
          NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\
        : "memory"\
    );\
}\
\
static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
{ \
    QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
{ \
    QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_42) \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
{ \
    QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
}\
\
static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride,  8);\
}\
static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride,  8);\
}\
static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride,  8);\
}\
static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}
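
/* CAVS_MC generates the dispatchers that the init code puts into the qpel
 * tables: _mc20 is the horizontal half-pel case and _mc01/_mc02/_mc03 are
 * the vertical quarter-, half- and three-quarter-pel cases. */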
#define CAVS_MC(OPNAME, SIZE, MMX) \
static void OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
}
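
/* The OP argument of the macros above selects how a filtered result reaches
 * memory: PUT_OP stores it directly, while the AVG variants average it with
 * the pixels already in dst (pavgb on MMXEXT, pavgusb on 3DNow!). */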
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b "    \n\t"

#define AVG_3DNOW_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp "   \n\t"\
"pavgusb " #temp ", " #a "        \n\t"\
"mov" #size " " #a ", " #b "      \n\t"

#define AVG_MMXEXT_OP(a, b, temp, size) \
"mov" #size " " #b ", " #temp "   \n\t"\
"pavgb " #temp ", " #a "          \n\t"\
"mov" #size " " #a ", " #b "      \n\t"

#endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */
#if HAVE_MMX_EXTERNAL
static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                    ptrdiff_t stride)
{
    ff_put_pixels8_mmx(dst, src, stride, 8);
}

static void avg_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                    ptrdiff_t stride)
{
    ff_avg_pixels8_mmx(dst, src, stride, 8);
}

static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src,
                                       ptrdiff_t stride)
{
    ff_avg_pixels8_mmxext(dst, src, stride, 8);
}

static void put_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                     ptrdiff_t stride)
{
    ff_put_pixels16_mmx(dst, src, stride, 16);
}

static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                     ptrdiff_t stride)
{
    ff_avg_pixels16_mmx(dst, src, stride, 16);
}

static void avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, const uint8_t *src,
                                        ptrdiff_t stride)
{
    ff_avg_pixels16_mmxext(dst, src, stride, 16);
}

static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    ff_put_pixels16_sse2(dst, src, stride, 16);
}

static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    ff_avg_pixels16_sse2(dst, src, stride, 16);
}
#endif
static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c,
                                     AVCodecContext *avctx)
{
#if HAVE_MMX_EXTERNAL
    c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_mmx;
    c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx;
    c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmx;
    c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmx;

    c->cavs_idct8_add = cavs_idct8_add_mmx;
    c->idct_perm      = FF_IDCT_PERM_TRANSPOSE;
#endif /* HAVE_MMX_EXTERNAL */
}
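
/* DSPFUNC fills the sub-pel entries of a qpel table: the first index selects
 * the block size (0 = 16x16, 1 = 8x8) and the second index appears to encode
 * the quarter-pel position as x + 4*y, so entries 2, 4, 8 and 12 correspond
 * to mc20, mc01, mc02 and mc03. */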
#define DSPFUNC(PFX, IDX, NUM, EXT)                                                       \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][ 4] = PFX ## _cavs_qpel ## NUM ## _mc01_ ## EXT; \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][ 8] = PFX ## _cavs_qpel ## NUM ## _mc02_ ## EXT; \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][12] = PFX ## _cavs_qpel ## NUM ## _mc03_ ## EXT;

#if HAVE_MMXEXT_INLINE
QPEL_CAVS(put_,        PUT_OP, mmxext)
QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)

CAVS_MC(put_,  8, mmxext)
CAVS_MC(put_, 16, mmxext)
CAVS_MC(avg_,  8, mmxext)
CAVS_MC(avg_, 16, mmxext)
#endif /* HAVE_MMXEXT_INLINE */

#if HAVE_AMD3DNOW_INLINE
QPEL_CAVS(put_,       PUT_OP, 3dnow)
QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)

CAVS_MC(put_,  8, 3dnow)
CAVS_MC(put_, 16, 3dnow)
CAVS_MC(avg_,  8, 3dnow)
CAVS_MC(avg_, 16, 3dnow)

static av_cold void cavsdsp_init_3dnow(CAVSDSPContext *c,
                                       AVCodecContext *avctx)
{
    DSPFUNC(put, 0, 16, 3dnow);
    DSPFUNC(put, 1,  8, 3dnow);
    DSPFUNC(avg, 0, 16, 3dnow);
    DSPFUNC(avg, 1,  8, 3dnow);
}
#endif /* HAVE_AMD3DNOW_INLINE */
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
{
    av_unused int cpu_flags = av_get_cpu_flags();

    if (X86_MMX(cpu_flags))
        cavsdsp_init_mmx(c, avctx);

#if HAVE_AMD3DNOW_INLINE
    if (INLINE_AMD3DNOW(cpu_flags))
        cavsdsp_init_3dnow(c, avctx);
#endif /* HAVE_AMD3DNOW_INLINE */
#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(cpu_flags)) {
        DSPFUNC(put, 0, 16, mmxext);
        DSPFUNC(put, 1,  8, mmxext);
        DSPFUNC(avg, 0, 16, mmxext);
        DSPFUNC(avg, 1,  8, mmxext);
    }
#endif
#if HAVE_MMX_EXTERNAL
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmxext;
        c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext;
    }
#endif
#if HAVE_SSE2_EXTERNAL
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2;
        c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2;

        c->cavs_idct8_add = cavs_idct8_add_sse2;
        c->idct_perm      = FF_IDCT_PERM_TRANSPOSE;
    }
#endif
}
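
/* Typical call site (a sketch, assuming the generic init in
 * libavcodec/cavsdsp.c dispatches to the per-arch versions):
 *
 *     av_cold void ff_cavsdsp_init(CAVSDSPContext *c, AVCodecContext *avctx)
 *     {
 *         ...
 *         if (ARCH_X86)
 *             ff_cavsdsp_init_x86(c, avctx);
 *     }
 */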