2
0

qpeldsp_init.c 36 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544
/*
 * quarterpel DSP functions
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
  22. #include <stddef.h>
  23. #include <stdint.h>
  24. #include "config.h"
  25. #include "libavutil/attributes.h"
  26. #include "libavutil/cpu.h"
  27. #include "libavutil/x86/cpu.h"
  28. #include "libavcodec/pixels.h"
  29. #include "libavcodec/qpeldsp.h"
  30. #include "fpel.h"
  31. void ff_put_pixels8_l2_mmxext(uint8_t *dst,
  32. const uint8_t *src1, const uint8_t *src2,
  33. int dstStride, int src1Stride, int h);
  34. void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
  35. const uint8_t *src1, const uint8_t *src2,
  36. int dstStride, int src1Stride, int h);
  37. void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
  38. const uint8_t *src1, const uint8_t *src2,
  39. int dstStride, int src1Stride, int h);
  40. void ff_put_pixels16_l2_mmxext(uint8_t *dst,
  41. const uint8_t *src1, const uint8_t *src2,
  42. int dstStride, int src1Stride, int h);
  43. void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
  44. const uint8_t *src1, const uint8_t *src2,
  45. int dstStride, int src1Stride, int h);
  46. void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
  47. const uint8_t *src1, const uint8_t *src2,
  48. int dstStride, int src1Stride, int h);
  49. void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
  50. int dstStride, int srcStride, int h);
  51. void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
  52. int dstStride, int srcStride, int h);
  53. void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
  54. const uint8_t *src,
  55. int dstStride, int srcStride,
  56. int h);
  57. void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
  58. int dstStride, int srcStride, int h);
  59. void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
  60. int dstStride, int srcStride, int h);
  61. void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
  62. const uint8_t *src,
  63. int dstStride, int srcStride,
  64. int h);
  65. void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
  66. int dstStride, int srcStride);
  67. void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
  68. int dstStride, int srcStride);
  69. void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
  70. const uint8_t *src,
  71. int dstStride, int srcStride);
  72. void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
  73. int dstStride, int srcStride);
  74. void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
  75. int dstStride, int srcStride);
  76. void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
  77. const uint8_t *src,
  78. int dstStride, int srcStride);
  79. #define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
  80. #define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx
  81. #if HAVE_X86ASM
  82. #define ff_put_pixels16_mmxext ff_put_pixels16_mmx
  83. #define ff_put_pixels8_mmxext ff_put_pixels8_mmx
  84. #define QPEL_OP(OPNAME, RND, MMX) \
  85. static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, \
  86. const uint8_t *src, \
  87. ptrdiff_t stride) \
  88. { \
  89. ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
  90. } \
  91. \
  92. static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
  93. const uint8_t *src, \
  94. ptrdiff_t stride) \
  95. { \
  96. uint64_t temp[8]; \
  97. uint8_t *const half = (uint8_t *) temp; \
  98. ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
  99. stride, 8); \
  100. ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
  101. stride, stride, 8); \
  102. } \
  103. \
  104. static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \
  105. const uint8_t *src, \
  106. ptrdiff_t stride) \
  107. { \
  108. ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
  109. stride, 8); \
  110. } \
  111. \
  112. static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \
  113. const uint8_t *src, \
  114. ptrdiff_t stride) \
  115. { \
  116. uint64_t temp[8]; \
  117. uint8_t *const half = (uint8_t *) temp; \
  118. ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
  119. stride, 8); \
  120. ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \
  121. stride, 8); \
  122. } \
  123. \
  124. static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \
  125. const uint8_t *src, \
  126. ptrdiff_t stride) \
  127. { \
  128. uint64_t temp[8]; \
  129. uint8_t *const half = (uint8_t *) temp; \
  130. ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
  131. 8, stride); \
  132. ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
  133. stride, stride, 8); \
  134. } \
  135. \
  136. static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \
  137. const uint8_t *src, \
  138. ptrdiff_t stride) \
  139. { \
  140. ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
  141. stride, stride); \
  142. } \
  143. \
  144. static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \
  145. const uint8_t *src, \
  146. ptrdiff_t stride) \
  147. { \
  148. uint64_t temp[8]; \
  149. uint8_t *const half = (uint8_t *) temp; \
  150. ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
  151. 8, stride); \
  152. ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
  153. stride, 8); \
  154. } \
  155. \
  156. static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \
  157. const uint8_t *src, \
  158. ptrdiff_t stride) \
  159. { \
  160. uint64_t half[8 + 9]; \
  161. uint8_t *const halfH = (uint8_t *) half + 64; \
  162. uint8_t *const halfHV = (uint8_t *) half; \
  163. ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
  164. stride, 9); \
  165. ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
  166. stride, 9); \
  167. ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
  168. ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
  169. stride, 8, 8); \
  170. } \
  171. \
  172. static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \
  173. const uint8_t *src, \
  174. ptrdiff_t stride) \
  175. { \
  176. uint64_t half[8 + 9]; \
  177. uint8_t *const halfH = (uint8_t *) half + 64; \
  178. uint8_t *const halfHV = (uint8_t *) half; \
  179. ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
  180. stride, 9); \
  181. ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
  182. stride, 9); \
  183. ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
  184. ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
  185. stride, 8, 8); \
  186. } \
  187. \
  188. static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \
  189. const uint8_t *src, \
  190. ptrdiff_t stride) \
  191. { \
  192. uint64_t half[8 + 9]; \
  193. uint8_t *const halfH = (uint8_t *) half + 64; \
  194. uint8_t *const halfHV = (uint8_t *) half; \
  195. ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
  196. stride, 9); \
  197. ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
  198. stride, 9); \
  199. ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
  200. ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
  201. stride, 8, 8); \
  202. } \
  203. \
  204. static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \
  205. const uint8_t *src, \
  206. ptrdiff_t stride) \
  207. { \
  208. uint64_t half[8 + 9]; \
  209. uint8_t *const halfH = (uint8_t *) half + 64; \
  210. uint8_t *const halfHV = (uint8_t *) half; \
  211. ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
  212. stride, 9); \
  213. ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
  214. stride, 9); \
  215. ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
  216. ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
  217. stride, 8, 8); \
  218. } \
  219. \
  220. static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \
  221. const uint8_t *src, \
  222. ptrdiff_t stride) \
  223. { \
  224. uint64_t half[8 + 9]; \
  225. uint8_t *const halfH = (uint8_t *) half + 64; \
  226. uint8_t *const halfHV = (uint8_t *) half; \
  227. ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
  228. stride, 9); \
  229. ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
  230. ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
  231. stride, 8, 8); \
  232. } \
  233. \
  234. static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \
  235. const uint8_t *src, \
  236. ptrdiff_t stride) \
  237. { \
  238. uint64_t half[8 + 9]; \
  239. uint8_t *const halfH = (uint8_t *) half + 64; \
  240. uint8_t *const halfHV = (uint8_t *) half; \
  241. ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
  242. stride, 9); \
  243. ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
  244. ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
  245. stride, 8, 8); \
  246. } \
  247. \
  248. static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \
  249. const uint8_t *src, \
  250. ptrdiff_t stride) \
  251. { \
  252. uint64_t half[8 + 9]; \
  253. uint8_t *const halfH = (uint8_t *) half; \
  254. ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
  255. stride, 9); \
  256. ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
  257. 8, stride, 9); \
  258. ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
  259. stride, 8); \
  260. } \
  261. \
  262. static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \
  263. const uint8_t *src, \
  264. ptrdiff_t stride) \
  265. { \
  266. uint64_t half[8 + 9]; \
  267. uint8_t *const halfH = (uint8_t *) half; \
  268. ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
  269. stride, 9); \
  270. ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
  271. stride, 9); \
  272. ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
  273. stride, 8); \
  274. } \
  275. \
  276. static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \
  277. const uint8_t *src, \
  278. ptrdiff_t stride) \
  279. { \
  280. uint64_t half[9]; \
  281. uint8_t *const halfH = (uint8_t *) half; \
  282. ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
  283. stride, 9); \
  284. ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
  285. stride, 8); \
  286. } \
  287. \
  288. static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, \
  289. const uint8_t *src, \
  290. ptrdiff_t stride) \
  291. { \
  292. ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
  293. } \
  294. \
  295. static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
  296. const uint8_t *src, \
  297. ptrdiff_t stride) \
  298. { \
  299. uint64_t temp[32]; \
  300. uint8_t *const half = (uint8_t *) temp; \
  301. ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
  302. stride, 16); \
  303. ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
  304. stride, 16); \
  305. } \
  306. \
  307. static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \
  308. const uint8_t *src, \
  309. ptrdiff_t stride) \
  310. { \
  311. ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
  312. stride, stride, 16);\
  313. } \
  314. \
  315. static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \
  316. const uint8_t *src, \
  317. ptrdiff_t stride) \
  318. { \
  319. uint64_t temp[32]; \
  320. uint8_t *const half = (uint8_t*) temp; \
  321. ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
  322. stride, 16); \
  323. ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \
  324. stride, stride, 16); \
  325. } \
  326. \
  327. static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \
  328. const uint8_t *src, \
  329. ptrdiff_t stride) \
  330. { \
  331. uint64_t temp[32]; \
  332. uint8_t *const half = (uint8_t *) temp; \
  333. ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
  334. stride); \
  335. ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
  336. stride, 16); \
  337. } \
  338. \
  339. static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \
  340. const uint8_t *src, \
  341. ptrdiff_t stride) \
  342. { \
  343. ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
  344. stride, stride); \
  345. } \
  346. \
  347. static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \
  348. const uint8_t *src, \
  349. ptrdiff_t stride) \
  350. { \
  351. uint64_t temp[32]; \
  352. uint8_t *const half = (uint8_t *) temp; \
  353. ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
  354. stride); \
  355. ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \
  356. stride, stride, 16); \
  357. } \
  358. \
  359. static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \
  360. const uint8_t *src, \
  361. ptrdiff_t stride) \
  362. { \
  363. uint64_t half[16 * 2 + 17 * 2]; \
  364. uint8_t *const halfH = (uint8_t *) half + 256; \
  365. uint8_t *const halfHV = (uint8_t *) half; \
  366. ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
  367. stride, 17); \
  368. ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
  369. stride, 17); \
  370. ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
  371. 16, 16); \
  372. ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
  373. stride, 16, 16); \
  374. } \
  375. \
  376. static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \
  377. const uint8_t *src, \
  378. ptrdiff_t stride) \
  379. { \
  380. uint64_t half[16 * 2 + 17 * 2]; \
  381. uint8_t *const halfH = (uint8_t *) half + 256; \
  382. uint8_t *const halfHV = (uint8_t *) half; \
  383. ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
  384. stride, 17); \
  385. ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
  386. stride, 17); \
  387. ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
  388. 16, 16); \
  389. ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
  390. stride, 16, 16); \
  391. } \
  392. \
  393. static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \
  394. const uint8_t *src, \
  395. ptrdiff_t stride) \
  396. { \
  397. uint64_t half[16 * 2 + 17 * 2]; \
  398. uint8_t *const halfH = (uint8_t *) half + 256; \
  399. uint8_t *const halfHV = (uint8_t *) half; \
  400. ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
  401. stride, 17); \
  402. ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
  403. stride, 17); \
  404. ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
  405. 16, 16); \
  406. ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
  407. stride, 16, 16); \
  408. } \
  409. \
  410. static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \
  411. const uint8_t *src, \
  412. ptrdiff_t stride) \
  413. { \
  414. uint64_t half[16 * 2 + 17 * 2]; \
  415. uint8_t *const halfH = (uint8_t *) half + 256; \
  416. uint8_t *const halfHV = (uint8_t *) half; \
  417. ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
  418. stride, 17); \
  419. ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
  420. stride, 17); \
  421. ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
  422. 16, 16); \
  423. ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
  424. stride, 16, 16); \
  425. } \
  426. \
  427. static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \
  428. const uint8_t *src, \
  429. ptrdiff_t stride) \
  430. { \
  431. uint64_t half[16 * 2 + 17 * 2]; \
  432. uint8_t *const halfH = (uint8_t *) half + 256; \
  433. uint8_t *const halfHV = (uint8_t *) half; \
  434. ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
  435. stride, 17); \
  436. ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
  437. 16, 16); \
  438. ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
  439. stride, 16, 16); \
  440. } \
  441. \
  442. static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \
  443. const uint8_t *src, \
  444. ptrdiff_t stride) \
  445. { \
  446. uint64_t half[16 * 2 + 17 * 2]; \
  447. uint8_t *const halfH = (uint8_t *) half + 256; \
  448. uint8_t *const halfHV = (uint8_t *) half; \
  449. ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
  450. stride, 17); \
  451. ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
  452. 16, 16); \
  453. ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
  454. stride, 16, 16); \
  455. } \
  456. \
  457. static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \
  458. const uint8_t *src, \
  459. ptrdiff_t stride) \
  460. { \
  461. uint64_t half[17 * 2]; \
  462. uint8_t *const halfH = (uint8_t *) half; \
  463. ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
  464. stride, 17); \
  465. ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
  466. stride, 17); \
  467. ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
  468. stride, 16); \
  469. } \
  470. \
  471. static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \
  472. const uint8_t *src, \
  473. ptrdiff_t stride) \
  474. { \
  475. uint64_t half[17 * 2]; \
  476. uint8_t *const halfH = (uint8_t *) half; \
  477. ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
  478. stride, 17); \
  479. ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
  480. stride, 17); \
  481. ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
  482. stride, 16); \
  483. } \
  484. \
  485. static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \
  486. const uint8_t *src, \
  487. ptrdiff_t stride) \
  488. { \
  489. uint64_t half[17 * 2]; \
  490. uint8_t *const halfH = (uint8_t *) half; \
  491. ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
  492. stride, 17); \
  493. ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
  494. stride, 16); \
  495. }
  496. QPEL_OP(put_, _, mmxext)
  497. QPEL_OP(avg_, _, mmxext)
  498. QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
  499. #endif /* HAVE_X86ASM */
  500. #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
  501. do { \
  502. c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
  503. c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
  504. c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
  505. c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
  506. c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
  507. c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
  508. c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
  509. c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
  510. c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
  511. c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
  512. c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
  513. c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
  514. c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
  515. c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
  516. c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
  517. c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
  518. } while (0)
  519. av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
  520. {
  521. int cpu_flags = av_get_cpu_flags();
  522. if (X86_MMXEXT(cpu_flags)) {
  523. #if HAVE_MMXEXT_EXTERNAL
  524. SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
  525. SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
  526. SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
  527. SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
  528. SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
  529. SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
  530. #endif /* HAVE_MMXEXT_EXTERNAL */
  531. }
  532. }