dirac_dwt_init.c 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. /*
  2. * x86 optimized discrete wavelet transform
  3. * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  4. * Copyright (c) 2010 David Conrad
  5. *
  6. * This file is part of FFmpeg.
  7. *
  8. * FFmpeg is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * FFmpeg is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with FFmpeg; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. #include "libavutil/x86/asm.h"
  23. #include "libavutil/x86/cpu.h"
  24. #include "libavcodec/dirac_dwt.h"
  25. #define COMPOSE_VERTICAL(ext, align) \
  26. void ff_vertical_compose53iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
  27. void ff_vertical_compose_dirac53iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
  28. void ff_vertical_compose_dd137iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
  29. void ff_vertical_compose_dd97iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
  30. void ff_vertical_compose_haar##ext(int16_t *b0, int16_t *b1, int width); \
  31. void ff_horizontal_compose_haar0i##ext(int16_t *b, int16_t *tmp, int w);\
  32. void ff_horizontal_compose_haar1i##ext(int16_t *b, int16_t *tmp, int w);\
  33. \
  34. static void vertical_compose53iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
  35. { \
  36. int i, width_align = width&~(align-1); \
  37. int16_t *b0 = (int16_t *)_b0; \
  38. int16_t *b1 = (int16_t *)_b1; \
  39. int16_t *b2 = (int16_t *)_b2; \
  40. \
  41. for(i=width_align; i<width; i++) \
  42. b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \
  43. \
  44. ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \
  45. } \
  46. \
  47. static void vertical_compose_dirac53iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
  48. { \
  49. int i, width_align = width&~(align-1); \
  50. int16_t *b0 = (int16_t *)_b0; \
  51. int16_t *b1 = (int16_t *)_b1; \
  52. int16_t *b2 = (int16_t *)_b2; \
  53. \
  54. for(i=width_align; i<width; i++) \
  55. b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \
  56. \
  57. ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \
  58. } \
  59. \
  60. static void vertical_compose_dd137iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
  61. uint8_t *_b3, uint8_t *_b4, int width) \
  62. { \
  63. int i, width_align = width&~(align-1); \
  64. int16_t *b0 = (int16_t *)_b0; \
  65. int16_t *b1 = (int16_t *)_b1; \
  66. int16_t *b2 = (int16_t *)_b2; \
  67. int16_t *b3 = (int16_t *)_b3; \
  68. int16_t *b4 = (int16_t *)_b4; \
  69. \
  70. for(i=width_align; i<width; i++) \
  71. b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
  72. \
  73. ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \
  74. } \
  75. \
  76. static void vertical_compose_dd97iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
  77. uint8_t *_b3, uint8_t *_b4, int width) \
  78. { \
  79. int i, width_align = width&~(align-1); \
  80. int16_t *b0 = (int16_t *)_b0; \
  81. int16_t *b1 = (int16_t *)_b1; \
  82. int16_t *b2 = (int16_t *)_b2; \
  83. int16_t *b3 = (int16_t *)_b3; \
  84. int16_t *b4 = (int16_t *)_b4; \
  85. \
  86. for(i=width_align; i<width; i++) \
  87. b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
  88. \
  89. ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \
  90. } \
  91. static void vertical_compose_haar##ext(uint8_t *_b0, uint8_t *_b1, int width) \
  92. { \
  93. int i, width_align = width&~(align-1); \
  94. int16_t *b0 = (int16_t *)_b0; \
  95. int16_t *b1 = (int16_t *)_b1; \
  96. \
  97. for(i=width_align; i<width; i++) { \
  98. b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \
  99. b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]); \
  100. } \
  101. \
  102. ff_vertical_compose_haar##ext(b0, b1, width_align); \
  103. } \
  104. static void horizontal_compose_haar0i##ext(uint8_t *_b, uint8_t *_tmp, int w)\
  105. {\
  106. int w2= w>>1;\
  107. int x= w2 - (w2&(align-1));\
  108. int16_t *b = (int16_t *)_b; \
  109. int16_t *tmp = (int16_t *)_tmp; \
  110. \
  111. ff_horizontal_compose_haar0i##ext(b, tmp, w);\
  112. \
  113. for (; x < w2; x++) {\
  114. b[2*x ] = tmp[x];\
  115. b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\
  116. }\
  117. }\
  118. static void horizontal_compose_haar1i##ext(uint8_t *_b, uint8_t *_tmp, int w)\
  119. {\
  120. int w2= w>>1;\
  121. int x= w2 - (w2&(align-1));\
  122. int16_t *b = (int16_t *)_b; \
  123. int16_t *tmp = (int16_t *)_tmp; \
  124. \
  125. ff_horizontal_compose_haar1i##ext(b, tmp, w);\
  126. \
  127. for (; x < w2; x++) {\
  128. b[2*x ] = (tmp[x] + 1)>>1;\
  129. b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1;\
  130. }\
  131. }\
  132. \
  133. #if HAVE_X86ASM
  134. #if !ARCH_X86_64
  135. COMPOSE_VERTICAL(_mmx, 4)
  136. #endif
  137. COMPOSE_VERTICAL(_sse2, 8)
  138. void ff_horizontal_compose_dd97i_ssse3(int16_t *_b, int16_t *_tmp, int w);
  139. static void horizontal_compose_dd97i_ssse3(uint8_t *_b, uint8_t *_tmp, int w)
  140. {
  141. int w2= w>>1;
  142. int x= w2 - (w2&7);
  143. int16_t *b = (int16_t *)_b;
  144. int16_t *tmp = (int16_t *)_tmp;
  145. ff_horizontal_compose_dd97i_ssse3(b, tmp, w);
  146. for (; x < w2; x++) {
  147. b[2*x ] = (tmp[x] + 1)>>1;
  148. b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1;
  149. }
  150. }
  151. #endif
  152. void ff_spatial_idwt_init_x86(DWTContext *d, enum dwt_type type)
  153. {
  154. #if HAVE_X86ASM
  155. int mm_flags = av_get_cpu_flags();
  156. #if !ARCH_X86_64
  157. if (!(mm_flags & AV_CPU_FLAG_MMX))
  158. return;
  159. switch (type) {
  160. case DWT_DIRAC_DD9_7:
  161. d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
  162. d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
  163. break;
  164. case DWT_DIRAC_LEGALL5_3:
  165. d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
  166. d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_mmx;
  167. break;
  168. case DWT_DIRAC_DD13_7:
  169. d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_mmx;
  170. d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
  171. break;
  172. case DWT_DIRAC_HAAR0:
  173. d->vertical_compose = (void*)vertical_compose_haar_mmx;
  174. d->horizontal_compose = horizontal_compose_haar0i_mmx;
  175. break;
  176. case DWT_DIRAC_HAAR1:
  177. d->vertical_compose = (void*)vertical_compose_haar_mmx;
  178. d->horizontal_compose = horizontal_compose_haar1i_mmx;
  179. break;
  180. }
  181. #endif
  182. if (!(mm_flags & AV_CPU_FLAG_SSE2))
  183. return;
  184. switch (type) {
  185. case DWT_DIRAC_DD9_7:
  186. d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
  187. d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
  188. break;
  189. case DWT_DIRAC_LEGALL5_3:
  190. d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
  191. d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_sse2;
  192. break;
  193. case DWT_DIRAC_DD13_7:
  194. d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_sse2;
  195. d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
  196. break;
  197. case DWT_DIRAC_HAAR0:
  198. d->vertical_compose = (void*)vertical_compose_haar_sse2;
  199. d->horizontal_compose = horizontal_compose_haar0i_sse2;
  200. break;
  201. case DWT_DIRAC_HAAR1:
  202. d->vertical_compose = (void*)vertical_compose_haar_sse2;
  203. d->horizontal_compose = horizontal_compose_haar1i_sse2;
  204. break;
  205. }
  206. if (!(mm_flags & AV_CPU_FLAG_SSSE3))
  207. return;
  208. switch (type) {
  209. case DWT_DIRAC_DD9_7:
  210. d->horizontal_compose = horizontal_compose_dd97i_ssse3;
  211. break;
  212. }
  213. #endif // HAVE_X86ASM
  214. }