2
0

scale_any.cc 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. /*
  2. * Copyright 2015 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "libyuv/scale.h"
  11. #include "libyuv/scale_row.h"
  12. #include "libyuv/basic_types.h"
  13. #ifdef __cplusplus
  14. namespace libyuv {
  15. extern "C" {
  16. #endif
  // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols.
  //
  // Generates a column scaler
  //   void NAMEANY(uint8* dst_ptr, const uint8* src_ptr,
  //                int dst_width, int x, int dx)
  // that accepts any dst_width by splitting the row in two:
  //   TERP_SIMD  is called for the leading pixels, whose count is dst_width
  //              with the MASK bits cleared; it is skipped when that count
  //              is not positive.
  //   TERP_C     is always called for the remaining (dst_width & MASK)
  //              pixels, even when that count is zero.
  //   BPP        bytes per destination pixel, used to advance dst_ptr.
  //   MASK       block size minus one. It is applied as a bit mask, so it
  //              must have the form 2^k - 1.
  //
  // src_ptr is passed unchanged to both calls; the tail call instead starts at
  // position x + n * dx.
  #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK)                         \
    void NAMEANY(uint8* dst_ptr, const uint8* src_ptr,                        \
                 int dst_width, int x, int dx) {                              \
      const int tail_width = dst_width & (MASK);                              \
      const int simd_width = dst_width & ~(MASK);                             \
      if (simd_width > 0) {                                                   \
        TERP_SIMD(dst_ptr, src_ptr, simd_width, x, dx);                       \
      }                                                                       \
      TERP_C(dst_ptr + simd_width * (BPP), src_ptr, tail_width,               \
             x + simd_width * dx, dx);                                        \
    }
  // NEON column scalers. Each wrapper is only emitted when the corresponding
  // HAS_* feature macro is defined.

  // 1 byte per pixel, SIMD blocks of 8 pixels.
  #if defined(HAS_SCALEFILTERCOLS_NEON)
  CANY(ScaleFilterCols_Any_NEON,
       ScaleFilterCols_NEON,
       ScaleFilterCols_C,
       1, 7)
  #endif  // HAS_SCALEFILTERCOLS_NEON

  // 4 bytes per pixel, SIMD blocks of 8 pixels.
  #if defined(HAS_SCALEARGBCOLS_NEON)
  CANY(ScaleARGBCols_Any_NEON,
       ScaleARGBCols_NEON,
       ScaleARGBCols_C,
       4, 7)
  #endif  // HAS_SCALEARGBCOLS_NEON

  // 4 bytes per pixel, SIMD blocks of 4 pixels.
  #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  CANY(ScaleARGBFilterCols_Any_NEON,
       ScaleARGBFilterCols_NEON,
       ScaleARGBFilterCols_C,
       4, 3)
  #endif  // HAS_SCALEARGBFILTERCOLS_NEON

  // CANY is not used past this point.
  #undef CANY
  // Fixed scale down.
  //
  // Generates
  //   void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride,
  //                uint8* dst_ptr, int dst_width)
  // which passes the largest multiple of (MASK + 1) destination pixels to
  // SCALEROWDOWN_SIMD (skipped when that count is not positive) and always
  // passes the remaining pixels to SCALEROWDOWN_C.
  //
  //   FACTOR  source pixels consumed per destination pixel. It is expanded
  //           WITHOUT parentheses on purpose: fractional factors are written
  //           as "4 / 3" or "8 / 3" so the source offset reads (n * 4 / 3),
  //           multiplying before dividing. Parenthesising FACTOR would
  //           truncate those factors to 1 and 2.
  //   BPP     bytes per pixel, applied to the source and destination offsets.
  //   MASK    SIMD block size minus one. The remainder is taken with an
  //           unsigned modulo rather than a bit mask, so MASK + 1 does not
  //           have to be a power of two.
  #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
    void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride,                   \
                 uint8* dst_ptr, int dst_width) {                              \
      const unsigned int block = (MASK) + 1;                                   \
      const int tail_width = (int)((unsigned int)dst_width % block);           \
      const int simd_width = dst_width - tail_width;                           \
      if (simd_width > 0) {                                                    \
        SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, simd_width);           \
      }                                                                        \
      SCALEROWDOWN_C(src_ptr + (simd_width * FACTOR) * (BPP), src_stride,      \
                     dst_ptr + simd_width * (BPP), tail_width);                \
    }
  // Fixed scale down for odd source width. Used by I420Blend subsampling.
  // Since dst_width is (width + 1) / 2, the last destination pixel is not a
  // full box and must not be produced by the SIMD kernel.
  //
  // Generates
  //   void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride,
  //                uint8* dst_ptr, int dst_width)
  // The split is computed on (dst_width - 1), i.e. on the full pixels only:
  //   n      largest multiple of (MASK + 1) not exceeding dst_width - 1; these
  //          pixels go to SCALEROWDOWN_SIMD (skipped when n is not positive).
  //   r      full pixels left over after the SIMD part.
  // SCALEROWDOWN_C then receives r + 1 pixels: the leftover full pixels plus
  // the final odd pixel.
  // NOTE(review): this relies on SCALEROWDOWN_C being an "_Odd_C" variant whose
  // width argument counts that final pixel - confirm against its definition.
  //
  // Previously n was derived from dst_width instead of dst_width - 1, so n was
  // never a multiple of the SIMD block size and the C kernel was given a count
  // that excluded the final pixel.
  //
  // Requires dst_width >= 1.
  // FACTOR is expanded without parentheses so that a fractional factor written
  // as "a / b" multiplies before dividing.
  #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
    void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride,                   \
                 uint8* dst_ptr, int dst_width) {                              \
      int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1));               \
      int n = (dst_width - 1) - r;                                             \
      if (n > 0) {                                                             \
        SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n);                    \
      }                                                                        \
      SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride,                 \
                     dst_ptr + n * BPP, r + 1);                                \
    }
  // 1/2 scale down, 1 byte per pixel (FACTOR 2, BPP 1).
  // The *_Odd_* wrappers pair the Box SIMD kernel with ScaleRowDown2Box_Odd_C.

  // SSSE3: SIMD blocks of 16 destination pixels.
  #if defined(HAS_SCALEROWDOWN2_SSSE3)
  SDANY(ScaleRowDown2_Any_SSSE3,
        ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
  SDANY(ScaleRowDown2Linear_Any_SSSE3,
        ScaleRowDown2Linear_SSSE3, ScaleRowDown2Linear_C, 2, 1, 15)
  SDANY(ScaleRowDown2Box_Any_SSSE3,
        ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C, 2, 1, 15)
  SDODD(ScaleRowDown2Box_Odd_SSSE3,
        ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_Odd_C, 2, 1, 15)
  #endif  // HAS_SCALEROWDOWN2_SSSE3

  // AVX2: SIMD blocks of 32 destination pixels.
  #if defined(HAS_SCALEROWDOWN2_AVX2)
  SDANY(ScaleRowDown2_Any_AVX2,
        ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
  SDANY(ScaleRowDown2Linear_Any_AVX2,
        ScaleRowDown2Linear_AVX2, ScaleRowDown2Linear_C, 2, 1, 31)
  SDANY(ScaleRowDown2Box_Any_AVX2,
        ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C, 2, 1, 31)
  SDODD(ScaleRowDown2Box_Odd_AVX2,
        ScaleRowDown2Box_AVX2, ScaleRowDown2Box_Odd_C, 2, 1, 31)
  #endif  // HAS_SCALEROWDOWN2_AVX2

  // NEON: SIMD blocks of 16 destination pixels.
  #if defined(HAS_SCALEROWDOWN2_NEON)
  SDANY(ScaleRowDown2_Any_NEON,
        ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
  SDANY(ScaleRowDown2Linear_Any_NEON,
        ScaleRowDown2Linear_NEON, ScaleRowDown2Linear_C, 2, 1, 15)
  SDANY(ScaleRowDown2Box_Any_NEON,
        ScaleRowDown2Box_NEON, ScaleRowDown2Box_C, 2, 1, 15)
  SDODD(ScaleRowDown2Box_Odd_NEON,
        ScaleRowDown2Box_NEON, ScaleRowDown2Box_Odd_C, 2, 1, 15)
  #endif  // HAS_SCALEROWDOWN2_NEON
  // 1/4 scale down, 1 byte per pixel (FACTOR 4, BPP 1).

  // SSSE3: SIMD blocks of 8 destination pixels.
  #if defined(HAS_SCALEROWDOWN4_SSSE3)
  SDANY(ScaleRowDown4_Any_SSSE3,
        ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
  SDANY(ScaleRowDown4Box_Any_SSSE3,
        ScaleRowDown4Box_SSSE3, ScaleRowDown4Box_C, 4, 1, 7)
  #endif  // HAS_SCALEROWDOWN4_SSSE3

  // AVX2: SIMD blocks of 16 destination pixels.
  #if defined(HAS_SCALEROWDOWN4_AVX2)
  SDANY(ScaleRowDown4_Any_AVX2,
        ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
  SDANY(ScaleRowDown4Box_Any_AVX2,
        ScaleRowDown4Box_AVX2, ScaleRowDown4Box_C, 4, 1, 15)
  #endif  // HAS_SCALEROWDOWN4_AVX2

  // NEON: SIMD blocks of 8 destination pixels.
  #if defined(HAS_SCALEROWDOWN4_NEON)
  SDANY(ScaleRowDown4_Any_NEON,
        ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
  SDANY(ScaleRowDown4Box_Any_NEON,
        ScaleRowDown4Box_NEON, ScaleRowDown4Box_C, 4, 1, 7)
  #endif  // HAS_SCALEROWDOWN4_NEON
  // 3/4 scale down, 1 byte per pixel.
  // FACTOR is spelled "4 / 3" so the source offset expands to (n * 4 / 3).
  // SIMD blocks are 24 destination pixels (MASK 23), so n is always a multiple
  // of 3 and the division is exact.

  #if defined(HAS_SCALEROWDOWN34_SSSE3)
  SDANY(ScaleRowDown34_Any_SSSE3,
        ScaleRowDown34_SSSE3, ScaleRowDown34_C, 4 / 3, 1, 23)
  SDANY(ScaleRowDown34_0_Box_Any_SSSE3,
        ScaleRowDown34_0_Box_SSSE3, ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
  SDANY(ScaleRowDown34_1_Box_Any_SSSE3,
        ScaleRowDown34_1_Box_SSSE3, ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
  #endif  // HAS_SCALEROWDOWN34_SSSE3

  #if defined(HAS_SCALEROWDOWN34_NEON)
  SDANY(ScaleRowDown34_Any_NEON,
        ScaleRowDown34_NEON, ScaleRowDown34_C, 4 / 3, 1, 23)
  SDANY(ScaleRowDown34_0_Box_Any_NEON,
        ScaleRowDown34_0_Box_NEON, ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
  SDANY(ScaleRowDown34_1_Box_Any_NEON,
        ScaleRowDown34_1_Box_NEON, ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
  #endif  // HAS_SCALEROWDOWN34_NEON
  // 3/8 scale down, 1 byte per pixel.
  // FACTOR is spelled "8 / 3" so the source offset expands to (n * 8 / 3).
  // Every block size used here (12 or 6 destination pixels) is a multiple of
  // 3, so the division is exact.

  // SSSE3: blocks of 12 pixels for the point sampler, 6 for the box filters.
  #if defined(HAS_SCALEROWDOWN38_SSSE3)
  SDANY(ScaleRowDown38_Any_SSSE3,
        ScaleRowDown38_SSSE3, ScaleRowDown38_C, 8 / 3, 1, 11)
  SDANY(ScaleRowDown38_3_Box_Any_SSSE3,
        ScaleRowDown38_3_Box_SSSE3, ScaleRowDown38_3_Box_C, 8 / 3, 1, 5)
  SDANY(ScaleRowDown38_2_Box_Any_SSSE3,
        ScaleRowDown38_2_Box_SSSE3, ScaleRowDown38_2_Box_C, 8 / 3, 1, 5)
  #endif  // HAS_SCALEROWDOWN38_SSSE3

  // NEON: blocks of 12 pixels for all three variants.
  #if defined(HAS_SCALEROWDOWN38_NEON)
  SDANY(ScaleRowDown38_Any_NEON,
        ScaleRowDown38_NEON, ScaleRowDown38_C, 8 / 3, 1, 11)
  SDANY(ScaleRowDown38_3_Box_Any_NEON,
        ScaleRowDown38_3_Box_NEON, ScaleRowDown38_3_Box_C, 8 / 3, 1, 11)
  SDANY(ScaleRowDown38_2_Box_Any_NEON,
        ScaleRowDown38_2_Box_NEON, ScaleRowDown38_2_Box_C, 8 / 3, 1, 11)
  #endif  // HAS_SCALEROWDOWN38_NEON
  // 1/2 scale down for ARGB, 4 bytes per pixel (FACTOR 2, BPP 4).

  // SSE2: SIMD blocks of 4 destination pixels.
  #ifdef HAS_SCALEARGBROWDOWN2_SSE2
  SDANY(ScaleARGBRowDown2_Any_SSE2, ScaleARGBRowDown2_SSE2,
        ScaleARGBRowDown2_C, 2, 4, 3)
  SDANY(ScaleARGBRowDown2Linear_Any_SSE2, ScaleARGBRowDown2Linear_SSE2,
        ScaleARGBRowDown2Linear_C, 2, 4, 3)
  SDANY(ScaleARGBRowDown2Box_Any_SSE2, ScaleARGBRowDown2Box_SSE2,
        ScaleARGBRowDown2Box_C, 2, 4, 3)
  #endif

  // NEON: SIMD blocks of 8 destination pixels.
  #ifdef HAS_SCALEARGBROWDOWN2_NEON
  SDANY(ScaleARGBRowDown2_Any_NEON, ScaleARGBRowDown2_NEON,
        ScaleARGBRowDown2_C, 2, 4, 7)
  SDANY(ScaleARGBRowDown2Linear_Any_NEON, ScaleARGBRowDown2Linear_NEON,
        ScaleARGBRowDown2Linear_C, 2, 4, 7)
  SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON,
        ScaleARGBRowDown2Box_C, 2, 4, 7)
  #endif

  // Neither fixed-scale generator is used past this point. SDODD is undefined
  // here as well so it does not leak into the rest of the translation unit.
  #undef SDANY
  #undef SDODD
  // Scale down by even scale factor.
  //
  // Generates
  //   void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx,
  //                uint8* dst_ptr, int dst_width)
  // Unlike the fixed-factor wrappers, the horizontal step is a run-time
  // argument (src_stepx, in pixels) that is forwarded to both kernels.
  //
  //   SCALEROWDOWN_SIMD  handles the largest multiple of (MASK + 1)
  //                      destination pixels; skipped when that is not positive.
  //   SCALEROWDOWN_C     always called for the remaining pixels, starting
  //                      (n * src_stepx) source pixels into the row.
  //   BPP                bytes per pixel for source and destination offsets.
  //   MASK               SIMD block size minus one.
  #define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK)        \
    void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx,    \
                 uint8* dst_ptr, int dst_width) {                              \
      const unsigned int block = (MASK) + 1;                                   \
      const int tail_width = (int)((unsigned int)dst_width % block);           \
      const int simd_width = dst_width - tail_width;                           \
      if (simd_width > 0) {                                                    \
        SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr,             \
                          simd_width);                                         \
      }                                                                        \
      SCALEROWDOWN_C(src_ptr + (simd_width * src_stepx) * (BPP), src_stride,   \
                     src_stepx, dst_ptr + simd_width * (BPP), tail_width);     \
    }
  // ARGB scale down by an even factor: 4 bytes per pixel, SIMD blocks of 4
  // destination pixels for both SSE2 and NEON.
  #ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
  SDAANY(ScaleARGBRowDownEven_Any_SSE2, ScaleARGBRowDownEven_SSE2,
         ScaleARGBRowDownEven_C, 4, 3)
  SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, ScaleARGBRowDownEvenBox_SSE2,
         ScaleARGBRowDownEvenBox_C, 4, 3)
  #endif
  #ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
  SDAANY(ScaleARGBRowDownEven_Any_NEON, ScaleARGBRowDownEven_NEON,
         ScaleARGBRowDownEven_C, 4, 3)
  SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON,
         ScaleARGBRowDownEvenBox_C, 4, 3)
  #endif
  // SDAANY is not used past this point; undefine it so it does not leak into
  // the rest of the translation unit.
  #undef SDAANY
  // Add rows box filter scale down.
  //
  // Generates
  //   void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width)
  // The split is made on the SOURCE width here:
  //   SCALEADDROW_SIMD  handles src_width with the MASK bits cleared; skipped
  //                     when that count is not positive.
  //   SCALEADDROW_C     always called for the remaining (src_width & MASK)
  //                     elements.
  // Both pointers advance by the same element count for the tail: src_ptr in
  // bytes, dst_ptr in uint16 elements.
  // MASK is applied as a bit mask, so it must have the form 2^k - 1.
  #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK)                \
    void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) {       \
      const int tail_width = src_width & (MASK);                               \
      const int simd_width = src_width & ~(MASK);                              \
      if (simd_width > 0) {                                                    \
        SCALEADDROW_SIMD(src_ptr, dst_ptr, simd_width);                        \
      }                                                                        \
      SCALEADDROW_C(src_ptr + simd_width, dst_ptr + simd_width, tail_width);   \
    }
  // ScaleAddRow wrappers. All three fall back to ScaleAddRow_C for the tail.

  // SSE2: SIMD blocks of 16 source pixels.
  #if defined(HAS_SCALEADDROW_SSE2)
  SAANY(ScaleAddRow_Any_SSE2,
        ScaleAddRow_SSE2, ScaleAddRow_C, 15)
  #endif  // HAS_SCALEADDROW_SSE2

  // AVX2: SIMD blocks of 32 source pixels.
  #if defined(HAS_SCALEADDROW_AVX2)
  SAANY(ScaleAddRow_Any_AVX2,
        ScaleAddRow_AVX2, ScaleAddRow_C, 31)
  #endif  // HAS_SCALEADDROW_AVX2

  // NEON: SIMD blocks of 16 source pixels.
  #if defined(HAS_SCALEADDROW_NEON)
  SAANY(ScaleAddRow_Any_NEON,
        ScaleAddRow_NEON, ScaleAddRow_C, 15)
  #endif  // HAS_SCALEADDROW_NEON

  // SAANY is not used past this point.
  #undef SAANY
  199. #ifdef __cplusplus
  200. } // extern "C"
  201. } // namespace libyuv
  202. #endif