/*
 *  Copyright 2015 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "libyuv/scale.h"
#include "libyuv/scale_row.h"
#include "libyuv/basic_types.h"
  13. #ifdef __cplusplus
  14. namespace libyuv {
  15. extern "C" {
  16. #endif
  17. // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols
  18. #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \
  19. void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \
  20. int dx) { \
  21. int r = dst_width & MASK; \
  22. int n = dst_width & ~MASK; \
  23. if (n > 0) { \
  24. TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \
  25. } \
  26. TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \
  27. }
  28. #ifdef HAS_SCALEFILTERCOLS_NEON
  29. CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
  30. #endif
  31. #ifdef HAS_SCALEFILTERCOLS_MSA
  32. CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15)
  33. #endif
  34. #ifdef HAS_SCALEARGBCOLS_NEON
  35. CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
  36. #endif
  37. #ifdef HAS_SCALEARGBCOLS_MSA
  38. CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3)
  39. #endif
  40. #ifdef HAS_SCALEARGBFILTERCOLS_NEON
  41. CANY(ScaleARGBFilterCols_Any_NEON,
  42. ScaleARGBFilterCols_NEON,
  43. ScaleARGBFilterCols_C,
  44. 4,
  45. 3)
  46. #endif
  47. #ifdef HAS_SCALEARGBFILTERCOLS_MSA
  48. CANY(ScaleARGBFilterCols_Any_MSA,
  49. ScaleARGBFilterCols_MSA,
  50. ScaleARGBFilterCols_C,
  51. 4,
  52. 7)
  53. #endif
  54. #undef CANY
  55. // Fixed scale down.
  56. // Mask may be non-power of 2, so use MOD
  57. #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
  58. void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
  59. int dst_width) { \
  60. int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \
  61. int n = dst_width - r; \
  62. if (n > 0) { \
  63. SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
  64. } \
  65. SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
  66. dst_ptr + n * BPP, r); \
  67. }
  68. // Fixed scale down for odd source width. Used by I420Blend subsampling.
  69. // Since dst_width is (width + 1) / 2, this function scales one less pixel
  70. // and copies the last pixel.
  71. #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
  72. void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
  73. int dst_width) { \
  74. int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \
  75. int n = (dst_width - 1) - r; \
  76. if (n > 0) { \
  77. SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
  78. } \
  79. SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
  80. dst_ptr + n * BPP, r + 1); \
  81. }
  82. #ifdef HAS_SCALEROWDOWN2_SSSE3
  83. SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
  84. SDANY(ScaleRowDown2Linear_Any_SSSE3,
  85. ScaleRowDown2Linear_SSSE3,
  86. ScaleRowDown2Linear_C,
  87. 2,
  88. 1,
  89. 15)
  90. SDANY(ScaleRowDown2Box_Any_SSSE3,
  91. ScaleRowDown2Box_SSSE3,
  92. ScaleRowDown2Box_C,
  93. 2,
  94. 1,
  95. 15)
  96. SDODD(ScaleRowDown2Box_Odd_SSSE3,
  97. ScaleRowDown2Box_SSSE3,
  98. ScaleRowDown2Box_Odd_C,
  99. 2,
  100. 1,
  101. 15)
  102. #endif
  103. #ifdef HAS_SCALEROWDOWN2_AVX2
  104. SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
  105. SDANY(ScaleRowDown2Linear_Any_AVX2,
  106. ScaleRowDown2Linear_AVX2,
  107. ScaleRowDown2Linear_C,
  108. 2,
  109. 1,
  110. 31)
  111. SDANY(ScaleRowDown2Box_Any_AVX2,
  112. ScaleRowDown2Box_AVX2,
  113. ScaleRowDown2Box_C,
  114. 2,
  115. 1,
  116. 31)
  117. SDODD(ScaleRowDown2Box_Odd_AVX2,
  118. ScaleRowDown2Box_AVX2,
  119. ScaleRowDown2Box_Odd_C,
  120. 2,
  121. 1,
  122. 31)
  123. #endif
  124. #ifdef HAS_SCALEROWDOWN2_NEON
  125. SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
  126. SDANY(ScaleRowDown2Linear_Any_NEON,
  127. ScaleRowDown2Linear_NEON,
  128. ScaleRowDown2Linear_C,
  129. 2,
  130. 1,
  131. 15)
  132. SDANY(ScaleRowDown2Box_Any_NEON,
  133. ScaleRowDown2Box_NEON,
  134. ScaleRowDown2Box_C,
  135. 2,
  136. 1,
  137. 15)
  138. SDODD(ScaleRowDown2Box_Odd_NEON,
  139. ScaleRowDown2Box_NEON,
  140. ScaleRowDown2Box_Odd_C,
  141. 2,
  142. 1,
  143. 15)
  144. #endif
  145. #ifdef HAS_SCALEROWDOWN2_MSA
  146. SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31)
  147. SDANY(ScaleRowDown2Linear_Any_MSA,
  148. ScaleRowDown2Linear_MSA,
  149. ScaleRowDown2Linear_C,
  150. 2,
  151. 1,
  152. 31)
  153. SDANY(ScaleRowDown2Box_Any_MSA,
  154. ScaleRowDown2Box_MSA,
  155. ScaleRowDown2Box_C,
  156. 2,
  157. 1,
  158. 31)
  159. #endif
  160. #ifdef HAS_SCALEROWDOWN4_SSSE3
  161. SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
  162. SDANY(ScaleRowDown4Box_Any_SSSE3,
  163. ScaleRowDown4Box_SSSE3,
  164. ScaleRowDown4Box_C,
  165. 4,
  166. 1,
  167. 7)
  168. #endif
  169. #ifdef HAS_SCALEROWDOWN4_AVX2
  170. SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
  171. SDANY(ScaleRowDown4Box_Any_AVX2,
  172. ScaleRowDown4Box_AVX2,
  173. ScaleRowDown4Box_C,
  174. 4,
  175. 1,
  176. 15)
  177. #endif
  178. #ifdef HAS_SCALEROWDOWN4_NEON
  179. SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
  180. SDANY(ScaleRowDown4Box_Any_NEON,
  181. ScaleRowDown4Box_NEON,
  182. ScaleRowDown4Box_C,
  183. 4,
  184. 1,
  185. 7)
  186. #endif
  187. #ifdef HAS_SCALEROWDOWN4_MSA
  188. SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15)
  189. SDANY(ScaleRowDown4Box_Any_MSA,
  190. ScaleRowDown4Box_MSA,
  191. ScaleRowDown4Box_C,
  192. 4,
  193. 1,
  194. 15)
  195. #endif
  196. #ifdef HAS_SCALEROWDOWN34_SSSE3
  197. SDANY(ScaleRowDown34_Any_SSSE3,
  198. ScaleRowDown34_SSSE3,
  199. ScaleRowDown34_C,
  200. 4 / 3,
  201. 1,
  202. 23)
  203. SDANY(ScaleRowDown34_0_Box_Any_SSSE3,
  204. ScaleRowDown34_0_Box_SSSE3,
  205. ScaleRowDown34_0_Box_C,
  206. 4 / 3,
  207. 1,
  208. 23)
  209. SDANY(ScaleRowDown34_1_Box_Any_SSSE3,
  210. ScaleRowDown34_1_Box_SSSE3,
  211. ScaleRowDown34_1_Box_C,
  212. 4 / 3,
  213. 1,
  214. 23)
  215. #endif
  216. #ifdef HAS_SCALEROWDOWN34_NEON
  217. SDANY(ScaleRowDown34_Any_NEON,
  218. ScaleRowDown34_NEON,
  219. ScaleRowDown34_C,
  220. 4 / 3,
  221. 1,
  222. 23)
  223. SDANY(ScaleRowDown34_0_Box_Any_NEON,
  224. ScaleRowDown34_0_Box_NEON,
  225. ScaleRowDown34_0_Box_C,
  226. 4 / 3,
  227. 1,
  228. 23)
  229. SDANY(ScaleRowDown34_1_Box_Any_NEON,
  230. ScaleRowDown34_1_Box_NEON,
  231. ScaleRowDown34_1_Box_C,
  232. 4 / 3,
  233. 1,
  234. 23)
  235. #endif
  236. #ifdef HAS_SCALEROWDOWN34_MSA
  237. SDANY(ScaleRowDown34_Any_MSA,
  238. ScaleRowDown34_MSA,
  239. ScaleRowDown34_C,
  240. 4 / 3,
  241. 1,
  242. 47)
  243. SDANY(ScaleRowDown34_0_Box_Any_MSA,
  244. ScaleRowDown34_0_Box_MSA,
  245. ScaleRowDown34_0_Box_C,
  246. 4 / 3,
  247. 1,
  248. 47)
  249. SDANY(ScaleRowDown34_1_Box_Any_MSA,
  250. ScaleRowDown34_1_Box_MSA,
  251. ScaleRowDown34_1_Box_C,
  252. 4 / 3,
  253. 1,
  254. 47)
  255. #endif
  256. #ifdef HAS_SCALEROWDOWN38_SSSE3
  257. SDANY(ScaleRowDown38_Any_SSSE3,
  258. ScaleRowDown38_SSSE3,
  259. ScaleRowDown38_C,
  260. 8 / 3,
  261. 1,
  262. 11)
  263. SDANY(ScaleRowDown38_3_Box_Any_SSSE3,
  264. ScaleRowDown38_3_Box_SSSE3,
  265. ScaleRowDown38_3_Box_C,
  266. 8 / 3,
  267. 1,
  268. 5)
  269. SDANY(ScaleRowDown38_2_Box_Any_SSSE3,
  270. ScaleRowDown38_2_Box_SSSE3,
  271. ScaleRowDown38_2_Box_C,
  272. 8 / 3,
  273. 1,
  274. 5)
  275. #endif
  276. #ifdef HAS_SCALEROWDOWN38_NEON
  277. SDANY(ScaleRowDown38_Any_NEON,
  278. ScaleRowDown38_NEON,
  279. ScaleRowDown38_C,
  280. 8 / 3,
  281. 1,
  282. 11)
  283. SDANY(ScaleRowDown38_3_Box_Any_NEON,
  284. ScaleRowDown38_3_Box_NEON,
  285. ScaleRowDown38_3_Box_C,
  286. 8 / 3,
  287. 1,
  288. 11)
  289. SDANY(ScaleRowDown38_2_Box_Any_NEON,
  290. ScaleRowDown38_2_Box_NEON,
  291. ScaleRowDown38_2_Box_C,
  292. 8 / 3,
  293. 1,
  294. 11)
  295. #endif
  296. #ifdef HAS_SCALEROWDOWN38_MSA
  297. SDANY(ScaleRowDown38_Any_MSA,
  298. ScaleRowDown38_MSA,
  299. ScaleRowDown38_C,
  300. 8 / 3,
  301. 1,
  302. 11)
  303. SDANY(ScaleRowDown38_3_Box_Any_MSA,
  304. ScaleRowDown38_3_Box_MSA,
  305. ScaleRowDown38_3_Box_C,
  306. 8 / 3,
  307. 1,
  308. 11)
  309. SDANY(ScaleRowDown38_2_Box_Any_MSA,
  310. ScaleRowDown38_2_Box_MSA,
  311. ScaleRowDown38_2_Box_C,
  312. 8 / 3,
  313. 1,
  314. 11)
  315. #endif
  316. #ifdef HAS_SCALEARGBROWDOWN2_SSE2
  317. SDANY(ScaleARGBRowDown2_Any_SSE2,
  318. ScaleARGBRowDown2_SSE2,
  319. ScaleARGBRowDown2_C,
  320. 2,
  321. 4,
  322. 3)
  323. SDANY(ScaleARGBRowDown2Linear_Any_SSE2,
  324. ScaleARGBRowDown2Linear_SSE2,
  325. ScaleARGBRowDown2Linear_C,
  326. 2,
  327. 4,
  328. 3)
  329. SDANY(ScaleARGBRowDown2Box_Any_SSE2,
  330. ScaleARGBRowDown2Box_SSE2,
  331. ScaleARGBRowDown2Box_C,
  332. 2,
  333. 4,
  334. 3)
  335. #endif
  336. #ifdef HAS_SCALEARGBROWDOWN2_NEON
  337. SDANY(ScaleARGBRowDown2_Any_NEON,
  338. ScaleARGBRowDown2_NEON,
  339. ScaleARGBRowDown2_C,
  340. 2,
  341. 4,
  342. 7)
  343. SDANY(ScaleARGBRowDown2Linear_Any_NEON,
  344. ScaleARGBRowDown2Linear_NEON,
  345. ScaleARGBRowDown2Linear_C,
  346. 2,
  347. 4,
  348. 7)
  349. SDANY(ScaleARGBRowDown2Box_Any_NEON,
  350. ScaleARGBRowDown2Box_NEON,
  351. ScaleARGBRowDown2Box_C,
  352. 2,
  353. 4,
  354. 7)
  355. #endif
  356. #ifdef HAS_SCALEARGBROWDOWN2_MSA
  357. SDANY(ScaleARGBRowDown2_Any_MSA,
  358. ScaleARGBRowDown2_MSA,
  359. ScaleARGBRowDown2_C,
  360. 2,
  361. 4,
  362. 3)
  363. SDANY(ScaleARGBRowDown2Linear_Any_MSA,
  364. ScaleARGBRowDown2Linear_MSA,
  365. ScaleARGBRowDown2Linear_C,
  366. 2,
  367. 4,
  368. 3)
  369. SDANY(ScaleARGBRowDown2Box_Any_MSA,
  370. ScaleARGBRowDown2Box_MSA,
  371. ScaleARGBRowDown2Box_C,
  372. 2,
  373. 4,
  374. 3)
  375. #endif
  376. #undef SDANY
  377. // Scale down by even scale factor.
  378. #define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
  379. void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \
  380. uint8_t* dst_ptr, int dst_width) { \
  381. int r = dst_width & MASK; \
  382. int n = dst_width & ~MASK; \
  383. if (n > 0) { \
  384. SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
  385. } \
  386. SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \
  387. dst_ptr + n * BPP, r); \
  388. }
  389. #ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
  390. SDAANY(ScaleARGBRowDownEven_Any_SSE2,
  391. ScaleARGBRowDownEven_SSE2,
  392. ScaleARGBRowDownEven_C,
  393. 4,
  394. 3)
  395. SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2,
  396. ScaleARGBRowDownEvenBox_SSE2,
  397. ScaleARGBRowDownEvenBox_C,
  398. 4,
  399. 3)
  400. #endif
  401. #ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
  402. SDAANY(ScaleARGBRowDownEven_Any_NEON,
  403. ScaleARGBRowDownEven_NEON,
  404. ScaleARGBRowDownEven_C,
  405. 4,
  406. 3)
  407. SDAANY(ScaleARGBRowDownEvenBox_Any_NEON,
  408. ScaleARGBRowDownEvenBox_NEON,
  409. ScaleARGBRowDownEvenBox_C,
  410. 4,
  411. 3)
  412. #endif
  413. #ifdef HAS_SCALEARGBROWDOWNEVEN_MSA
  414. SDAANY(ScaleARGBRowDownEven_Any_MSA,
  415. ScaleARGBRowDownEven_MSA,
  416. ScaleARGBRowDownEven_C,
  417. 4,
  418. 3)
  419. SDAANY(ScaleARGBRowDownEvenBox_Any_MSA,
  420. ScaleARGBRowDownEvenBox_MSA,
  421. ScaleARGBRowDownEvenBox_C,
  422. 4,
  423. 3)
  424. #endif
  425. // Add rows box filter scale down.
  426. #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
  427. void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \
  428. int n = src_width & ~MASK; \
  429. if (n > 0) { \
  430. SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
  431. } \
  432. SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
  433. }
  434. #ifdef HAS_SCALEADDROW_SSE2
  435. SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
  436. #endif
  437. #ifdef HAS_SCALEADDROW_AVX2
  438. SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
  439. #endif
  440. #ifdef HAS_SCALEADDROW_NEON
  441. SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
  442. #endif
  443. #ifdef HAS_SCALEADDROW_MSA
  444. SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
  445. #endif
  446. #undef SAANY
  447. #ifdef __cplusplus
  448. } // extern "C"
  449. } // namespace libyuv
  450. #endif