scale_any.cc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
  1. /*
  2. * Copyright 2015 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <string.h> // For memset/memcpy
  11. #include "libyuv/scale.h"
  12. #include "libyuv/scale_row.h"
  13. #include "libyuv/basic_types.h"
  14. #ifdef __cplusplus
  15. namespace libyuv {
  16. extern "C" {
  17. #endif
  18. // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols
  19. #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \
  20. void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \
  21. int dx) { \
  22. int r = dst_width & MASK; \
  23. int n = dst_width & ~MASK; \
  24. if (n > 0) { \
  25. TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \
  26. } \
  27. TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \
  28. }
  29. #ifdef HAS_SCALEFILTERCOLS_NEON
  30. CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
  31. #endif
  32. #ifdef HAS_SCALEFILTERCOLS_MSA
  33. CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15)
  34. #endif
  35. #ifdef HAS_SCALEARGBCOLS_NEON
  36. CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
  37. #endif
  38. #ifdef HAS_SCALEARGBCOLS_MSA
  39. CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3)
  40. #endif
  41. #ifdef HAS_SCALEARGBCOLS_MMI
  42. CANY(ScaleARGBCols_Any_MMI, ScaleARGBCols_MMI, ScaleARGBCols_C, 4, 0)
  43. #endif
  44. #ifdef HAS_SCALEARGBFILTERCOLS_NEON
  45. CANY(ScaleARGBFilterCols_Any_NEON,
  46. ScaleARGBFilterCols_NEON,
  47. ScaleARGBFilterCols_C,
  48. 4,
  49. 3)
  50. #endif
  51. #ifdef HAS_SCALEARGBFILTERCOLS_MSA
  52. CANY(ScaleARGBFilterCols_Any_MSA,
  53. ScaleARGBFilterCols_MSA,
  54. ScaleARGBFilterCols_C,
  55. 4,
  56. 7)
  57. #endif
  58. #undef CANY
  59. // Fixed scale down.
  60. // Mask may be non-power of 2, so use MOD
  61. #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
  62. void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
  63. int dst_width) { \
  64. int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \
  65. int n = dst_width - r; \
  66. if (n > 0) { \
  67. SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
  68. } \
  69. SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
  70. dst_ptr + n * BPP, r); \
  71. }
  72. // Fixed scale down for odd source width. Used by I420Blend subsampling.
  73. // Since dst_width is (width + 1) / 2, this function scales one less pixel
  74. // and copies the last pixel.
  75. #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
  76. void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
  77. int dst_width) { \
  78. int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \
  79. int n = (dst_width - 1) - r; \
  80. if (n > 0) { \
  81. SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
  82. } \
  83. SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
  84. dst_ptr + n * BPP, r + 1); \
  85. }
  86. #ifdef HAS_SCALEROWDOWN2_SSSE3
  87. SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
  88. SDANY(ScaleRowDown2Linear_Any_SSSE3,
  89. ScaleRowDown2Linear_SSSE3,
  90. ScaleRowDown2Linear_C,
  91. 2,
  92. 1,
  93. 15)
  94. SDANY(ScaleRowDown2Box_Any_SSSE3,
  95. ScaleRowDown2Box_SSSE3,
  96. ScaleRowDown2Box_C,
  97. 2,
  98. 1,
  99. 15)
  100. SDODD(ScaleRowDown2Box_Odd_SSSE3,
  101. ScaleRowDown2Box_SSSE3,
  102. ScaleRowDown2Box_Odd_C,
  103. 2,
  104. 1,
  105. 15)
  106. #endif
  107. #ifdef HAS_SCALEROWDOWN2_AVX2
  108. SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
  109. SDANY(ScaleRowDown2Linear_Any_AVX2,
  110. ScaleRowDown2Linear_AVX2,
  111. ScaleRowDown2Linear_C,
  112. 2,
  113. 1,
  114. 31)
  115. SDANY(ScaleRowDown2Box_Any_AVX2,
  116. ScaleRowDown2Box_AVX2,
  117. ScaleRowDown2Box_C,
  118. 2,
  119. 1,
  120. 31)
  121. SDODD(ScaleRowDown2Box_Odd_AVX2,
  122. ScaleRowDown2Box_AVX2,
  123. ScaleRowDown2Box_Odd_C,
  124. 2,
  125. 1,
  126. 31)
  127. #endif
  128. #ifdef HAS_SCALEROWDOWN2_NEON
  129. SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
  130. SDANY(ScaleRowDown2Linear_Any_NEON,
  131. ScaleRowDown2Linear_NEON,
  132. ScaleRowDown2Linear_C,
  133. 2,
  134. 1,
  135. 15)
  136. SDANY(ScaleRowDown2Box_Any_NEON,
  137. ScaleRowDown2Box_NEON,
  138. ScaleRowDown2Box_C,
  139. 2,
  140. 1,
  141. 15)
  142. SDODD(ScaleRowDown2Box_Odd_NEON,
  143. ScaleRowDown2Box_NEON,
  144. ScaleRowDown2Box_Odd_C,
  145. 2,
  146. 1,
  147. 15)
  148. #endif
  149. #ifdef HAS_SCALEROWDOWN2_MSA
  150. SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31)
  151. SDANY(ScaleRowDown2Linear_Any_MSA,
  152. ScaleRowDown2Linear_MSA,
  153. ScaleRowDown2Linear_C,
  154. 2,
  155. 1,
  156. 31)
  157. SDANY(ScaleRowDown2Box_Any_MSA,
  158. ScaleRowDown2Box_MSA,
  159. ScaleRowDown2Box_C,
  160. 2,
  161. 1,
  162. 31)
  163. #endif
  164. #ifdef HAS_SCALEROWDOWN2_MMI
  165. SDANY(ScaleRowDown2_Any_MMI, ScaleRowDown2_MMI, ScaleRowDown2_C, 2, 1, 7)
  166. SDANY(ScaleRowDown2Linear_Any_MMI,
  167. ScaleRowDown2Linear_MMI,
  168. ScaleRowDown2Linear_C,
  169. 2,
  170. 1,
  171. 7)
  172. SDANY(ScaleRowDown2Box_Any_MMI,
  173. ScaleRowDown2Box_MMI,
  174. ScaleRowDown2Box_C,
  175. 2,
  176. 1,
  177. 7)
  178. SDODD(ScaleRowDown2Box_Odd_MMI,
  179. ScaleRowDown2Box_MMI,
  180. ScaleRowDown2Box_Odd_C,
  181. 2,
  182. 1,
  183. 7)
  184. #endif
  185. #ifdef HAS_SCALEROWDOWN4_SSSE3
  186. SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
  187. SDANY(ScaleRowDown4Box_Any_SSSE3,
  188. ScaleRowDown4Box_SSSE3,
  189. ScaleRowDown4Box_C,
  190. 4,
  191. 1,
  192. 7)
  193. #endif
  194. #ifdef HAS_SCALEROWDOWN4_AVX2
  195. SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
  196. SDANY(ScaleRowDown4Box_Any_AVX2,
  197. ScaleRowDown4Box_AVX2,
  198. ScaleRowDown4Box_C,
  199. 4,
  200. 1,
  201. 15)
  202. #endif
  203. #ifdef HAS_SCALEROWDOWN4_NEON
  204. SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
  205. SDANY(ScaleRowDown4Box_Any_NEON,
  206. ScaleRowDown4Box_NEON,
  207. ScaleRowDown4Box_C,
  208. 4,
  209. 1,
  210. 7)
  211. #endif
  212. #ifdef HAS_SCALEROWDOWN4_MSA
  213. SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15)
  214. SDANY(ScaleRowDown4Box_Any_MSA,
  215. ScaleRowDown4Box_MSA,
  216. ScaleRowDown4Box_C,
  217. 4,
  218. 1,
  219. 15)
  220. #endif
  221. #ifdef HAS_SCALEROWDOWN4_MMI
  222. SDANY(ScaleRowDown4_Any_MMI, ScaleRowDown4_MMI, ScaleRowDown4_C, 4, 1, 7)
  223. SDANY(ScaleRowDown4Box_Any_MMI,
  224. ScaleRowDown4Box_MMI,
  225. ScaleRowDown4Box_C,
  226. 4,
  227. 1,
  228. 7)
  229. #endif
  230. #ifdef HAS_SCALEROWDOWN34_SSSE3
  231. SDANY(ScaleRowDown34_Any_SSSE3,
  232. ScaleRowDown34_SSSE3,
  233. ScaleRowDown34_C,
  234. 4 / 3,
  235. 1,
  236. 23)
  237. SDANY(ScaleRowDown34_0_Box_Any_SSSE3,
  238. ScaleRowDown34_0_Box_SSSE3,
  239. ScaleRowDown34_0_Box_C,
  240. 4 / 3,
  241. 1,
  242. 23)
  243. SDANY(ScaleRowDown34_1_Box_Any_SSSE3,
  244. ScaleRowDown34_1_Box_SSSE3,
  245. ScaleRowDown34_1_Box_C,
  246. 4 / 3,
  247. 1,
  248. 23)
  249. #endif
  250. #ifdef HAS_SCALEROWDOWN34_NEON
  251. SDANY(ScaleRowDown34_Any_NEON,
  252. ScaleRowDown34_NEON,
  253. ScaleRowDown34_C,
  254. 4 / 3,
  255. 1,
  256. 23)
  257. SDANY(ScaleRowDown34_0_Box_Any_NEON,
  258. ScaleRowDown34_0_Box_NEON,
  259. ScaleRowDown34_0_Box_C,
  260. 4 / 3,
  261. 1,
  262. 23)
  263. SDANY(ScaleRowDown34_1_Box_Any_NEON,
  264. ScaleRowDown34_1_Box_NEON,
  265. ScaleRowDown34_1_Box_C,
  266. 4 / 3,
  267. 1,
  268. 23)
  269. #endif
  270. #ifdef HAS_SCALEROWDOWN34_MSA
  271. SDANY(ScaleRowDown34_Any_MSA,
  272. ScaleRowDown34_MSA,
  273. ScaleRowDown34_C,
  274. 4 / 3,
  275. 1,
  276. 47)
  277. SDANY(ScaleRowDown34_0_Box_Any_MSA,
  278. ScaleRowDown34_0_Box_MSA,
  279. ScaleRowDown34_0_Box_C,
  280. 4 / 3,
  281. 1,
  282. 47)
  283. SDANY(ScaleRowDown34_1_Box_Any_MSA,
  284. ScaleRowDown34_1_Box_MSA,
  285. ScaleRowDown34_1_Box_C,
  286. 4 / 3,
  287. 1,
  288. 47)
  289. #endif
  290. #ifdef HAS_SCALEROWDOWN38_SSSE3
  291. SDANY(ScaleRowDown38_Any_SSSE3,
  292. ScaleRowDown38_SSSE3,
  293. ScaleRowDown38_C,
  294. 8 / 3,
  295. 1,
  296. 11)
  297. SDANY(ScaleRowDown38_3_Box_Any_SSSE3,
  298. ScaleRowDown38_3_Box_SSSE3,
  299. ScaleRowDown38_3_Box_C,
  300. 8 / 3,
  301. 1,
  302. 5)
  303. SDANY(ScaleRowDown38_2_Box_Any_SSSE3,
  304. ScaleRowDown38_2_Box_SSSE3,
  305. ScaleRowDown38_2_Box_C,
  306. 8 / 3,
  307. 1,
  308. 5)
  309. #endif
  310. #ifdef HAS_SCALEROWDOWN38_NEON
  311. SDANY(ScaleRowDown38_Any_NEON,
  312. ScaleRowDown38_NEON,
  313. ScaleRowDown38_C,
  314. 8 / 3,
  315. 1,
  316. 11)
  317. SDANY(ScaleRowDown38_3_Box_Any_NEON,
  318. ScaleRowDown38_3_Box_NEON,
  319. ScaleRowDown38_3_Box_C,
  320. 8 / 3,
  321. 1,
  322. 11)
  323. SDANY(ScaleRowDown38_2_Box_Any_NEON,
  324. ScaleRowDown38_2_Box_NEON,
  325. ScaleRowDown38_2_Box_C,
  326. 8 / 3,
  327. 1,
  328. 11)
  329. #endif
  330. #ifdef HAS_SCALEROWDOWN38_MSA
  331. SDANY(ScaleRowDown38_Any_MSA,
  332. ScaleRowDown38_MSA,
  333. ScaleRowDown38_C,
  334. 8 / 3,
  335. 1,
  336. 11)
  337. SDANY(ScaleRowDown38_3_Box_Any_MSA,
  338. ScaleRowDown38_3_Box_MSA,
  339. ScaleRowDown38_3_Box_C,
  340. 8 / 3,
  341. 1,
  342. 11)
  343. SDANY(ScaleRowDown38_2_Box_Any_MSA,
  344. ScaleRowDown38_2_Box_MSA,
  345. ScaleRowDown38_2_Box_C,
  346. 8 / 3,
  347. 1,
  348. 11)
  349. #endif
  350. #ifdef HAS_SCALEARGBROWDOWN2_SSE2
  351. SDANY(ScaleARGBRowDown2_Any_SSE2,
  352. ScaleARGBRowDown2_SSE2,
  353. ScaleARGBRowDown2_C,
  354. 2,
  355. 4,
  356. 3)
  357. SDANY(ScaleARGBRowDown2Linear_Any_SSE2,
  358. ScaleARGBRowDown2Linear_SSE2,
  359. ScaleARGBRowDown2Linear_C,
  360. 2,
  361. 4,
  362. 3)
  363. SDANY(ScaleARGBRowDown2Box_Any_SSE2,
  364. ScaleARGBRowDown2Box_SSE2,
  365. ScaleARGBRowDown2Box_C,
  366. 2,
  367. 4,
  368. 3)
  369. #endif
  370. #ifdef HAS_SCALEARGBROWDOWN2_NEON
  371. SDANY(ScaleARGBRowDown2_Any_NEON,
  372. ScaleARGBRowDown2_NEON,
  373. ScaleARGBRowDown2_C,
  374. 2,
  375. 4,
  376. 7)
  377. SDANY(ScaleARGBRowDown2Linear_Any_NEON,
  378. ScaleARGBRowDown2Linear_NEON,
  379. ScaleARGBRowDown2Linear_C,
  380. 2,
  381. 4,
  382. 7)
  383. SDANY(ScaleARGBRowDown2Box_Any_NEON,
  384. ScaleARGBRowDown2Box_NEON,
  385. ScaleARGBRowDown2Box_C,
  386. 2,
  387. 4,
  388. 7)
  389. #endif
  390. #ifdef HAS_SCALEARGBROWDOWN2_MSA
  391. SDANY(ScaleARGBRowDown2_Any_MSA,
  392. ScaleARGBRowDown2_MSA,
  393. ScaleARGBRowDown2_C,
  394. 2,
  395. 4,
  396. 3)
  397. SDANY(ScaleARGBRowDown2Linear_Any_MSA,
  398. ScaleARGBRowDown2Linear_MSA,
  399. ScaleARGBRowDown2Linear_C,
  400. 2,
  401. 4,
  402. 3)
  403. SDANY(ScaleARGBRowDown2Box_Any_MSA,
  404. ScaleARGBRowDown2Box_MSA,
  405. ScaleARGBRowDown2Box_C,
  406. 2,
  407. 4,
  408. 3)
  409. #endif
  410. #ifdef HAS_SCALEARGBROWDOWN2_MMI
  411. SDANY(ScaleARGBRowDown2_Any_MMI,
  412. ScaleARGBRowDown2_MMI,
  413. ScaleARGBRowDown2_C,
  414. 2,
  415. 4,
  416. 1)
  417. SDANY(ScaleARGBRowDown2Linear_Any_MMI,
  418. ScaleARGBRowDown2Linear_MMI,
  419. ScaleARGBRowDown2Linear_C,
  420. 2,
  421. 4,
  422. 1)
  423. SDANY(ScaleARGBRowDown2Box_Any_MMI,
  424. ScaleARGBRowDown2Box_MMI,
  425. ScaleARGBRowDown2Box_C,
  426. 2,
  427. 4,
  428. 1)
  429. #endif
  430. #undef SDANY
  431. // Scale down by even scale factor.
  432. #define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
  433. void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \
  434. uint8_t* dst_ptr, int dst_width) { \
  435. int r = dst_width & MASK; \
  436. int n = dst_width & ~MASK; \
  437. if (n > 0) { \
  438. SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
  439. } \
  440. SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \
  441. dst_ptr + n * BPP, r); \
  442. }
  443. #ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
  444. SDAANY(ScaleARGBRowDownEven_Any_SSE2,
  445. ScaleARGBRowDownEven_SSE2,
  446. ScaleARGBRowDownEven_C,
  447. 4,
  448. 3)
  449. SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2,
  450. ScaleARGBRowDownEvenBox_SSE2,
  451. ScaleARGBRowDownEvenBox_C,
  452. 4,
  453. 3)
  454. #endif
  455. #ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
  456. SDAANY(ScaleARGBRowDownEven_Any_NEON,
  457. ScaleARGBRowDownEven_NEON,
  458. ScaleARGBRowDownEven_C,
  459. 4,
  460. 3)
  461. SDAANY(ScaleARGBRowDownEvenBox_Any_NEON,
  462. ScaleARGBRowDownEvenBox_NEON,
  463. ScaleARGBRowDownEvenBox_C,
  464. 4,
  465. 3)
  466. #endif
  467. #ifdef HAS_SCALEARGBROWDOWNEVEN_MSA
  468. SDAANY(ScaleARGBRowDownEven_Any_MSA,
  469. ScaleARGBRowDownEven_MSA,
  470. ScaleARGBRowDownEven_C,
  471. 4,
  472. 3)
  473. SDAANY(ScaleARGBRowDownEvenBox_Any_MSA,
  474. ScaleARGBRowDownEvenBox_MSA,
  475. ScaleARGBRowDownEvenBox_C,
  476. 4,
  477. 3)
  478. #endif
  479. #ifdef HAS_SCALEARGBROWDOWNEVEN_MMI
  480. SDAANY(ScaleARGBRowDownEven_Any_MMI,
  481. ScaleARGBRowDownEven_MMI,
  482. ScaleARGBRowDownEven_C,
  483. 4,
  484. 1)
  485. SDAANY(ScaleARGBRowDownEvenBox_Any_MMI,
  486. ScaleARGBRowDownEvenBox_MMI,
  487. ScaleARGBRowDownEvenBox_C,
  488. 4,
  489. 1)
  490. #endif
  491. #ifdef SASIMDONLY
  492. // This also works and uses memcpy and SIMD instead of C, but is slower on ARM
  493. // Add rows box filter scale down. Using macro from row_any
  494. #define SAROW(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
  495. void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) { \
  496. SIMD_ALIGNED(uint16_t dst_temp[32]); \
  497. SIMD_ALIGNED(uint8_t src_temp[32]); \
  498. memset(dst_temp, 0, 32 * 2); /* for msan */ \
  499. int r = width & MASK; \
  500. int n = width & ~MASK; \
  501. if (n > 0) { \
  502. ANY_SIMD(src_ptr, dst_ptr, n); \
  503. } \
  504. memcpy(src_temp, src_ptr + n * SBPP, r * SBPP); \
  505. memcpy(dst_temp, dst_ptr + n * BPP, r * BPP); \
  506. ANY_SIMD(src_temp, dst_temp, MASK + 1); \
  507. memcpy(dst_ptr + n * BPP, dst_temp, r * BPP); \
  508. }
  509. #ifdef HAS_SCALEADDROW_SSE2
  510. SAROW(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, 1, 2, 15)
  511. #endif
  512. #ifdef HAS_SCALEADDROW_AVX2
  513. SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31)
  514. #endif
  515. #ifdef HAS_SCALEADDROW_NEON
  516. SAROW(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, 1, 2, 15)
  517. #endif
  518. #ifdef HAS_SCALEADDROW_MSA
  519. SAROW(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, 1, 2, 15)
  520. #endif
  521. #ifdef HAS_SCALEADDROW_MMI
  522. SAROW(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, 1, 2, 7)
  523. #endif
  524. #undef SAANY
  525. #else
  526. // Add rows box filter scale down.
  527. #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
  528. void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \
  529. int n = src_width & ~MASK; \
  530. if (n > 0) { \
  531. SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
  532. } \
  533. SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
  534. }
  535. #ifdef HAS_SCALEADDROW_SSE2
  536. SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
  537. #endif
  538. #ifdef HAS_SCALEADDROW_AVX2
  539. SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
  540. #endif
  541. #ifdef HAS_SCALEADDROW_NEON
  542. SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
  543. #endif
  544. #ifdef HAS_SCALEADDROW_MSA
  545. SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
  546. #endif
  547. #ifdef HAS_SCALEADDROW_MMI
  548. SAANY(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, ScaleAddRow_C, 7)
  549. #endif
  550. #undef SAANY
  551. #endif // SASIMDONLY
  552. #ifdef __cplusplus
  553. } // extern "C"
  554. } // namespace libyuv
  555. #endif