scale_common.cc 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333
  1. /*
  2. * Copyright 2013 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "libyuv/scale.h"
  11. #include <assert.h>
  12. #include <string.h>
  13. #include "libyuv/cpu_id.h"
  14. #include "libyuv/planar_functions.h" // For CopyARGB
  15. #include "libyuv/row.h"
  16. #include "libyuv/scale_row.h"
  17. #ifdef __cplusplus
  18. namespace libyuv {
  19. extern "C" {
  20. #endif
  // Returns the magnitude of v (v itself when non-negative, -v otherwise).
  static __inline int Abs(int v) {
    if (v < 0) {
      return -v;
    }
    return v;
  }
  24. // CPU agnostic row functions
  // Point-samples one row of 8 bit pixels down by 2x horizontally.
  // Each output pixel is a copy of the second pixel of a source pair
  // (src_ptr[1], src_ptr[3], ...). src_stride is unused.
  void ScaleRowDown2_C(const uint8_t* src_ptr,
                       ptrdiff_t src_stride,
                       uint8_t* dst,
                       int dst_width) {
    const uint8_t* in = src_ptr;
    uint8_t* out = dst;
    int remaining;
    (void)src_stride;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      *out++ = in[1];
      *out++ = in[3];
      in += 4;
    }
    if (dst_width & 1) {
      *out = in[1];
    }
  }
  // 16 bit variant of the 2x point-sampling row scaler: copies the second
  // pixel of every source pair to the output. src_stride is unused.
  void ScaleRowDown2_16_C(const uint16_t* src_ptr,
                          ptrdiff_t src_stride,
                          uint16_t* dst,
                          int dst_width) {
    const uint16_t* in = src_ptr;
    uint16_t* out = dst;
    int remaining;
    (void)src_stride;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      *out++ = in[1];
      *out++ = in[3];
      in += 4;
    }
    if (dst_width & 1) {
      *out = in[1];
    }
  }
  // Scales one row of 8 bit pixels down by 2x horizontally, producing each
  // output as the rounded average of two adjacent source pixels.
  // src_stride is unused.
  void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
                             ptrdiff_t src_stride,
                             uint8_t* dst,
                             int dst_width) {
    const uint8_t* in = src_ptr;
    int remaining;
    (void)src_stride;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      dst[0] = (uint8_t)((in[0] + in[1] + 1) >> 1);
      dst[1] = (uint8_t)((in[2] + in[3] + 1) >> 1);
      in += 4;
      dst += 2;
    }
    if (dst_width & 1) {
      dst[0] = (uint8_t)((in[0] + in[1] + 1) >> 1);
    }
  }
  // 16 bit variant: scales one row down by 2x horizontally, each output being
  // the rounded average of two adjacent source pixels. src_stride is unused.
  void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
                                ptrdiff_t src_stride,
                                uint16_t* dst,
                                int dst_width) {
    const uint16_t* in = src_ptr;
    int remaining;
    (void)src_stride;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      dst[0] = (uint16_t)((in[0] + in[1] + 1) >> 1);
      dst[1] = (uint16_t)((in[2] + in[3] + 1) >> 1);
      in += 4;
      dst += 2;
    }
    if (dst_width & 1) {
      dst[0] = (uint16_t)((in[0] + in[1] + 1) >> 1);
    }
  }
  // Scales 8 bit pixels down by 2x using a 2x2 box filter: each output is the
  // rounded average of two pixels from the row at src_ptr and the two pixels
  // below them in the row at src_ptr + src_stride.
  void ScaleRowDown2Box_C(const uint8_t* src_ptr,
                          ptrdiff_t src_stride,
                          uint8_t* dst,
                          int dst_width) {
    const uint8_t* top = src_ptr;
    const uint8_t* bottom = src_ptr + src_stride;
    int remaining;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      dst[0] = (uint8_t)((top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2);
      dst[1] = (uint8_t)((top[2] + top[3] + bottom[2] + bottom[3] + 2) >> 2);
      top += 4;
      bottom += 4;
      dst += 2;
    }
    if (dst_width & 1) {
      dst[0] = (uint8_t)((top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2);
    }
  }
  // 2x2 box filter variant whose last output is built from a single source
  // column. The first dst_width - 1 outputs are rounded 2x2 averages; the
  // final output is the rounded average of one pixel from each of the two
  // rows.
  void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* dst,
                              int dst_width) {
    const uint8_t* top = src_ptr;
    const uint8_t* bottom = src_ptr + src_stride;
    const int full_boxes = dst_width - 1;  // outputs built from full 2x2 boxes
    int remaining;
    for (remaining = full_boxes; remaining > 1; remaining -= 2) {
      dst[0] = (uint8_t)((top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2);
      dst[1] = (uint8_t)((top[2] + top[3] + bottom[2] + bottom[3] + 2) >> 2);
      top += 4;
      bottom += 4;
      dst += 2;
    }
    if (full_boxes & 1) {
      dst[0] = (uint8_t)((top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2);
      top += 2;
      bottom += 2;
      dst += 1;
    }
    // Trailing output: only one source column contributes.
    dst[0] = (uint8_t)((top[0] + bottom[0] + 1) >> 1);
  }
  // 16 bit variant of the 2x2 box filter: each output is the rounded average
  // of two pixels from the row at src_ptr and the two pixels below them in
  // the row at src_ptr + src_stride (stride counted in elements).
  void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
                             ptrdiff_t src_stride,
                             uint16_t* dst,
                             int dst_width) {
    const uint16_t* top = src_ptr;
    const uint16_t* bottom = src_ptr + src_stride;
    int remaining;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      dst[0] = (uint16_t)((top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2);
      dst[1] = (uint16_t)((top[2] + top[3] + bottom[2] + bottom[3] + 2) >> 2);
      top += 4;
      bottom += 4;
      dst += 2;
    }
    if (dst_width & 1) {
      dst[0] = (uint16_t)((top[0] + top[1] + bottom[0] + bottom[1] + 2) >> 2);
    }
  }
  // Point-samples one row of 8 bit pixels down by 4x horizontally: each
  // output copies the third pixel of a group of four source pixels
  // (src_ptr[2], src_ptr[6], ...). src_stride is unused.
  void ScaleRowDown4_C(const uint8_t* src_ptr,
                       ptrdiff_t src_stride,
                       uint8_t* dst,
                       int dst_width) {
    const uint8_t* in = src_ptr;
    uint8_t* out = dst;
    int remaining;
    (void)src_stride;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      *out++ = in[2];
      *out++ = in[6];
      in += 8;
    }
    if (dst_width & 1) {
      *out = in[2];
    }
  }
  // 16 bit variant of the 4x point-sampling row scaler: each output copies
  // the third pixel of a group of four source pixels. src_stride is unused.
  void ScaleRowDown4_16_C(const uint16_t* src_ptr,
                          ptrdiff_t src_stride,
                          uint16_t* dst,
                          int dst_width) {
    const uint16_t* in = src_ptr;
    uint16_t* out = dst;
    int remaining;
    (void)src_stride;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      *out++ = in[2];
      *out++ = in[6];
      in += 8;
    }
    if (dst_width & 1) {
      *out = in[2];
    }
  }
  // Rounded average of the 4x4 block of 8 bit pixels whose top-left pixel is
  // at p; consecutive rows are `stride` elements apart.
  static uint8_t ScaleRowDown4Box_Average(const uint8_t* p, intptr_t stride) {
    int sum = 8;  // rounding term for the divide by 16
    int row;
    int col;
    for (row = 0; row < 4; ++row) {
      for (col = 0; col < 4; ++col) {
        sum += p[stride * row + col];
      }
    }
    return (uint8_t)(sum >> 4);
  }

  // Scales 8 bit pixels down by 4x using a 4x4 box filter. Reads four source
  // rows starting at src_ptr, each src_stride elements apart, and writes
  // dst_width averaged pixels to dst.
  void ScaleRowDown4Box_C(const uint8_t* src_ptr,
                          ptrdiff_t src_stride,
                          uint8_t* dst,
                          int dst_width) {
    intptr_t stride = src_stride;
    int remaining;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      dst[0] = ScaleRowDown4Box_Average(src_ptr, stride);
      dst[1] = ScaleRowDown4Box_Average(src_ptr + 4, stride);
      dst += 2;
      src_ptr += 8;
    }
    if (dst_width & 1) {
      dst[0] = ScaleRowDown4Box_Average(src_ptr, stride);
    }
  }
  // Rounded average of the 4x4 block of 16 bit pixels whose top-left pixel is
  // at p; consecutive rows are `stride` elements apart.
  static uint16_t ScaleRowDown4Box_Average16(const uint16_t* p,
                                             intptr_t stride) {
    int sum = 8;  // rounding term for the divide by 16
    int row;
    int col;
    for (row = 0; row < 4; ++row) {
      for (col = 0; col < 4; ++col) {
        sum += p[stride * row + col];
      }
    }
    return (uint16_t)(sum >> 4);
  }

  // 16 bit variant of the 4x4 box filter. Reads four source rows starting at
  // src_ptr, each src_stride elements apart, and writes dst_width averaged
  // pixels to dst.
  void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
                             ptrdiff_t src_stride,
                             uint16_t* dst,
                             int dst_width) {
    intptr_t stride = src_stride;
    int remaining;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      dst[0] = ScaleRowDown4Box_Average16(src_ptr, stride);
      dst[1] = ScaleRowDown4Box_Average16(src_ptr + 4, stride);
      dst += 2;
      src_ptr += 8;
    }
    if (dst_width & 1) {
      dst[0] = ScaleRowDown4Box_Average16(src_ptr, stride);
    }
  }
  // Point-samples one row of 8 bit pixels from 4 source pixels to 3 output
  // pixels: of every four source pixels, the third (index 2) is dropped.
  // dst_width is asserted to be a positive multiple of 3. src_stride is
  // unused.
  void ScaleRowDown34_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
    const uint8_t* in = src_ptr;
    uint8_t* out = dst;
    int remaining;
    (void)src_stride;
    assert((dst_width % 3 == 0) && (dst_width > 0));
    for (remaining = dst_width; remaining > 0; remaining -= 3) {
      *out++ = in[0];
      *out++ = in[1];
      *out++ = in[3];
      in += 4;
    }
  }
  // 16 bit variant of the 4 -> 3 point sampler: of every four source pixels,
  // the third (index 2) is dropped. dst_width is asserted to be a positive
  // multiple of 3. src_stride is unused.
  void ScaleRowDown34_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
    const uint16_t* in = src_ptr;
    uint16_t* out = dst;
    int remaining;
    (void)src_stride;
    assert((dst_width % 3 == 0) && (dst_width > 0));
    for (remaining = dst_width; remaining > 0; remaining -= 3) {
      *out++ = in[0];
      *out++ = in[1];
      *out++ = in[3];
      in += 4;
    }
  }
  // Filter rows 0 and 1 together, 3 : 1
  // Each row is first filtered horizontally from 4 pixels to 3 (weights 3:1,
  // 1:1 and 1:3), then the two rows are blended with the row at src_ptr
  // weighted 3 and the row at src_ptr + src_stride weighted 1.
  // dst_width is asserted to be a positive multiple of 3.
  void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* d,
                              int dst_width) {
    const uint8_t* top = src_ptr;
    const uint8_t* bottom = src_ptr + src_stride;
    int remaining;
    assert((dst_width % 3 == 0) && (dst_width > 0));
    for (remaining = dst_width; remaining > 0; remaining -= 3) {
      uint8_t upper[3];
      uint8_t lower[3];
      int k;
      // Horizontal pass on both rows, done before any output is written.
      upper[0] = (uint8_t)((top[0] * 3 + top[1] * 1 + 2) >> 2);
      upper[1] = (uint8_t)((top[1] * 1 + top[2] * 1 + 1) >> 1);
      upper[2] = (uint8_t)((top[2] * 1 + top[3] * 3 + 2) >> 2);
      lower[0] = (uint8_t)((bottom[0] * 3 + bottom[1] * 1 + 2) >> 2);
      lower[1] = (uint8_t)((bottom[1] * 1 + bottom[2] * 1 + 1) >> 1);
      lower[2] = (uint8_t)((bottom[2] * 1 + bottom[3] * 3 + 2) >> 2);
      // Vertical pass, 3 : 1.
      for (k = 0; k < 3; ++k) {
        d[k] = (uint8_t)((upper[k] * 3 + lower[k] + 2) >> 2);
      }
      d += 3;
      top += 4;
      bottom += 4;
    }
  }
  // 16 bit variant of the 3 : 1 row blend. Each row is first filtered
  // horizontally from 4 pixels to 3 (weights 3:1, 1:1 and 1:3), then the row
  // at src_ptr (weight 3) is blended with the row at src_ptr + src_stride
  // (weight 1). dst_width is asserted to be a positive multiple of 3.
  void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint16_t* d,
                                 int dst_width) {
    const uint16_t* top = src_ptr;
    const uint16_t* bottom = src_ptr + src_stride;
    int remaining;
    assert((dst_width % 3 == 0) && (dst_width > 0));
    for (remaining = dst_width; remaining > 0; remaining -= 3) {
      uint16_t upper[3];
      uint16_t lower[3];
      int k;
      // Horizontal pass on both rows, done before any output is written.
      upper[0] = (uint16_t)((top[0] * 3 + top[1] * 1 + 2) >> 2);
      upper[1] = (uint16_t)((top[1] * 1 + top[2] * 1 + 1) >> 1);
      upper[2] = (uint16_t)((top[2] * 1 + top[3] * 3 + 2) >> 2);
      lower[0] = (uint16_t)((bottom[0] * 3 + bottom[1] * 1 + 2) >> 2);
      lower[1] = (uint16_t)((bottom[1] * 1 + bottom[2] * 1 + 1) >> 1);
      lower[2] = (uint16_t)((bottom[2] * 1 + bottom[3] * 3 + 2) >> 2);
      // Vertical pass, 3 : 1.
      for (k = 0; k < 3; ++k) {
        d[k] = (uint16_t)((upper[k] * 3 + lower[k] + 2) >> 2);
      }
      d += 3;
      top += 4;
      bottom += 4;
    }
  }
  // Filter rows 1 and 2 together, 1 : 1
  // Each row is first filtered horizontally from 4 pixels to 3 (weights 3:1,
  // 1:1 and 1:3), then the rows at src_ptr and src_ptr + src_stride are
  // averaged with rounding. dst_width is asserted to be a positive multiple
  // of 3.
  void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* d,
                              int dst_width) {
    const uint8_t* top = src_ptr;
    const uint8_t* bottom = src_ptr + src_stride;
    int remaining;
    assert((dst_width % 3 == 0) && (dst_width > 0));
    for (remaining = dst_width; remaining > 0; remaining -= 3) {
      uint8_t upper[3];
      uint8_t lower[3];
      int k;
      // Horizontal pass on both rows, done before any output is written.
      upper[0] = (uint8_t)((top[0] * 3 + top[1] * 1 + 2) >> 2);
      upper[1] = (uint8_t)((top[1] * 1 + top[2] * 1 + 1) >> 1);
      upper[2] = (uint8_t)((top[2] * 1 + top[3] * 3 + 2) >> 2);
      lower[0] = (uint8_t)((bottom[0] * 3 + bottom[1] * 1 + 2) >> 2);
      lower[1] = (uint8_t)((bottom[1] * 1 + bottom[2] * 1 + 1) >> 1);
      lower[2] = (uint8_t)((bottom[2] * 1 + bottom[3] * 3 + 2) >> 2);
      // Vertical pass, 1 : 1.
      for (k = 0; k < 3; ++k) {
        d[k] = (uint8_t)((upper[k] + lower[k] + 1) >> 1);
      }
      d += 3;
      top += 4;
      bottom += 4;
    }
  }
  // 16 bit variant of the 1 : 1 row blend. Each row is first filtered
  // horizontally from 4 pixels to 3 (weights 3:1, 1:1 and 1:3), then the rows
  // at src_ptr and src_ptr + src_stride are averaged with rounding.
  // dst_width is asserted to be a positive multiple of 3.
  void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint16_t* d,
                                 int dst_width) {
    const uint16_t* top = src_ptr;
    const uint16_t* bottom = src_ptr + src_stride;
    int remaining;
    assert((dst_width % 3 == 0) && (dst_width > 0));
    for (remaining = dst_width; remaining > 0; remaining -= 3) {
      uint16_t upper[3];
      uint16_t lower[3];
      int k;
      // Horizontal pass on both rows, done before any output is written.
      upper[0] = (uint16_t)((top[0] * 3 + top[1] * 1 + 2) >> 2);
      upper[1] = (uint16_t)((top[1] * 1 + top[2] * 1 + 1) >> 1);
      upper[2] = (uint16_t)((top[2] * 1 + top[3] * 3 + 2) >> 2);
      lower[0] = (uint16_t)((bottom[0] * 3 + bottom[1] * 1 + 2) >> 2);
      lower[1] = (uint16_t)((bottom[1] * 1 + bottom[2] * 1 + 1) >> 1);
      lower[2] = (uint16_t)((bottom[2] * 1 + bottom[3] * 3 + 2) >> 2);
      // Vertical pass, 1 : 1.
      for (k = 0; k < 3; ++k) {
        d[k] = (uint16_t)((upper[k] + lower[k] + 1) >> 1);
      }
      d += 3;
      top += 4;
      bottom += 4;
    }
  }
  // Scales a single row of pixels using point sampling.
  // x is the starting source position and dx the per-output step, both in
  // 16.16 fixed point; the integer part (x >> 16) selects the source pixel.
  void ScaleCols_C(uint8_t* dst_ptr,
                   const uint8_t* src_ptr,
                   int dst_width,
                   int x,
                   int dx) {
    uint8_t* out = dst_ptr;
    int remaining;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      *out++ = src_ptr[x >> 16];
      x += dx;
      *out++ = src_ptr[x >> 16];
      x += dx;
    }
    if (dst_width & 1) {
      *out = src_ptr[x >> 16];
    }
  }
  // 16 bit variant of the point-sampling column scaler. x is the starting
  // source position and dx the per-output step, both in 16.16 fixed point;
  // the integer part (x >> 16) selects the source pixel.
  void ScaleCols_16_C(uint16_t* dst_ptr,
                      const uint16_t* src_ptr,
                      int dst_width,
                      int x,
                      int dx) {
    uint16_t* out = dst_ptr;
    int remaining;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      *out++ = src_ptr[x >> 16];
      x += dx;
      *out++ = src_ptr[x >> 16];
      x += dx;
    }
    if (dst_width & 1) {
      *out = src_ptr[x >> 16];
    }
  }
  // Scales a single row of pixels up by 2x using point sampling.
  // Every source pixel is written twice; x and dx are ignored.
  void ScaleColsUp2_C(uint8_t* dst_ptr,
                      const uint8_t* src_ptr,
                      int dst_width,
                      int x,
                      int dx) {
    const uint8_t* in = src_ptr;
    int remaining;
    (void)x;
    (void)dx;
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      const uint8_t pixel = *in++;
      dst_ptr[0] = pixel;
      dst_ptr[1] = pixel;
      dst_ptr += 2;
    }
    // Odd width: the last source pixel is written only once.
    if (dst_width & 1) {
      dst_ptr[0] = in[0];
    }
  }
  // 16 bit variant of the 2x point-sampling upscaler: every source pixel is
  // written twice; x and dx are ignored.
  void ScaleColsUp2_16_C(uint16_t* dst_ptr,
                         const uint16_t* src_ptr,
                         int dst_width,
                         int x,
                         int dx) {
    const uint16_t* in = src_ptr;
    int remaining;
    (void)x;
    (void)dx;
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      const uint16_t pixel = *in++;
      dst_ptr[0] = pixel;
      dst_ptr[1] = pixel;
      dst_ptr += 2;
    }
    // Odd width: the last source pixel is written only once.
    if (dst_width & 1) {
      dst_ptr[0] = in[0];
    }
  }
  // (1-f)a + fb can be replaced with a + f(b-a)
  // BLENDER(a, b, f) interpolates between pixels a and b; f is the 16 bit
  // fractional position between them.
  #if defined(__arm__) || defined(__aarch64__)
  // ARM uses the full 16 bit fraction with rounding.
  #define BLENDER(a, b, f) \
    (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
  #else
  // Intel uses 7 bit math with rounding.
  #define BLENDER(a, b, f) \
    (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
  #endif

  // Scales a row of 8 bit pixels with linear interpolation between adjacent
  // source pixels. x is the starting source position and dx the per-output
  // step, both in 16.16 fixed point. For every output the pixels at x >> 16
  // and (x >> 16) + 1 are read.
  void ScaleFilterCols_C(uint8_t* dst_ptr,
                         const uint8_t* src_ptr,
                         int dst_width,
                         int x,
                         int dx) {
    int remaining;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      int idx = x >> 16;
      int frac = x & 0xffff;
      int left = src_ptr[idx];
      int right = src_ptr[idx + 1];
      *dst_ptr++ = BLENDER(left, right, frac);
      x += dx;
      idx = x >> 16;
      frac = x & 0xffff;
      left = src_ptr[idx];
      right = src_ptr[idx + 1];
      *dst_ptr++ = BLENDER(left, right, frac);
      x += dx;
    }
    if (dst_width & 1) {
      const int idx = x >> 16;
      const int frac = x & 0xffff;
      const int left = src_ptr[idx];
      const int right = src_ptr[idx + 1];
      *dst_ptr = BLENDER(left, right, frac);
    }
  }

  // Same as ScaleFilterCols_C, but the source position is tracked in a 64 bit
  // accumulator initialised from x32.
  void ScaleFilterCols64_C(uint8_t* dst_ptr,
                           const uint8_t* src_ptr,
                           int dst_width,
                           int x32,
                           int dx) {
    int64_t pos = (int64_t)(x32);
    int remaining;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      int64_t idx = pos >> 16;
      int frac = (int)(pos & 0xffff);
      int left = src_ptr[idx];
      int right = src_ptr[idx + 1];
      *dst_ptr++ = BLENDER(left, right, frac);
      pos += dx;
      idx = pos >> 16;
      frac = (int)(pos & 0xffff);
      left = src_ptr[idx];
      right = src_ptr[idx + 1];
      *dst_ptr++ = BLENDER(left, right, frac);
      pos += dx;
    }
    if (dst_width & 1) {
      const int64_t idx = pos >> 16;
      const int frac = (int)(pos & 0xffff);
      const int left = src_ptr[idx];
      const int right = src_ptr[idx + 1];
      *dst_ptr = BLENDER(left, right, frac);
    }
  }
  #undef BLENDER
  // Same as 8 bit arm blender but return is cast to uint16_t
  // The product is formed in 64 bit arithmetic before the rounding shift.
  #define BLENDER(a, b, f)                                                   \
    (uint16_t)(                                                              \
        (int)(a) +                                                           \
        (int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16))

  // Scales a row of 16 bit pixels with linear interpolation between adjacent
  // source pixels. x is the starting source position and dx the per-output
  // step, both in 16.16 fixed point. For every output the pixels at x >> 16
  // and (x >> 16) + 1 are read.
  void ScaleFilterCols_16_C(uint16_t* dst_ptr,
                            const uint16_t* src_ptr,
                            int dst_width,
                            int x,
                            int dx) {
    int remaining;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      int idx = x >> 16;
      int frac = x & 0xffff;
      int left = src_ptr[idx];
      int right = src_ptr[idx + 1];
      *dst_ptr++ = BLENDER(left, right, frac);
      x += dx;
      idx = x >> 16;
      frac = x & 0xffff;
      left = src_ptr[idx];
      right = src_ptr[idx + 1];
      *dst_ptr++ = BLENDER(left, right, frac);
      x += dx;
    }
    if (dst_width & 1) {
      const int idx = x >> 16;
      const int frac = x & 0xffff;
      const int left = src_ptr[idx];
      const int right = src_ptr[idx + 1];
      *dst_ptr = BLENDER(left, right, frac);
    }
  }

  // Same as ScaleFilterCols_16_C, but the source position is tracked in a
  // 64 bit accumulator initialised from x32.
  void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
                              const uint16_t* src_ptr,
                              int dst_width,
                              int x32,
                              int dx) {
    int64_t pos = (int64_t)(x32);
    int remaining;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      int64_t idx = pos >> 16;
      int frac = (int)(pos & 0xffff);
      int left = src_ptr[idx];
      int right = src_ptr[idx + 1];
      *dst_ptr++ = BLENDER(left, right, frac);
      pos += dx;
      idx = pos >> 16;
      frac = (int)(pos & 0xffff);
      left = src_ptr[idx];
      right = src_ptr[idx + 1];
      *dst_ptr++ = BLENDER(left, right, frac);
      pos += dx;
    }
    if (dst_width & 1) {
      const int64_t idx = pos >> 16;
      const int frac = (int)(pos & 0xffff);
      const int left = src_ptr[idx];
      const int right = src_ptr[idx + 1];
      *dst_ptr = BLENDER(left, right, frac);
    }
  }
  #undef BLENDER
  // Point-samples one row of 8 bit pixels from 8 source pixels to 3 output
  // pixels, taking source pixels 0, 3 and 6 of each group of eight.
  // dst_width is asserted to be a multiple of 3. src_stride is unused.
  void ScaleRowDown38_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
    const uint8_t* in = src_ptr;
    uint8_t* out = dst;
    int remaining;
    (void)src_stride;
    assert(dst_width % 3 == 0);
    for (remaining = dst_width; remaining > 0; remaining -= 3) {
      *out++ = in[0];
      *out++ = in[3];
      *out++ = in[6];
      in += 8;
    }
  }
  // 16 bit variant of the 8 -> 3 point sampler, taking source pixels 0, 3 and
  // 6 of each group of eight. dst_width is asserted to be a multiple of 3.
  // src_stride is unused.
  void ScaleRowDown38_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
    const uint16_t* in = src_ptr;
    uint16_t* out = dst;
    int remaining;
    (void)src_stride;
    assert(dst_width % 3 == 0);
    for (remaining = dst_width; remaining > 0; remaining -= 3) {
      *out++ = in[0];
      *out++ = in[3];
      *out++ = in[6];
      in += 8;
    }
  }
  // 8x3 -> 3x1
  // Averages three source rows (src_stride elements apart) into one output
  // row. Of every 8 source columns, columns 0-2 and 3-5 are averaged as 3x3
  // boxes and columns 6-7 as a 2x3 box. The divide is a multiply by
  // 65536 / N followed by a 16 bit shift. dst_width is asserted to be a
  // positive multiple of 3.
  void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* dst_ptr,
                              int dst_width) {
    intptr_t stride = src_stride;
    int remaining;
    assert((dst_width % 3 == 0) && (dst_width > 0));
    for (remaining = dst_width; remaining > 0; remaining -= 3) {
      const uint8_t* r0 = src_ptr;
      const uint8_t* r1 = src_ptr + stride;
      const uint8_t* r2 = src_ptr + stride * 2;
      int sum;
      sum = r0[0] + r0[1] + r0[2] + r1[0] + r1[1] + r1[2] + r2[0] + r2[1] +
            r2[2];
      dst_ptr[0] = (uint8_t)((sum * (65536 / 9)) >> 16);
      sum = r0[3] + r0[4] + r0[5] + r1[3] + r1[4] + r1[5] + r2[3] + r2[4] +
            r2[5];
      dst_ptr[1] = (uint8_t)((sum * (65536 / 9)) >> 16);
      sum = r0[6] + r0[7] + r1[6] + r1[7] + r2[6] + r2[7];
      dst_ptr[2] = (uint8_t)((sum * (65536 / 6)) >> 16);
      src_ptr += 8;
      dst_ptr += 3;
    }
  }
  // 16 bit variant of the 8x3 -> 3x1 box filter.
  // Averages three source rows (src_stride elements apart) into one output
  // row. Of every 8 source columns, columns 0-2 and 3-5 are averaged as 3x3
  // boxes and columns 6-7 as a 2x3 box. The divide is a multiply by
  // 65536 / N followed by a 16 bit shift. dst_width is asserted to be a
  // positive multiple of 3.
  //
  // The sums and products are formed in uint32_t: with full range 16 bit
  // input the product (up to 9 * 65535 * 7281) exceeds INT_MAX, which would
  // be signed overflow in int arithmetic, but it always fits in 32 unsigned
  // bits.
  void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint16_t* dst_ptr,
                                 int dst_width) {
    intptr_t stride = src_stride;
    int i;
    assert((dst_width % 3 == 0) && (dst_width > 0));
    for (i = 0; i < dst_width; i += 3) {
      uint32_t sum;
      sum = (uint32_t)src_ptr[0] + src_ptr[1] + src_ptr[2] +
            src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
            src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
            src_ptr[stride * 2 + 2];
      dst_ptr[0] = (uint16_t)((sum * (65536u / 9u)) >> 16);
      sum = (uint32_t)src_ptr[3] + src_ptr[4] + src_ptr[5] +
            src_ptr[stride + 3] + src_ptr[stride + 4] + src_ptr[stride + 5] +
            src_ptr[stride * 2 + 3] + src_ptr[stride * 2 + 4] +
            src_ptr[stride * 2 + 5];
      dst_ptr[1] = (uint16_t)((sum * (65536u / 9u)) >> 16);
      sum = (uint32_t)src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] +
            src_ptr[stride + 7] + src_ptr[stride * 2 + 6] +
            src_ptr[stride * 2 + 7];
      dst_ptr[2] = (uint16_t)((sum * (65536u / 6u)) >> 16);
      src_ptr += 8;
      dst_ptr += 3;
    }
  }
  // 8x2 -> 3x1
  // Averages two source rows (src_stride elements apart) into one output
  // row. Of every 8 source columns, columns 0-2 and 3-5 are averaged as 3x2
  // boxes and columns 6-7 as a 2x2 box. The divide is a multiply by
  // 65536 / N followed by a 16 bit shift. dst_width is asserted to be a
  // positive multiple of 3.
  void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint8_t* dst_ptr,
                              int dst_width) {
    intptr_t stride = src_stride;
    int remaining;
    assert((dst_width % 3 == 0) && (dst_width > 0));
    for (remaining = dst_width; remaining > 0; remaining -= 3) {
      const uint8_t* r0 = src_ptr;
      const uint8_t* r1 = src_ptr + stride;
      int sum;
      sum = r0[0] + r0[1] + r0[2] + r1[0] + r1[1] + r1[2];
      dst_ptr[0] = (uint8_t)((sum * (65536 / 6)) >> 16);
      sum = r0[3] + r0[4] + r0[5] + r1[3] + r1[4] + r1[5];
      dst_ptr[1] = (uint8_t)((sum * (65536 / 6)) >> 16);
      sum = r0[6] + r0[7] + r1[6] + r1[7];
      dst_ptr[2] = (uint8_t)((sum * (65536 / 4)) >> 16);
      src_ptr += 8;
      dst_ptr += 3;
    }
  }
  // 16 bit variant of the 8x2 -> 3x1 box filter.
  // Averages two source rows (src_stride elements apart) into one output
  // row. Of every 8 source columns, columns 0-2 and 3-5 are averaged as 3x2
  // boxes and columns 6-7 as a 2x2 box. The divide is a multiply by
  // 65536 / N followed by a 16 bit shift. dst_width is asserted to be a
  // positive multiple of 3.
  //
  // The sums and products are formed in uint32_t: with full range 16 bit
  // input the product (up to 6 * 65535 * 10922 or 4 * 65535 * 16384) exceeds
  // INT_MAX, which would be signed overflow in int arithmetic, but it always
  // fits in 32 unsigned bits.
  void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
                                 ptrdiff_t src_stride,
                                 uint16_t* dst_ptr,
                                 int dst_width) {
    intptr_t stride = src_stride;
    int i;
    assert((dst_width % 3 == 0) && (dst_width > 0));
    for (i = 0; i < dst_width; i += 3) {
      uint32_t sum;
      sum = (uint32_t)src_ptr[0] + src_ptr[1] + src_ptr[2] +
            src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2];
      dst_ptr[0] = (uint16_t)((sum * (65536u / 6u)) >> 16);
      sum = (uint32_t)src_ptr[3] + src_ptr[4] + src_ptr[5] +
            src_ptr[stride + 3] + src_ptr[stride + 4] + src_ptr[stride + 5];
      dst_ptr[1] = (uint16_t)((sum * (65536u / 6u)) >> 16);
      sum = (uint32_t)src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] +
            src_ptr[stride + 7];
      dst_ptr[2] = (uint16_t)((sum * (65536u / 4u)) >> 16);
      src_ptr += 8;
      dst_ptr += 3;
    }
  }
  // Accumulates a row of 8 bit pixels into a row of 16 bit sums:
  // dst_ptr[i] += src_ptr[i] for src_width elements. src_width is asserted
  // to be positive.
  void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
    int remaining;
    assert(src_width > 0);
    // Two elements per pass; an odd final element is added afterwards.
    for (remaining = src_width; remaining > 1; remaining -= 2) {
      dst_ptr[0] = (uint16_t)(dst_ptr[0] + src_ptr[0]);
      dst_ptr[1] = (uint16_t)(dst_ptr[1] + src_ptr[1]);
      dst_ptr += 2;
      src_ptr += 2;
    }
    if (src_width & 1) {
      dst_ptr[0] = (uint16_t)(dst_ptr[0] + src_ptr[0]);
    }
  }
  // Accumulates a row of 16 bit pixels into a row of 32 bit sums:
  // dst_ptr[i] += src_ptr[i] for src_width elements. src_width is asserted
  // to be positive.
  void ScaleAddRow_16_C(const uint16_t* src_ptr,
                        uint32_t* dst_ptr,
                        int src_width) {
    const uint16_t* in = src_ptr;
    uint32_t* acc = dst_ptr;
    int remaining;
    assert(src_width > 0);
    // Two elements per pass; an odd final element is added afterwards.
    for (remaining = src_width; remaining > 1; remaining -= 2) {
      acc[0] += in[0];
      acc[1] += in[1];
      acc += 2;
      in += 2;
    }
    if (src_width & 1) {
      acc[0] += in[0];
    }
  }
  // Point-samples a row of 4 byte pixels down by 2x: each output pixel is a
  // copy of the second pixel of a source pair. Pixels are moved as whole
  // 32 bit words through pointer casts. src_stride is unused.
  // NOTE(review): the casts presume src_argb and dst_argb are suitably
  // aligned for uint32_t access - confirm with callers.
  void ScaleARGBRowDown2_C(const uint8_t* src_argb,
                           ptrdiff_t src_stride,
                           uint8_t* dst_argb,
                           int dst_width) {
    const uint32_t* in = (const uint32_t*)(src_argb);
    uint32_t* out = (uint32_t*)(dst_argb);
    int remaining;
    (void)src_stride;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      *out++ = in[1];
      *out++ = in[3];
      in += 4;
    }
    if (dst_width & 1) {
      *out = in[1];
    }
  }
  // Scales a row of 4 byte pixels down by 2x horizontally: each output
  // channel is the rounded average of the same channel in two adjacent
  // source pixels. src_stride is unused.
  void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
                                 ptrdiff_t src_stride,
                                 uint8_t* dst_argb,
                                 int dst_width) {
    int remaining;
    (void)src_stride;
    for (remaining = dst_width; remaining > 0; --remaining) {
      int channel;
      for (channel = 0; channel < 4; ++channel) {
        dst_argb[channel] =
            (uint8_t)((src_argb[channel] + src_argb[channel + 4] + 1) >> 1);
      }
      src_argb += 8;
      dst_argb += 4;
    }
  }
  // Scales 4 byte pixels down by 2x with a 2x2 box filter: each output
  // channel is the rounded average of that channel in two adjacent pixels of
  // the row at src_argb and the two pixels src_stride bytes further on.
  void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
                              ptrdiff_t src_stride,
                              uint8_t* dst_argb,
                              int dst_width) {
    int remaining;
    for (remaining = dst_width; remaining > 0; --remaining) {
      int channel;
      for (channel = 0; channel < 4; ++channel) {
        dst_argb[channel] =
            (uint8_t)((src_argb[channel] + src_argb[channel + 4] +
                       src_argb[src_stride + channel] +
                       src_argb[src_stride + channel + 4] + 2) >>
                      2);
      }
      src_argb += 8;
      dst_argb += 4;
    }
  }
  // Point-samples a row of 4 byte pixels, taking one source pixel every
  // src_stepx pixels starting with the first. Pixels are moved as whole
  // 32 bit words through pointer casts. src_stride is unused.
  // NOTE(review): the casts presume src_argb and dst_argb are suitably
  // aligned for uint32_t access - confirm with callers.
  void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
                              ptrdiff_t src_stride,
                              int src_stepx,
                              uint8_t* dst_argb,
                              int dst_width) {
    const uint32_t* in = (const uint32_t*)(src_argb);
    uint32_t* out = (uint32_t*)(dst_argb);
    int remaining;
    (void)src_stride;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      *out++ = in[0];
      *out++ = in[src_stepx];
      in += src_stepx * 2;
    }
    if (dst_width & 1) {
      *out = in[0];
    }
  }
  // Box-filters 4 byte pixels at a horizontal step of src_stepx pixels: each
  // output channel is the rounded average of that channel over a 2x2 block
  // (two adjacent pixels at src_argb and the two pixels src_stride bytes
  // further on); the source then advances by src_stepx pixels.
  void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
                                 ptrdiff_t src_stride,
                                 int src_stepx,
                                 uint8_t* dst_argb,
                                 int dst_width) {
    int remaining;
    for (remaining = dst_width; remaining > 0; --remaining) {
      int channel;
      for (channel = 0; channel < 4; ++channel) {
        dst_argb[channel] =
            (uint8_t)((src_argb[channel] + src_argb[channel + 4] +
                       src_argb[src_stride + channel] +
                       src_argb[src_stride + channel + 4] + 2) >>
                      2);
      }
      src_argb += src_stepx * 4;
      dst_argb += 4;
    }
  }
  // Scales a single row of pixels using point sampling.
  // Operates on 4 byte pixels moved as whole 32 bit words. x is the starting
  // source position and dx the per-output step, both in 16.16 fixed point;
  // the integer part (x >> 16) selects the source pixel.
  // NOTE(review): the casts presume src_argb and dst_argb are suitably
  // aligned for uint32_t access - confirm with callers.
  void ScaleARGBCols_C(uint8_t* dst_argb,
                       const uint8_t* src_argb,
                       int dst_width,
                       int x,
                       int dx) {
    const uint32_t* in = (const uint32_t*)(src_argb);
    uint32_t* out = (uint32_t*)(dst_argb);
    int remaining;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      *out++ = in[x >> 16];
      x += dx;
      *out++ = in[x >> 16];
      x += dx;
    }
    if (dst_width & 1) {
      *out = in[x >> 16];
    }
  }
  // Point-samples a row of 4 byte pixels like ScaleARGBCols_C, but tracks the
  // 16.16 fixed point source position in a 64 bit accumulator initialised
  // from x32.
  // NOTE(review): the casts presume src_argb and dst_argb are suitably
  // aligned for uint32_t access - confirm with callers.
  void ScaleARGBCols64_C(uint8_t* dst_argb,
                         const uint8_t* src_argb,
                         int dst_width,
                         int x32,
                         int dx) {
    int64_t pos = (int64_t)(x32);
    const uint32_t* in = (const uint32_t*)(src_argb);
    uint32_t* out = (uint32_t*)(dst_argb);
    int remaining;
    // Two outputs per pass; an odd final output is written afterwards.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      *out++ = in[pos >> 16];
      pos += dx;
      *out++ = in[pos >> 16];
      pos += dx;
    }
    if (dst_width & 1) {
      *out = in[pos >> 16];
    }
  }
  // Scales a single row of pixels up by 2x using point sampling.
  // Operates on 4 byte pixels moved as whole 32 bit words; every source
  // pixel is written twice. x and dx are ignored.
  // NOTE(review): the casts presume src_argb and dst_argb are suitably
  // aligned for uint32_t access - confirm with callers.
  void ScaleARGBColsUp2_C(uint8_t* dst_argb,
                          const uint8_t* src_argb,
                          int dst_width,
                          int x,
                          int dx) {
    const uint32_t* in = (const uint32_t*)(src_argb);
    uint32_t* out = (uint32_t*)(dst_argb);
    int remaining;
    (void)x;
    (void)dx;
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      const uint32_t pixel = *in++;
      out[0] = pixel;
      out[1] = pixel;
      out += 2;
    }
    // Odd width: the last source pixel is written only once.
    if (dst_width & 1) {
      out[0] = in[0];
    }
  }
  // TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
  // Mimics SSSE3 blender.
  // BLENDER1 blends one channel: a is weighted by (0x7f ^ f), b by f, and the
  // sum is shifted right by 7.
  // BLENDERC applies BLENDER1 to the 8-bit channel found at bit offset s.
  // BLENDER combines the four 8-bit channels of a packed 32-bit pixel.
  // All parameters and expansions are parenthesized so the macros keep their
  // meaning when given compound arguments or used inside larger expressions.
  #define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
  #define BLENDERC(a, b, f, s) \
    ((uint32_t)(BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s)))
  #define BLENDER(a, b, f)                           \
    (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
     BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0))

  // Scales one row of 32-bit pixels horizontally, blending each pair of
  // neighbouring source pixels.
  // x is the source position of the first output pixel: x >> 16 is the source
  // pixel index and bits 9..15 give the 7-bit blend fraction. dx is added to x
  // after every output pixel.
  // src[xi + 1] is read for every output pixel, so the source row must have one
  // readable pixel past the last sampled index.
  void ScaleARGBFilterCols_C(uint8_t* dst_argb,
                             const uint8_t* src_argb,
                             int dst_width,
                             int x,
                             int dx) {
    const uint32_t* src = (const uint32_t*)(src_argb);
    uint32_t* dst = (uint32_t*)(dst_argb);
    int j;
    // Produce output pixels in pairs.
    for (j = 0; j < dst_width - 1; j += 2) {
      int xi = x >> 16;          // Source pixel index.
      int xf = (x >> 9) & 0x7f;  // 7-bit blend fraction.
      uint32_t a = src[xi];
      uint32_t b = src[xi + 1];
      dst[0] = BLENDER(a, b, xf);
      x += dx;
      xi = x >> 16;
      xf = (x >> 9) & 0x7f;
      a = src[xi];
      b = src[xi + 1];
      dst[1] = BLENDER(a, b, xf);
      x += dx;
      dst += 2;
    }
    // An odd width leaves one trailing pixel.
    if (dst_width & 1) {
      int xi = x >> 16;
      int xf = (x >> 9) & 0x7f;
      uint32_t a = src[xi];
      uint32_t b = src[xi + 1];
      dst[0] = BLENDER(a, b, xf);
    }
  }

  // Same as ScaleARGBFilterCols_C, but the running source position is kept in
  // 64 bits (x32 is the starting position).
  void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
                               const uint8_t* src_argb,
                               int dst_width,
                               int x32,
                               int dx) {
    int64_t x = (int64_t)(x32);
    const uint32_t* src = (const uint32_t*)(src_argb);
    uint32_t* dst = (uint32_t*)(dst_argb);
    int j;
    // Produce output pixels in pairs.
    for (j = 0; j < dst_width - 1; j += 2) {
      int64_t xi = x >> 16;           // Source pixel index.
      int xf = (int)((x >> 9) & 0x7f);  // 7-bit blend fraction.
      uint32_t a = src[xi];
      uint32_t b = src[xi + 1];
      dst[0] = BLENDER(a, b, xf);
      x += dx;
      xi = x >> 16;
      xf = (int)((x >> 9) & 0x7f);
      a = src[xi];
      b = src[xi + 1];
      dst[1] = BLENDER(a, b, xf);
      x += dx;
      dst += 2;
    }
    // An odd width leaves one trailing pixel.
    if (dst_width & 1) {
      int64_t xi = x >> 16;
      int xf = (int)((x >> 9) & 0x7f);
      uint32_t a = src[xi];
      uint32_t b = src[xi + 1];
      dst[0] = BLENDER(a, b, xf);
    }
  }
  #undef BLENDER1
  #undef BLENDERC
  #undef BLENDER
  969. // Scale plane vertically with bilinear interpolation.
  970. void ScalePlaneVertical(int src_height,
  971. int dst_width,
  972. int dst_height,
  973. int src_stride,
  974. int dst_stride,
  975. const uint8_t* src_argb,
  976. uint8_t* dst_argb,
  977. int x,
  978. int y,
  979. int dy,
  980. int bpp,
  981. enum FilterMode filtering) {
  982. // TODO(fbarchard): Allow higher bpp.
  983. int dst_width_bytes = dst_width * bpp;
  984. void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
  985. ptrdiff_t src_stride, int dst_width,
  986. int source_y_fraction) = InterpolateRow_C;
  987. const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  988. int j;
  989. assert(bpp >= 1 && bpp <= 4);
  990. assert(src_height != 0);
  991. assert(dst_width > 0);
  992. assert(dst_height > 0);
  993. src_argb += (x >> 16) * bpp;
  994. #if defined(HAS_INTERPOLATEROW_SSSE3)
  995. if (TestCpuFlag(kCpuHasSSSE3)) {
  996. InterpolateRow = InterpolateRow_Any_SSSE3;
  997. if (IS_ALIGNED(dst_width_bytes, 16)) {
  998. InterpolateRow = InterpolateRow_SSSE3;
  999. }
  1000. }
  1001. #endif
  1002. #if defined(HAS_INTERPOLATEROW_AVX2)
  1003. if (TestCpuFlag(kCpuHasAVX2)) {
  1004. InterpolateRow = InterpolateRow_Any_AVX2;
  1005. if (IS_ALIGNED(dst_width_bytes, 32)) {
  1006. InterpolateRow = InterpolateRow_AVX2;
  1007. }
  1008. }
  1009. #endif
  1010. #if defined(HAS_INTERPOLATEROW_NEON)
  1011. if (TestCpuFlag(kCpuHasNEON)) {
  1012. InterpolateRow = InterpolateRow_Any_NEON;
  1013. if (IS_ALIGNED(dst_width_bytes, 16)) {
  1014. InterpolateRow = InterpolateRow_NEON;
  1015. }
  1016. }
  1017. #endif
  1018. #if defined(HAS_INTERPOLATEROW_MSA)
  1019. if (TestCpuFlag(kCpuHasMSA)) {
  1020. InterpolateRow = InterpolateRow_Any_MSA;
  1021. if (IS_ALIGNED(dst_width_bytes, 32)) {
  1022. InterpolateRow = InterpolateRow_MSA;
  1023. }
  1024. }
  1025. #endif
  1026. #if defined(HAS_INTERPOLATEROW_MMI)
  1027. if (TestCpuFlag(kCpuHasMMI)) {
  1028. InterpolateRow = InterpolateRow_Any_MMI;
  1029. if (IS_ALIGNED(dst_width_bytes, 8)) {
  1030. InterpolateRow = InterpolateRow_MMI;
  1031. }
  1032. }
  1033. #endif
  1034. for (j = 0; j < dst_height; ++j) {
  1035. int yi;
  1036. int yf;
  1037. if (y > max_y) {
  1038. y = max_y;
  1039. }
  1040. yi = y >> 16;
  1041. yf = filtering ? ((y >> 8) & 255) : 0;
  1042. InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
  1043. dst_width_bytes, yf);
  1044. dst_argb += dst_stride;
  1045. y += dy;
  1046. }
  1047. }
  1048. void ScalePlaneVertical_16(int src_height,
  1049. int dst_width,
  1050. int dst_height,
  1051. int src_stride,
  1052. int dst_stride,
  1053. const uint16_t* src_argb,
  1054. uint16_t* dst_argb,
  1055. int x,
  1056. int y,
  1057. int dy,
  1058. int wpp,
  1059. enum FilterMode filtering) {
  1060. // TODO(fbarchard): Allow higher wpp.
  1061. int dst_width_words = dst_width * wpp;
  1062. void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb,
  1063. ptrdiff_t src_stride, int dst_width,
  1064. int source_y_fraction) = InterpolateRow_16_C;
  1065. const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  1066. int j;
  1067. assert(wpp >= 1 && wpp <= 2);
  1068. assert(src_height != 0);
  1069. assert(dst_width > 0);
  1070. assert(dst_height > 0);
  1071. src_argb += (x >> 16) * wpp;
  1072. #if defined(HAS_INTERPOLATEROW_16_SSE2)
  1073. if (TestCpuFlag(kCpuHasSSE2)) {
  1074. InterpolateRow = InterpolateRow_Any_16_SSE2;
  1075. if (IS_ALIGNED(dst_width_bytes, 16)) {
  1076. InterpolateRow = InterpolateRow_16_SSE2;
  1077. }
  1078. }
  1079. #endif
  1080. #if defined(HAS_INTERPOLATEROW_16_SSSE3)
  1081. if (TestCpuFlag(kCpuHasSSSE3)) {
  1082. InterpolateRow = InterpolateRow_Any_16_SSSE3;
  1083. if (IS_ALIGNED(dst_width_bytes, 16)) {
  1084. InterpolateRow = InterpolateRow_16_SSSE3;
  1085. }
  1086. }
  1087. #endif
  1088. #if defined(HAS_INTERPOLATEROW_16_AVX2)
  1089. if (TestCpuFlag(kCpuHasAVX2)) {
  1090. InterpolateRow = InterpolateRow_Any_16_AVX2;
  1091. if (IS_ALIGNED(dst_width_bytes, 32)) {
  1092. InterpolateRow = InterpolateRow_16_AVX2;
  1093. }
  1094. }
  1095. #endif
  1096. #if defined(HAS_INTERPOLATEROW_16_NEON)
  1097. if (TestCpuFlag(kCpuHasNEON)) {
  1098. InterpolateRow = InterpolateRow_Any_16_NEON;
  1099. if (IS_ALIGNED(dst_width_bytes, 16)) {
  1100. InterpolateRow = InterpolateRow_16_NEON;
  1101. }
  1102. }
  1103. #endif
  1104. for (j = 0; j < dst_height; ++j) {
  1105. int yi;
  1106. int yf;
  1107. if (y > max_y) {
  1108. y = max_y;
  1109. }
  1110. yi = y >> 16;
  1111. yf = filtering ? ((y >> 8) & 255) : 0;
  1112. InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
  1113. dst_width_words, yf);
  1114. dst_argb += dst_stride;
  1115. y += dy;
  1116. }
  1117. }
  1118. // Simplify the filtering based on scale factors.
  1119. enum FilterMode ScaleFilterReduce(int src_width,
  1120. int src_height,
  1121. int dst_width,
  1122. int dst_height,
  1123. enum FilterMode filtering) {
  1124. if (src_width < 0) {
  1125. src_width = -src_width;
  1126. }
  1127. if (src_height < 0) {
  1128. src_height = -src_height;
  1129. }
  1130. if (filtering == kFilterBox) {
  1131. // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
  1132. if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
  1133. filtering = kFilterBilinear;
  1134. }
  1135. }
  1136. if (filtering == kFilterBilinear) {
  1137. if (src_height == 1) {
  1138. filtering = kFilterLinear;
  1139. }
  1140. // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
  1141. if (dst_height == src_height || dst_height * 3 == src_height) {
  1142. filtering = kFilterLinear;
  1143. }
  1144. // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
  1145. // avoid reading 2 pixels horizontally that causes memory exception.
  1146. if (src_width == 1) {
  1147. filtering = kFilterNone;
  1148. }
  1149. }
  1150. if (filtering == kFilterLinear) {
  1151. if (src_width == 1) {
  1152. filtering = kFilterNone;
  1153. }
  1154. // TODO(fbarchard): Detect any odd scale factor and reduce to None.
  1155. if (dst_width == src_width || dst_width * 3 == src_width) {
  1156. filtering = kFilterNone;
  1157. }
  1158. }
  1159. return filtering;
  1160. }
  // Divide num by div and return as 16.16 fixed point result.
  // div must be non-zero. The quotient is truncated toward zero and then
  // narrowed to int.
  int FixedDiv_C(int num, int div) {
    // Multiply rather than shift: left-shifting a negative value is undefined.
    return (int)(((int64_t)num * 65536) / div);
  }
  // Returns ((num in 16.16 fixed point) - 0x00010001) / (div - 1), i.e. the
  // 16.16 step computed from a span one unit shorter on both sides.
  // div must not be 1. The quotient is truncated toward zero and then narrowed
  // to int.
  int FixedDiv1_C(int num, int div) {
    // Multiply rather than shift: left-shifting a negative value is undefined.
    return (int)((((int64_t)num * 65536) - 0x00010001) / (div - 1));
  }
  1169. #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
  1170. // Compute slope values for stepping.
  1171. void ScaleSlope(int src_width,
  1172. int src_height,
  1173. int dst_width,
  1174. int dst_height,
  1175. enum FilterMode filtering,
  1176. int* x,
  1177. int* y,
  1178. int* dx,
  1179. int* dy) {
  1180. assert(x != NULL);
  1181. assert(y != NULL);
  1182. assert(dx != NULL);
  1183. assert(dy != NULL);
  1184. assert(src_width != 0);
  1185. assert(src_height != 0);
  1186. assert(dst_width > 0);
  1187. assert(dst_height > 0);
  1188. // Check for 1 pixel and avoid FixedDiv overflow.
  1189. if (dst_width == 1 && src_width >= 32768) {
  1190. dst_width = src_width;
  1191. }
  1192. if (dst_height == 1 && src_height >= 32768) {
  1193. dst_height = src_height;
  1194. }
  1195. if (filtering == kFilterBox) {
  1196. // Scale step for point sampling duplicates all pixels equally.
  1197. *dx = FixedDiv(Abs(src_width), dst_width);
  1198. *dy = FixedDiv(src_height, dst_height);
  1199. *x = 0;
  1200. *y = 0;
  1201. } else if (filtering == kFilterBilinear) {
  1202. // Scale step for bilinear sampling renders last pixel once for upsample.
  1203. if (dst_width <= Abs(src_width)) {
  1204. *dx = FixedDiv(Abs(src_width), dst_width);
  1205. *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
  1206. } else if (dst_width > 1) {
  1207. *dx = FixedDiv1(Abs(src_width), dst_width);
  1208. *x = 0;
  1209. }
  1210. if (dst_height <= src_height) {
  1211. *dy = FixedDiv(src_height, dst_height);
  1212. *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
  1213. } else if (dst_height > 1) {
  1214. *dy = FixedDiv1(src_height, dst_height);
  1215. *y = 0;
  1216. }
  1217. } else if (filtering == kFilterLinear) {
  1218. // Scale step for bilinear sampling renders last pixel once for upsample.
  1219. if (dst_width <= Abs(src_width)) {
  1220. *dx = FixedDiv(Abs(src_width), dst_width);
  1221. *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
  1222. } else if (dst_width > 1) {
  1223. *dx = FixedDiv1(Abs(src_width), dst_width);
  1224. *x = 0;
  1225. }
  1226. *dy = FixedDiv(src_height, dst_height);
  1227. *y = *dy >> 1;
  1228. } else {
  1229. // Scale step for point sampling duplicates all pixels equally.
  1230. *dx = FixedDiv(Abs(src_width), dst_width);
  1231. *dy = FixedDiv(src_height, dst_height);
  1232. *x = CENTERSTART(*dx, 0);
  1233. *y = CENTERSTART(*dy, 0);
  1234. }
  1235. // Negative src_width means horizontally mirror.
  1236. if (src_width < 0) {
  1237. *x += (dst_width - 1) * *dx;
  1238. *dx = -*dx;
  1239. // src_width = -src_width; // Caller must do this.
  1240. }
  1241. }
  1242. #undef CENTERSTART
  // Read 8x2 upsample with filtering and write 16x1.
  // actually reads an extra pixel, so 9x2.
  // Each pair of outputs blends a 2x2 neighbourhood taken from the row at
  // src_ptr and the row src_stride elements after it, with weights 9/3/3/1
  // (sum 16) and rounding.
  void ScaleRowUp2_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
    const uint16_t* row0 = src_ptr;
    const uint16_t* row1 = src_ptr + src_stride;
    int remaining;
    // Two outputs per source column.
    for (remaining = dst_width; remaining > 1; remaining -= 2) {
      const uint16_t near_left = row0[0];
      const uint16_t near_right = row0[1];
      const uint16_t far_left = row1[0];
      const uint16_t far_right = row1[1];
      dst[0] =
          (near_left * 9 + near_right * 3 + far_left * 3 + far_right + 8) >> 4;
      dst[1] =
          (near_left * 3 + near_right * 9 + far_left + far_right * 3 + 8) >> 4;
      row0 += 1;
      row1 += 1;
      dst += 2;
    }
    // An odd width gets only the left-weighted output of the next column.
    if (dst_width & 1) {
      const uint16_t near_left = row0[0];
      const uint16_t near_right = row0[1];
      const uint16_t far_left = row1[0];
      const uint16_t far_right = row1[1];
      dst[0] =
          (near_left * 9 + near_right * 3 + far_left * 3 + far_right + 8) >> 4;
    }
  }
  1270. #ifdef __cplusplus
  1271. } // extern "C"
  1272. } // namespace libyuv
  1273. #endif