scale_common.cc

/*
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}

// CPU agnostic row functions
void ScaleRowDown2_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    dst += 2;
    src_ptr += 4;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[1];
  }
}
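
// Note: ScaleRowDown2 does no filtering; it keeps every second source pixel
// (the odd-indexed one). For example, the 8-pixel row
// {10, 20, 30, 40, 50, 60, 70, 80} scaled to dst_width 4 yields
// {20, 40, 60, 80}. src_stride is unused because only one source row is read.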

void ScaleRowDown2_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    dst += 2;
    src_ptr += 4;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[1];
  }
}

void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint8_t* dst,
                           int dst_width) {
  const uint8_t* s = src_ptr;
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
    dst[1] = (s[2] + s[3] + 1) >> 1;
    dst += 2;
    s += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
  }
}

void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint16_t* dst,
                              int dst_width) {
  const uint16_t* s = src_ptr;
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
    dst[1] = (s[2] + s[3] + 1) >> 1;
    dst += 2;
    s += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
  }
}

void ScaleRowDown2Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  }
}
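
// Note: the Box variant averages each 2x2 source block with round-to-nearest;
// the "+ 2" adds half of the divisor (4) before the shift, so the block
// {{1, 2}, {3, 4}} produces (1 + 2 + 3 + 4 + 2) >> 2 = 3.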

void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst,
                            int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  int x;
  dst_width -= 1;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst += 1;
    s += 2;
    t += 2;
  }
  dst[0] = (s[0] + t[0] + 1) >> 1;
}

void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  const uint16_t* s = src_ptr;
  const uint16_t* t = src_ptr + src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  }
}

void ScaleRowDown4_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    dst += 2;
    src_ptr += 8;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[2];
  }
}

void ScaleRowDown4_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    dst += 2;
    src_ptr += 8;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[2];
  }
}

void ScaleRowDown4Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  intptr_t stride = src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
              src_ptr[stride * 3 + 3] + 8) >>
             4;
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
              src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
              src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
              src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
              src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
              src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
              src_ptr[stride * 3 + 7] + 8) >>
             4;
    dst += 2;
    src_ptr += 8;
  }
  if (dst_width & 1) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
              src_ptr[stride * 3 + 3] + 8) >>
             4;
  }
}
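
// Note: ScaleRowDown4Box averages a full 4x4 block (16 samples) per output
// pixel; "+ 8" is half of 16 for rounding and ">> 4" divides by 16, so a
// block of sixteen 255s gives (16 * 255 + 8) >> 4 = 255.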

void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  intptr_t stride = src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
              src_ptr[stride * 3 + 3] + 8) >>
             4;
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
              src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
              src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
              src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
              src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
              src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
              src_ptr[stride * 3 + 7] + 8) >>
             4;
    dst += 2;
    src_ptr += 8;
  }
  if (dst_width & 1) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
              src_ptr[stride * 3 + 3] + 8) >>
             4;
  }
}

void ScaleRowDown34_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
                      int dst_width) {
  int x;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    dst += 3;
    src_ptr += 4;
  }
}

void ScaleRowDown34_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  int x;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    dst += 3;
    src_ptr += 4;
  }
}

// Filter rows 0 and 1 together, 3 : 1
void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* d,
                            int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    d[0] = (a0 * 3 + b0 + 2) >> 2;
    d[1] = (a1 * 3 + b1 + 2) >> 2;
    d[2] = (a2 * 3 + b2 + 2) >> 2;
    d += 3;
    s += 4;
    t += 4;
  }
}

void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* d,
                               int dst_width) {
  const uint16_t* s = src_ptr;
  const uint16_t* t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    d[0] = (a0 * 3 + b0 + 2) >> 2;
    d[1] = (a1 * 3 + b1 + 2) >> 2;
    d[2] = (a2 * 3 + b2 + 2) >> 2;
    d += 3;
    s += 4;
    t += 4;
  }
}

// Filter rows 1 and 2 together, 1 : 1
void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* d,
                            int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    d[0] = (a0 + b0 + 1) >> 1;
    d[1] = (a1 + b1 + 1) >> 1;
    d[2] = (a2 + b2 + 1) >> 1;
    d += 3;
    s += 4;
    t += 4;
  }
}

void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* d,
                               int dst_width) {
  const uint16_t* s = src_ptr;
  const uint16_t* t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    d[0] = (a0 + b0 + 1) >> 1;
    d[1] = (a1 + b1 + 1) >> 1;
    d[2] = (a2 + b2 + 1) >> 1;
    d += 3;
    s += 4;
    t += 4;
  }
}

// Scales a single row of pixels using point sampling.
void ScaleCols_C(uint8_t* dst_ptr,
                 const uint8_t* src_ptr,
                 int dst_width,
                 int x,
                 int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[x >> 16];
  }
}

void ScaleCols_16_C(uint16_t* dst_ptr,
                    const uint16_t* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[x >> 16];
  }
}
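
// Note: the column scalers walk the source in 16.16 fixed point: x is the
// current source position and dx the per-pixel step, both scaled by 65536.
// For a 2:1 horizontal shrink dx is 0x20000 (2.0), so x >> 16 visits source
// columns 0, 2, 4, ...; the low 16 bits carry the sub-pixel fraction used by
// the filtering variants further below.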

// Scales a single row of pixels up by 2x using point sampling.
void ScaleColsUp2_C(uint8_t* dst_ptr,
                    const uint8_t* src_ptr,
                    int dst_width,
                    int x,
                    int dx) {
  int j;
  (void)x;
  (void)dx;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
    src_ptr += 1;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[0];
  }
}

void ScaleColsUp2_16_C(uint16_t* dst_ptr,
                       const uint16_t* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  int j;
  (void)x;
  (void)dx;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
    src_ptr += 1;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[0];
  }
}

// (1-f)a + fb can be replaced with a + f(b-a)
#if defined(__arm__) || defined(__aarch64__)
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// Intel uses 7 bit math with rounding.
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
#endif
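
// Illustrative arithmetic for the BLENDER forms above: blending a = 100 and
// b = 200 at f = 0x8000 (one half) gives
// 100 + ((0x8000 * 100 + 0x8000) >> 16) = 150 on ARM; the Intel form first
// truncates f to 7 bits (0x8000 >> 9 = 64) and computes
// 100 + ((64 * 100 + 0x40) >> 7) = 150. Other fractions can differ by a small
// amount because of the reduced precision.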

void ScaleFilterCols_C(uint8_t* dst_ptr,
                       const uint8_t* src_ptr,
                       int dst_width,
                       int x,
                       int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}

void ScaleFilterCols64_C(uint8_t* dst_ptr,
                         const uint8_t* src_ptr,
                         int dst_width,
                         int x32,
                         int dx) {
  int64_t x = (int64_t)(x32);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int64_t xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    int64_t xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}
#undef BLENDER

// Same as 8 bit arm blender but return is cast to uint16_t
#define BLENDER(a, b, f) \
  (uint16_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))

void ScaleFilterCols_16_C(uint16_t* dst_ptr,
                          const uint16_t* src_ptr,
                          int dst_width,
                          int x,
                          int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}

void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
                            const uint16_t* src_ptr,
                            int dst_width,
                            int x32,
                            int dx) {
  int64_t x = (int64_t)(x32);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int64_t xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    int64_t xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}
#undef BLENDER

void ScaleRowDown38_C(const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint8_t* dst,
                      int dst_width) {
  int x;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (x = 0; x < dst_width; x += 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    dst += 3;
    src_ptr += 8;
  }
}

void ScaleRowDown38_16_C(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  int x;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (x = 0; x < dst_width; x += 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    dst += 3;
    src_ptr += 8;
  }
}

// 8x3 -> 3x1
void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] =
        (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
         src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
            (65536 / 9) >>
        16;
    dst_ptr[1] =
        (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
         src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
            (65536 / 9) >>
        16;
    dst_ptr[2] =
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
            (65536 / 6) >>
        16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}

void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] =
        (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
         src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
            (65536 / 9) >>
        16;
    dst_ptr[1] =
        (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
         src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
            (65536 / 9) >>
        16;
    dst_ptr[2] =
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
            (65536 / 6) >>
        16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
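
// Note: the 3/8 box functions avoid an integer division by multiplying with a
// 16.16 reciprocal: sum * (65536 / 9) >> 16 approximates sum / 9, where
// 65536 / 9 truncates to 7281. For example a sum of 9 * 200 = 1800 maps to
// (1800 * 7281) >> 16 = 199, one LSB below the exact average.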

// 8x2 -> 3x1
void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst_ptr,
                            int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
                  src_ptr[stride + 1] + src_ptr[stride + 2]) *
                     (65536 / 6) >>
                 16;
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
                  src_ptr[stride + 4] + src_ptr[stride + 5]) *
                     (65536 / 6) >>
                 16;
    dst_ptr[2] =
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
            (65536 / 4) >>
        16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}

void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               uint16_t* dst_ptr,
                               int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
                  src_ptr[stride + 1] + src_ptr[stride + 2]) *
                     (65536 / 6) >>
                 16;
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
                  src_ptr[stride + 4] + src_ptr[stride + 5]) *
                     (65536 / 6) >>
                 16;
    dst_ptr[2] =
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
            (65536 / 4) >>
        16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}

void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
  int x;
  assert(src_width > 0);
  for (x = 0; x < src_width - 1; x += 2) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    src_ptr += 2;
    dst_ptr += 2;
  }
  if (src_width & 1) {
    dst_ptr[0] += src_ptr[0];
  }
}

void ScaleAddRow_16_C(const uint16_t* src_ptr,
                      uint32_t* dst_ptr,
                      int src_width) {
  int x;
  assert(src_width > 0);
  for (x = 0; x < src_width - 1; x += 2) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    src_ptr += 2;
    dst_ptr += 2;
  }
  if (src_width & 1) {
    dst_ptr[0] += src_ptr[0];
  }
}
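
// Note: ScaleAddRow accumulates one source row into a wider accumulator row
// (8 -> 16 bits, or 16 -> 32 bits in the _16_C variant). The box-filter plane
// scaler typically sums the contributing source rows this way and divides by
// the box area afterwards.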

void ScaleARGBRowDown2_C(const uint8_t* src_argb,
                         ptrdiff_t src_stride,
                         uint8_t* dst_argb,
                         int dst_width) {
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src[1];
    dst[1] = src[3];
    src += 4;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[1];
  }
}

void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               uint8_t* dst_argb,
                               int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width; ++x) {
    dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
    dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
    dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
    dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
    src_argb += 8;
    dst_argb += 4;
  }
}

void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            uint8_t* dst_argb,
                            int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
                   src_argb[src_stride + 4] + 2) >>
                  2;
    dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
                   src_argb[src_stride + 5] + 2) >>
                  2;
    dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
                   src_argb[src_stride + 6] + 2) >>
                  2;
    dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
                   src_argb[src_stride + 7] + 2) >>
                  2;
    src_argb += 8;
    dst_argb += 4;
  }
}

void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
                            ptrdiff_t src_stride,
                            int src_stepx,
                            uint8_t* dst_argb,
                            int dst_width) {
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  (void)src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src[0];
    dst[1] = src[src_stepx];
    src += src_stepx * 2;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[0];
  }
}

void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
                               ptrdiff_t src_stride,
                               int src_stepx,
                               uint8_t* dst_argb,
                               int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
                   src_argb[src_stride + 4] + 2) >>
                  2;
    dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
                   src_argb[src_stride + 5] + 2) >>
                  2;
    dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
                   src_argb[src_stride + 6] + 2) >>
                  2;
    dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
                   src_argb[src_stride + 7] + 2) >>
                  2;
    src_argb += src_stepx * 4;
    dst_argb += 4;
  }
}

// Scales a single row of pixels using point sampling.
void ScaleARGBCols_C(uint8_t* dst_argb,
                     const uint8_t* src_argb,
                     int dst_width,
                     int x,
                     int dx) {
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst[0] = src[x >> 16];
    x += dx;
    dst[1] = src[x >> 16];
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[x >> 16];
  }
}

void ScaleARGBCols64_C(uint8_t* dst_argb,
                       const uint8_t* src_argb,
                       int dst_width,
                       int x32,
                       int dx) {
  int64_t x = (int64_t)(x32);
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst[0] = src[x >> 16];
    x += dx;
    dst[1] = src[x >> 16];
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[x >> 16];
  }
}

// Scales a single row of pixels up by 2x using point sampling.
void ScaleARGBColsUp2_C(uint8_t* dst_argb,
                        const uint8_t* src_argb,
                        int dst_width,
                        int x,
                        int dx) {
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int j;
  (void)x;
  (void)dx;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst[1] = dst[0] = src[0];
    src += 1;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[0];
  }
}
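
// Note for the ARGB blender below: each 32-bit pixel packs four 8-bit
// channels, so BLENDER splits the pixel with BLENDERC at shifts 24, 16, 8 and
// 0, blends each channel with 7-bit weights (f for b, 0x7f ^ f for a), and
// reassembles the channels into one uint32_t.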

// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
#define BLENDERC(a, b, f, s) \
  (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
#define BLENDER(a, b, f)                                                 \
  BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
      BLENDERC(a, b, f, 0)

void ScaleARGBFilterCols_C(uint8_t* dst_argb,
                           const uint8_t* src_argb,
                           int dst_width,
                           int x,
                           int dx) {
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32_t a = src[xi];
    uint32_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
    x += dx;
    xi = x >> 16;
    xf = (x >> 9) & 0x7f;
    a = src[xi];
    b = src[xi + 1];
    dst[1] = BLENDER(a, b, xf);
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    int xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32_t a = src[xi];
    uint32_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
  }
}

void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
                             const uint8_t* src_argb,
                             int dst_width,
                             int x32,
                             int dx) {
  int64_t x = (int64_t)(x32);
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int64_t xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32_t a = src[xi];
    uint32_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
    x += dx;
    xi = x >> 16;
    xf = (x >> 9) & 0x7f;
    a = src[xi];
    b = src[xi + 1];
    dst[1] = BLENDER(a, b, xf);
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    int64_t xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32_t a = src[xi];
    uint32_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
  }
}
#undef BLENDER1
#undef BLENDERC
#undef BLENDER

// Scale plane vertically with bilinear interpolation.
void ScalePlaneVertical(int src_height,
                        int dst_width,
                        int dst_height,
                        int src_stride,
                        int dst_stride,
                        const uint8_t* src_argb,
                        uint8_t* dst_argb,
                        int x,
                        int y,
                        int dy,
                        int bpp,
                        enum FilterMode filtering) {
  // TODO(fbarchard): Allow higher bpp.
  int dst_width_bytes = dst_width * bpp;
  void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  int j;
  assert(bpp >= 1 && bpp <= 4);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  src_argb += (x >> 16) * bpp;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width_bytes, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width_bytes, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width_bytes, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(dst_width_bytes, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
  for (j = 0; j < dst_height; ++j) {
    int yi;
    int yf;
    if (y > max_y) {
      y = max_y;
    }
    yi = y >> 16;
    yf = filtering ? ((y >> 8) & 255) : 0;
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
                   dst_width_bytes, yf);
    dst_argb += dst_stride;
    y += dy;
  }
}
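
// Note: ScalePlaneVertical selects an InterpolateRow implementation at run
// time: it starts from the C fallback, upgrades to an "Any" SIMD variant when
// the CPU flag is present, and to the full-width SIMD variant when the row
// byte count meets that variant's alignment requirement. The same dispatch
// pattern recurs throughout libyuv.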

void ScalePlaneVertical_16(int src_height,
                           int dst_width,
                           int dst_height,
                           int src_stride,
                           int dst_stride,
                           const uint16_t* src_argb,
                           uint16_t* dst_argb,
                           int x,
                           int y,
                           int dy,
                           int wpp,
                           enum FilterMode filtering) {
  // TODO(fbarchard): Allow higher wpp.
  int dst_width_words = dst_width * wpp;
  void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_16_C;
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  int j;
  assert(wpp >= 1 && wpp <= 2);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  src_argb += (x >> 16) * wpp;
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(dst_width_words, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(dst_width_words, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(dst_width_words, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(dst_width_words, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
  for (j = 0; j < dst_height; ++j) {
    int yi;
    int yf;
    if (y > max_y) {
      y = max_y;
    }
    yi = y >> 16;
    yf = filtering ? ((y >> 8) & 255) : 0;
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
                   dst_width_words, yf);
    dst_argb += dst_stride;
    y += dy;
  }
}

// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width,
                                  int src_height,
                                  int dst_width,
                                  int dst_height,
                                  enum FilterMode filtering) {
  if (src_width < 0) {
    src_width = -src_width;
  }
  if (src_height < 0) {
    src_height = -src_height;
  }
  if (filtering == kFilterBox) {
    // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
    if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
      filtering = kFilterBilinear;
    }
  }
  if (filtering == kFilterBilinear) {
    if (src_height == 1) {
      filtering = kFilterLinear;
    }
    // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
    if (dst_height == src_height || dst_height * 3 == src_height) {
      filtering = kFilterLinear;
    }
    // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
    // avoid reading 2 pixels horizontally that causes memory exception.
    if (src_width == 1) {
      filtering = kFilterNone;
    }
  }
  if (filtering == kFilterLinear) {
    if (src_width == 1) {
      filtering = kFilterNone;
    }
    // TODO(fbarchard): Detect any odd scale factor and reduce to None.
    if (dst_width == src_width || dst_width * 3 == src_width) {
      filtering = kFilterNone;
    }
  }
  return filtering;
}

// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div) {
  return (int)(((int64_t)(num) << 16) / div);
}

// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
int FixedDiv1_C(int num, int div) {
  return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1));
}
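
// Example: FixedDiv(1280, 640) returns 2 << 16 = 0x20000, i.e. a step of 2.0
// source pixels per destination pixel in 16.16 fixed point. FixedDiv1 is the
// inclusive form used when upsampling; it effectively divides (num - 1) by
// (div - 1) so the last destination pixel maps just inside the last source
// pixel instead of past it.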

#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)

// Compute slope values for stepping.
void ScaleSlope(int src_width,
                int src_height,
                int dst_width,
                int dst_height,
                enum FilterMode filtering,
                int* x,
                int* y,
                int* dx,
                int* dy) {
  assert(x != NULL);
  assert(y != NULL);
  assert(dx != NULL);
  assert(dy != NULL);
  assert(src_width != 0);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  // Check for 1 pixel and avoid FixedDiv overflow.
  if (dst_width == 1 && src_width >= 32768) {
    dst_width = src_width;
  }
  if (dst_height == 1 && src_height >= 32768) {
    dst_height = src_height;
  }
  if (filtering == kFilterBox) {
    // Scale step for point sampling duplicates all pixels equally.
    *dx = FixedDiv(Abs(src_width), dst_width);
    *dy = FixedDiv(src_height, dst_height);
    *x = 0;
    *y = 0;
  } else if (filtering == kFilterBilinear) {
    // Scale step for bilinear sampling renders last pixel once for upsample.
    if (dst_width <= Abs(src_width)) {
      *dx = FixedDiv(Abs(src_width), dst_width);
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (dst_width > 1) {
      *dx = FixedDiv1(Abs(src_width), dst_width);
      *x = 0;
    }
    if (dst_height <= src_height) {
      *dy = FixedDiv(src_height, dst_height);
      *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (dst_height > 1) {
      *dy = FixedDiv1(src_height, dst_height);
      *y = 0;
    }
  } else if (filtering == kFilterLinear) {
    // Scale step for bilinear sampling renders last pixel once for upsample.
    if (dst_width <= Abs(src_width)) {
      *dx = FixedDiv(Abs(src_width), dst_width);
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (dst_width > 1) {
      *dx = FixedDiv1(Abs(src_width), dst_width);
      *x = 0;
    }
    *dy = FixedDiv(src_height, dst_height);
    *y = *dy >> 1;
  } else {
    // Scale step for point sampling duplicates all pixels equally.
    *dx = FixedDiv(Abs(src_width), dst_width);
    *dy = FixedDiv(src_height, dst_height);
    *x = CENTERSTART(*dx, 0);
    *y = CENTERSTART(*dy, 0);
  }
  // Negative src_width means horizontally mirror.
  if (src_width < 0) {
    *x += (dst_width - 1) * *dx;
    *dx = -*dx;
    // src_width = -src_width;  // Caller must do this.
  }
}
#undef CENTERSTART
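
// Worked example for ScaleSlope: shrinking 640 -> 320 with kFilterBilinear
// gives *dx = FixedDiv(640, 320) = 0x20000 (2.0) and
// *x = (0x20000 >> 1) - 32768 = 0x8000, i.e. the first sample is taken at
// source position 0.5 so the 2-tap filter is centred on the 2x1 span each
// destination pixel represents.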

// Read 8x2 upsample with filtering and write 16x1.
// actually reads an extra pixel, so 9x2.
void ScaleRowUp2_16_C(const uint16_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint16_t* dst,
                      int dst_width) {
  const uint16_t* src2 = src_ptr + src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    uint16_t p0 = src_ptr[0];
    uint16_t p1 = src_ptr[1];
    uint16_t p2 = src2[0];
    uint16_t p3 = src2[1];
    dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
    dst[1] = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4;
    ++src_ptr;
    ++src2;
    dst += 2;
  }
  if (dst_width & 1) {
    uint16_t p0 = src_ptr[0];
    uint16_t p1 = src_ptr[1];
    uint16_t p2 = src2[0];
    uint16_t p3 = src2[1];
    dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
  }
}
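
// Note: the 9:3:3:1 weights above are the standard 2x bilinear upsampling
// kernel. Each output sample sits a quarter of a sample away from the nearest
// source sample in both directions, so that sample gets weight 9/16, its
// horizontal and vertical neighbours 3/16 each, and the diagonal neighbour
// 1/16; "+ 8" rounds before the >> 4.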

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif