/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}
// ScaleARGB ARGB, 1/2
// This is an optimized version for scaling down an ARGB image to 1/2 of
// its original size.
static void ScaleARGBDown2(int src_width,
                           int src_height,
                           int dst_width,
                           int dst_height,
                           int src_stride,
                           int dst_stride,
                           const uint8_t* src_argb,
                           uint8_t* dst_argb,
                           int x,
                           int dx,
                           int y,
                           int dy,
                           enum FilterMode filtering) {
  int j;
  int row_stride = src_stride * (dy >> 16);
  void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
                            uint8_t* dst_argb, int dst_width) =
      filtering == kFilterNone
          ? ScaleARGBRowDown2_C
          : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
                                        : ScaleARGBRowDown2Box_C);
  (void)src_width;
  (void)src_height;
  (void)dx;
  assert(dx == 65536 * 2);      // Test scale factor of 2.
  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
  // Advance to odd row, even column.
  if (filtering == kFilterBilinear) {
    src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  } else {
    src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
  }

#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleARGBRowDown2 =
        filtering == kFilterNone
            ? ScaleARGBRowDown2_Any_SSE2
            : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2
                                          : ScaleARGBRowDown2Box_Any_SSE2);
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBRowDown2 =
          filtering == kFilterNone
              ? ScaleARGBRowDown2_SSE2
              : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2
                                            : ScaleARGBRowDown2Box_SSE2);
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBRowDown2 =
        filtering == kFilterNone
            ? ScaleARGBRowDown2_Any_NEON
            : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON
                                          : ScaleARGBRowDown2Box_Any_NEON);
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBRowDown2 =
          filtering == kFilterNone
              ? ScaleARGBRowDown2_NEON
              : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON
                                            : ScaleARGBRowDown2Box_NEON);
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWN2_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBRowDown2 =
        filtering == kFilterNone
            ? ScaleARGBRowDown2_Any_MSA
            : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA
                                          : ScaleARGBRowDown2Box_Any_MSA);
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBRowDown2 =
          filtering == kFilterNone
              ? ScaleARGBRowDown2_MSA
              : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA
                                            : ScaleARGBRowDown2Box_MSA);
    }
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (j = 0; j < dst_height; ++j) {
    ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
    src_argb += row_stride;
    dst_argb += dst_stride;
  }
}
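
// Note: x/dx and y/dy above are 16.16 fixed point source coordinates and
// steps, so dx == 0x20000 (65536 * 2) selects every second source column and
// row_stride advances (dy >> 16) source rows for each destination row.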
// ScaleARGB ARGB, 1/4
// This is an optimized version for scaling down an ARGB image to 1/4 of
// its original size.
static void ScaleARGBDown4Box(int src_width,
                              int src_height,
                              int dst_width,
                              int dst_height,
                              int src_stride,
                              int dst_stride,
                              const uint8_t* src_argb,
                              uint8_t* dst_argb,
                              int x,
                              int dx,
                              int y,
                              int dy) {
  int j;
  // Allocate 2 rows of ARGB.
  const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
  align_buffer_64(row, kRowSize * 2);
  int row_stride = src_stride * (dy >> 16);
  void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
                            uint8_t* dst_argb, int dst_width) =
      ScaleARGBRowDown2Box_C;
  // Advance to odd row, even column.
  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  (void)src_width;
  (void)src_height;
  (void)dx;
  assert(dx == 65536 * 4);      // Test scale factor of 4.
  assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
    }
  }
#endif

  for (j = 0; j < dst_height; ++j) {
    ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
    ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
                      dst_width * 2);
    ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
    src_argb += row_stride;
    dst_argb += dst_stride;
  }
  free_aligned_buffer_64(row);
}
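
// Note: the 1/4 box reduction above is two chained 1/2 box passes: two pairs
// of source rows are first averaged into the temporary 'row' buffer at half
// width, then the two intermediate rows are averaged again into the
// destination, so each output pixel is the mean of a 4x4 source block.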
// ScaleARGB ARGB Even
// This is an optimized version for scaling down an ARGB image to an even
// multiple of its original size.
static void ScaleARGBDownEven(int src_width,
                              int src_height,
                              int dst_width,
                              int dst_height,
                              int src_stride,
                              int dst_stride,
                              const uint8_t* src_argb,
                              uint8_t* dst_argb,
                              int x,
                              int dx,
                              int y,
                              int dy,
                              enum FilterMode filtering) {
  int j;
  int col_step = dx >> 16;
  int row_stride = (dy >> 16) * src_stride;
  void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
                               int src_step, uint8_t* dst_argb,
                               int dst_width) =
      filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
  (void)src_width;
  (void)src_height;
  assert(IS_ALIGNED(src_width, 2));
  assert(IS_ALIGNED(src_height, 2));
  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
                                     : ScaleARGBRowDownEven_Any_SSE2;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBRowDownEven =
          filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2;
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON
                                     : ScaleARGBRowDownEven_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBRowDownEven =
          filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA
                                     : ScaleARGBRowDownEven_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBRowDownEven =
          filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA;
    }
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (j = 0; j < dst_height; ++j) {
    ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
    src_argb += row_stride;
    dst_argb += dst_stride;
  }
}
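
// Note: ScaleARGBDownEven samples every (dx >> 16)-th column and
// (dy >> 16)-th row, so it only handles whole-pixel steps; with filtering
// enabled the Box row functions average a 2x2 block at each sampled position.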
// Scale ARGB down with bilinear interpolation.
static void ScaleARGBBilinearDown(int src_width,
                                  int src_height,
                                  int dst_width,
                                  int dst_height,
                                  int src_stride,
                                  int dst_stride,
                                  const uint8_t* src_argb,
                                  uint8_t* dst_argb,
                                  int x,
                                  int dx,
                                  int y,
                                  int dy,
                                  enum FilterMode filtering) {
  int j;
  void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
                              int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
  int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
  int64_t xl = (dx >= 0) ? x : xlast;
  int64_t xr = (dx >= 0) ? xlast : x;
  int clip_src_width;
  xl = (xl >> 16) & ~3;    // Left edge aligned.
  xr = (xr >> 16) + 1;     // Right most pixel used.  Bilinear uses 2 pixels.
  xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
  if (xr > src_width) {
    xr = src_width;
  }
  clip_src_width = (int)(xr - xl) * 4;  // Width aligned to 4.
  src_argb += xl * 4;
  x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(clip_src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(clip_src_width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
    }
  }
#endif
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row of ARGB.
  {
    align_buffer_64(row, clip_src_width * 4);

    const int max_y = (src_height - 1) << 16;
    if (y > max_y) {
      y = max_y;
    }
    for (j = 0; j < dst_height; ++j) {
      int yi = y >> 16;
      const uint8_t* src = src_argb + yi * src_stride;
      if (filtering == kFilterLinear) {
        ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
      } else {
        int yf = (y >> 8) & 255;
        InterpolateRow(row, src, src_stride, clip_src_width, yf);
        ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
      }
      dst_argb += dst_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
    }
    free_aligned_buffer_64(row);
  }
}
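
// Note: ScaleARGBBilinearDown clips the source to the horizontal range the
// destination actually samples (xl..xr, widened to 4 pixel alignment), so the
// temporary 'row' buffer and InterpolateRow only touch the pixels needed for
// the requested destination width.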
// Scale ARGB up with bilinear interpolation.
static void ScaleARGBBilinearUp(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint8_t* src_argb,
                                uint8_t* dst_argb,
                                int x,
                                int dx,
                                int y,
                                int dy,
                                enum FilterMode filtering) {
  int j;
  void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
                              int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
  if (src_width >= 32768) {
    ScaleARGBFilterCols =
        filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
  if (filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_MSA)
  if (!filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBCols_MSA;
    }
  }
#endif
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }

  {
    int yi = y >> 16;
    const uint8_t* src = src_argb + yi * src_stride;

    // Allocate 2 rows of ARGB.
    const int kRowSize = (dst_width * 4 + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);

    uint8_t* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;

    ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_argb + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
      } else {
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
      }
      dst_argb += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
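
// Note: ScaleARGBBilinearUp keeps two horizontally scaled source rows in a
// ping-pong buffer; when the integer source row advances, only the newly
// needed row is rescaled and rowptr/rowstride are flipped so that rowptr is
// the row for the current y and rowptr + rowstride the following row.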
#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
static void ScaleYUVToARGBBilinearUp(int src_width,
                                     int src_height,
                                     int dst_width,
                                     int dst_height,
                                     int src_stride_y,
                                     int src_stride_u,
                                     int src_stride_v,
                                     int dst_stride_argb,
                                     const uint8_t* src_y,
                                     const uint8_t* src_u,
                                     const uint8_t* src_v,
                                     uint8_t* dst_argb,
                                     int x,
                                     int dx,
                                     int y,
                                     int dy,
                                     enum FilterMode filtering) {
  int j;
  void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
                        const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
      I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    I422ToARGBRow = I422ToARGBRow_Any_MSA;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_MSA;
    }
  }
#endif

  void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
                              int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  if (src_width >= 32768) {
    ScaleARGBFilterCols =
        filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
  if (filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_MSA)
  if (!filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBCols_MSA;
    }
  }
#endif
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
  int yi = y >> 16;
  int uv_yi = yi >> kYShift;
  const uint8_t* src_row_y = src_y + yi * src_stride_y;
  const uint8_t* src_row_u = src_u + uv_yi * src_stride_u;
  const uint8_t* src_row_v = src_v + uv_yi * src_stride_v;

  // Allocate 2 rows of ARGB.
  const int kRowSize = (dst_width * 4 + 31) & ~31;
  align_buffer_64(row, kRowSize * 2);

  // Allocate 1 row of ARGB for source conversion.
  align_buffer_64(argb_row, src_width * 4);

  uint8_t* rowptr = row;
  int rowstride = kRowSize;
  int lasty = yi;

  // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
  ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
  if (src_height > 1) {
    src_row_y += src_stride_y;
    if (yi & 1) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
  if (src_height > 2) {
    src_row_y += src_stride_y;
    if (!(yi & 1)) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }

  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
        uv_yi = yi >> kYShift;
        src_row_y = src_y + yi * src_stride_y;
        src_row_u = src_u + uv_yi * src_stride_u;
        src_row_v = src_v + uv_yi * src_stride_v;
      }
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src_row_y += src_stride_y;
        if (yi & 1) {
          src_row_u += src_stride_u;
          src_row_v += src_stride_v;
        }
      }
    }
    if (filtering == kFilterLinear) {
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
    dst_argb += dst_stride_argb;
    y += dy;
  }
  free_aligned_buffer_64(row);
  free_aligned_buffer_64(argb_row);
}
#endif
// Scale ARGB to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits
// of x and dx are the integer part of the source position and
// the lower 16 bits are the fixed decimal part.
static void ScaleARGBSimple(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_argb,
                            uint8_t* dst_argb,
                            int x,
                            int dx,
                            int y,
                            int dy) {
  int j;
  void (*ScaleARGBCols)(uint8_t* dst_argb, const uint8_t* src_argb,
                        int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
  (void)src_height;
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBCols = ScaleARGBCols_SSE2;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBCols = ScaleARGBCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBCols = ScaleARGBCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBCols = ScaleARGBCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBCols = ScaleARGBCols_MSA;
    }
  }
#endif
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleARGBCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

  for (j = 0; j < dst_height; ++j) {
    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
                  dx);
    dst_argb += dst_stride;
    y += dy;
  }
}
// Scale an ARGB image.
// This function in turn calls a scaling function
// suitable for handling the desired resolutions.
static void ScaleARGB(const uint8_t* src,
                      int src_stride,
                      int src_width,
                      int src_height,
                      uint8_t* dst,
                      int dst_stride,
                      int dst_width,
                      int dst_height,
                      int clip_x,
                      int clip_y,
                      int clip_width,
                      int clip_height,
                      enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // ARGB does not support box filter yet, but allow the user to pass it.
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);

  // Negative src_height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);
  if (clip_x) {
    int64_t clipf = (int64_t)(clip_x)*dx;
    x += (clipf & 0xffff);
    src += (clipf >> 16) * 4;
    dst += clip_x * 4;
  }
  if (clip_y) {
    int64_t clipf = (int64_t)(clip_y)*dy;
    y += (clipf & 0xffff);
    src += (clipf >> 16) * src_stride;
    dst += clip_y * dst_stride;
  }

  // Special case for integer step values.
  if (((dx | dy) & 0xffff) == 0) {
    if (!dx || !dy) {  // 1 pixel wide and/or tall.
      filtering = kFilterNone;
    } else {
      // Optimized even scale down. ie 2, 4, 6, 8, 10x.
      if (!(dx & 0x10000) && !(dy & 0x10000)) {
        if (dx == 0x20000) {
          // Optimized 1/2 downsample.
          ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
                         src_stride, dst_stride, src, dst, x, dx, y, dy,
                         filtering);
          return;
        }
        if (dx == 0x40000 && filtering == kFilterBox) {
          // Optimized 1/4 box downsample.
          ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
                            src_stride, dst_stride, src, dst, x, dx, y, dy);
          return;
        }
        ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
                          src_stride, dst_stride, src, dst, x, dx, y, dy,
                          filtering);
        return;
      }
      // Optimized odd scale down. ie 3, 5, 7, 9x.
      if ((dx & 0x10000) && (dy & 0x10000)) {
        filtering = kFilterNone;
        if (dx == 0x10000 && dy == 0x10000) {
          // Straight copy.
          ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
                   dst, dst_stride, clip_width, clip_height);
          return;
        }
      }
    }
  }
  if (dx == 0x10000 && (x & 0xffff) == 0) {
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
                       dst_stride, src, dst, x, y, dy, 4, filtering);
    return;
  }
  if (filtering && dy < 65536) {
    ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
                        src_stride, dst_stride, src, dst, x, dx, y, dy,
                        filtering);
    return;
  }
  if (filtering) {
    ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
                          src_stride, dst_stride, src, dst, x, dx, y, dy,
                          filtering);
    return;
  }
  ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
                  dst_stride, src, dst, x, dx, y, dy);
}
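
// Note: ScaleARGB dispatches in order of decreasing specialization: exact 1/2
// and 1/4 box reductions, even integer reductions, a straight copy for 1:1,
// vertical-only scaling, bilinear up/down sampling, and finally the
// unfiltered point sampler in ScaleARGBSimple.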

LIBYUV_API
int ARGBScaleClip(const uint8_t* src_argb,
                  int src_stride_argb,
                  int src_width,
                  int src_height,
                  uint8_t* dst_argb,
                  int dst_stride_argb,
                  int dst_width,
                  int dst_height,
                  int clip_x,
                  int clip_y,
                  int clip_width,
                  int clip_height,
                  enum FilterMode filtering) {
  if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
      dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
      clip_width > 32768 || clip_height > 32768 ||
      (clip_x + clip_width) > dst_width ||
      (clip_y + clip_height) > dst_height) {
    return -1;
  }
  ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
            dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
            clip_height, filtering);
  return 0;
}
// Scale an ARGB image.
LIBYUV_API
int ARGBScale(const uint8_t* src_argb,
              int src_stride_argb,
              int src_width,
              int src_height,
              uint8_t* dst_argb,
              int dst_stride_argb,
              int dst_width,
              int dst_height,
              enum FilterMode filtering) {
  if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
      src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }
  ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
            dst_stride_argb, dst_width, dst_height, 0, 0, dst_width,
            dst_height, filtering);
  return 0;
}
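
// Illustrative usage sketch (not part of the library; the buffer names and
// dimensions below are hypothetical):
//
//   uint8_t src_argb[64 * 48 * 4];  // 64x48 ARGB source, 4 bytes per pixel.
//   uint8_t dst_argb[32 * 24 * 4];  // 32x24 ARGB destination.
//   int ret = ARGBScale(src_argb, 64 * 4, 64, 48,
//                       dst_argb, 32 * 4, 32, 24, kFilterBilinear);
//   // ret is 0 on success, -1 if a pointer or dimension is invalid.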
// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
int YUVToARGBScaleClip(const uint8_t* src_y,
                       int src_stride_y,
                       const uint8_t* src_u,
                       int src_stride_u,
                       const uint8_t* src_v,
                       int src_stride_v,
                       uint32_t src_fourcc,
                       int src_width,
                       int src_height,
                       uint8_t* dst_argb,
                       int dst_stride_argb,
                       uint32_t dst_fourcc,
                       int dst_width,
                       int dst_height,
                       int clip_x,
                       int clip_y,
                       int clip_width,
                       int clip_height,
                       enum FilterMode filtering) {
  uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
  int r;
  (void)src_fourcc;  // TODO(fbarchard): implement and/or assert.
  (void)dst_fourcc;
  I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
             argb_buffer, src_width * 4, src_width, src_height);

  r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height,
                    dst_argb, dst_stride_argb, dst_width, dst_height, clip_x,
                    clip_y, clip_width, clip_height, filtering);
  free(argb_buffer);
  return r;
}
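
// Note: YUVToARGBScaleClip converts the whole I420 source to a temporary ARGB
// frame (src_width * src_height * 4 bytes from malloc) before scaling, so
// peak memory grows with the source size even when only a small clip is
// produced; the src_fourcc/dst_fourcc parameters are currently unused.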

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif