  1. /*
  2. * Copyright 2011 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "libyuv/scale.h"
  11. #include <assert.h>
  12. #include <string.h>
  13. #include "libyuv/cpu_id.h"
  14. #include "libyuv/planar_functions.h" // For CopyPlane
  15. #include "libyuv/row.h"
  16. #include "libyuv/scale_row.h"
  17. #ifdef __cplusplus
  18. namespace libyuv {
  19. extern "C" {
  20. #endif
  21. static __inline int Abs(int v) {
  22. return v >= 0 ? v : -v;
  23. }
  24. #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
// Scale plane, 1/2
// This is an optimized version for scaling down a plane to 1/2 of
// its original size.
//
// Picks a row kernel (point-sample / horizontal-linear / 2x2 box) with
// CPU-specific acceleration when available, then writes dst_height rows,
// consuming two source rows per output row.
static void ScalePlaneDown2(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_ptr,
                            uint8_t* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  // Default to the portable C kernels; SIMD variants may override below.
  void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                        uint8_t* dst_ptr, int dst_width) =
      filtering == kFilterNone
          ? ScaleRowDown2_C
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
                                        : ScaleRowDown2Box_C);
  int row_stride = src_stride << 1;  // Advance 2 source rows per output row.
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }
  // "Any" variants handle arbitrary widths; exact variants require the
  // stated width alignment.
#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_Any_NEON
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON
                                          : ScaleRowDown2Box_Any_NEON);
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON
                      : (filtering == kFilterLinear
                             ? ScaleRowDown2Linear_NEON
                             : ScaleRowDown2Box_NEON);
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_Any_SSSE3
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
                                          : ScaleRowDown2Box_Any_SSSE3);
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown2 =
          filtering == kFilterNone
              ? ScaleRowDown2_SSSE3
              : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
                                            : ScaleRowDown2Box_SSSE3);
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_Any_AVX2
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
                                          : ScaleRowDown2Box_Any_AVX2);
    if (IS_ALIGNED(dst_width, 32)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
                      : (filtering == kFilterLinear
                             ? ScaleRowDown2Linear_AVX2
                             : ScaleRowDown2Box_AVX2);
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_Any_MSA
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA
                                          : ScaleRowDown2Box_Any_MSA);
    if (IS_ALIGNED(dst_width, 32)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA
                      : (filtering == kFilterLinear
                             ? ScaleRowDown2Linear_MSA
                             : ScaleRowDown2Box_MSA);
    }
  }
#endif
  if (filtering == kFilterLinear) {
    // Linear filters horizontally only, so the second-row offset is unused.
    src_stride = 0;
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
// 16-bit-sample variant of ScalePlaneDown2: scale a uint16_t plane to
// 1/2 size using point, linear, or 2x2 box filtering.
static void ScalePlaneDown2_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
                               int src_stride,
                               int dst_stride,
                               const uint16_t* src_ptr,
                               uint16_t* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  // Default to the portable C kernels; SIMD variants may override below.
  void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                        uint16_t* dst_ptr, int dst_width) =
      filtering == kFilterNone
          ? ScaleRowDown2_16_C
          : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
                                        : ScaleRowDown2Box_16_C);
  int row_stride = src_stride << 1;  // Advance 2 source rows per output row.
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN2_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    // Note: any filtering selects the box kernel here; there is no
    // dedicated 16-bit linear NEON kernel in this dispatch.
    ScaleRowDown2 =
        filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2_16_NEON;
  }
#endif
#if defined(HAS_SCALEROWDOWN2_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 =
        filtering == kFilterNone
            ? ScaleRowDown2_16_SSE2
            : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2
                                          : ScaleRowDown2Box_16_SSE2);
  }
#endif
  if (filtering == kFilterLinear) {
    // Linear filters horizontally only, so the second-row offset is unused.
    src_stride = 0;
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
// Scale plane, 1/4
// This is an optimized version for scaling down a plane to 1/4 of
// its original size.
//
// Point-sample or 4x4 box filter; consumes four source rows per output row.
static void ScalePlaneDown4(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_ptr,
                            uint8_t* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  // Any filtering mode other than kFilterNone uses the box kernel.
  void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                        uint8_t* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
  int row_stride = src_stride << 2;  // Advance 4 source rows per output row.
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN4_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA;
    }
  }
#endif
  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filters horizontally only.
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
// 16-bit-sample variant of ScalePlaneDown4: scale a uint16_t plane to
// 1/4 size with point-sample or 4x4 box filtering.
static void ScalePlaneDown4_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
                               int src_stride,
                               int dst_stride,
                               const uint16_t* src_ptr,
                               uint16_t* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  // Any filtering mode other than kFilterNone uses the box kernel.
  void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                        uint16_t* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
  int row_stride = src_stride << 2;  // Advance 4 source rows per output row.
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON;
  }
#endif
#if defined(HAS_SCALEROWDOWN4_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 =
        filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
  }
#endif
  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filters horizontally only.
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
// Scale plane down, 3/4
//
// Produces 3 output rows per 4 source rows. Two row kernels are used:
// _0 for output rows adjacent to a box edge and _1 for the middle row;
// the third output row reuses _0 with a negated stride to filter upward.
// dst_width must be a multiple of 3.
static void ScalePlaneDown34(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
  // Linear mode filters horizontally only, so row kernels get stride 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  (void)src_width;
  (void)src_height;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_C;
    ScaleRowDown34_1 = ScaleRowDown34_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  }
#if defined(HAS_SCALEROWDOWN34_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
    }
    if (dst_width % 24 == 0) {
      if (!filtering) {
        ScaleRowDown34_0 = ScaleRowDown34_NEON;
        ScaleRowDown34_1 = ScaleRowDown34_NEON;
      } else {
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_Any_MSA;
      ScaleRowDown34_1 = ScaleRowDown34_Any_MSA;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA;
    }
    if (dst_width % 48 == 0) {
      if (!filtering) {
        ScaleRowDown34_0 = ScaleRowDown34_MSA;
        ScaleRowDown34_1 = ScaleRowDown34_MSA;
      } else {
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA;
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
    }
    if (dst_width % 24 == 0) {
      if (!filtering) {
        ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
        ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
      } else {
        ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
        ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
      }
    }
  }
#endif
  // Main loop: 4 source rows -> 3 destination rows.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    // Third row filters with the row above it (negative stride).
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }
  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
// 16-bit-sample variant of ScalePlaneDown34: 4 source rows -> 3 output
// rows on a uint16_t plane. dst_width must be a multiple of 3.
static void ScalePlaneDown34_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint16_t* src_ptr,
                                uint16_t* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                           uint16_t* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                           uint16_t* dst_ptr, int dst_width);
  // Linear mode filters horizontally only, so row kernels get stride 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  (void)src_width;
  (void)src_height;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_16_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN34_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
    }
  }
#endif
  // Main loop: 4 source rows -> 3 destination rows.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    // Third row filters with the row above it (negative stride).
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }
  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
// Scale plane, 3/8
// This is an optimized version for scaling down a plane to 3/8
// of its original size.
//
// Uses box filter arranges like this
// aaabbbcc -> abc
// aaabbbcc def
// aaabbbcc ghi
// dddeeeff
// dddeeeff
// dddeeeff
// ggghhhii
// ggghhhii
// Boxes are 3x3, 2x3, 3x2 and 2x2
//
// Produces 3 output rows per 8 source rows; the _3 kernel covers a
// 3-row box and the _2 kernel the trailing 2-row box. dst_width must
// be a multiple of 3.
static void ScalePlaneDown38(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
                           uint8_t* dst_ptr, int dst_width);
  // Linear mode filters horizontally only, so row kernels get stride 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  (void)src_width;
  (void)src_height;
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_C;
    ScaleRowDown38_2 = ScaleRowDown38_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  }
#if defined(HAS_SCALEROWDOWN38_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
    }
    if (dst_width % 12 == 0) {
      if (!filtering) {
        ScaleRowDown38_3 = ScaleRowDown38_NEON;
        ScaleRowDown38_2 = ScaleRowDown38_NEON;
      } else {
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
    }
    // Exact SSSE3 kernels have different width requirements for the
    // point (12) and box (6) variants.
    if (dst_width % 12 == 0 && !filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    }
    if (dst_width % 6 == 0 && filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_Any_MSA;
      ScaleRowDown38_2 = ScaleRowDown38_Any_MSA;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA;
    }
    if (dst_width % 12 == 0) {
      if (!filtering) {
        ScaleRowDown38_3 = ScaleRowDown38_MSA;
        ScaleRowDown38_2 = ScaleRowDown38_MSA;
      } else {
        ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA;
        ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA;
      }
    }
  }
#endif
  // Main loop: 8 source rows -> 3 destination rows (3 + 3 + 2).
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }
  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
// 16-bit-sample variant of ScalePlaneDown38: 8 source rows -> 3 output
// rows on a uint16_t plane. dst_width must be a multiple of 3.
static void ScalePlaneDown38_16(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint16_t* src_ptr,
                                uint16_t* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                           uint16_t* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
                           uint16_t* dst_ptr, int dst_width);
  // Linear mode filters horizontally only, so row kernels get stride 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  (void)src_width;
  (void)src_height;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_16_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN38_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
    }
  }
#endif
  // Main loop: 8 source rows -> 3 destination rows (3 + 3 + 2).
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }
  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
  641. #define MIN1(x) ((x) < 1 ? 1 : (x))
  642. static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
  643. uint32_t sum = 0u;
  644. int x;
  645. assert(iboxwidth > 0);
  646. for (x = 0; x < iboxwidth; ++x) {
  647. sum += src_ptr[x];
  648. }
  649. return sum;
  650. }
  651. static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
  652. uint32_t sum = 0u;
  653. int x;
  654. assert(iboxwidth > 0);
  655. for (x = 0; x < iboxwidth; ++x) {
  656. sum += src_ptr[x];
  657. }
  658. return sum;
  659. }
  660. static void ScaleAddCols2_C(int dst_width,
  661. int boxheight,
  662. int x,
  663. int dx,
  664. const uint16_t* src_ptr,
  665. uint8_t* dst_ptr) {
  666. int i;
  667. int scaletbl[2];
  668. int minboxwidth = dx >> 16;
  669. int boxwidth;
  670. scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  671. scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  672. for (i = 0; i < dst_width; ++i) {
  673. int ix = x >> 16;
  674. x += dx;
  675. boxwidth = MIN1((x >> 16) - ix);
  676. *dst_ptr++ =
  677. SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
  678. 16;
  679. }
  680. }
  681. static void ScaleAddCols2_16_C(int dst_width,
  682. int boxheight,
  683. int x,
  684. int dx,
  685. const uint32_t* src_ptr,
  686. uint16_t* dst_ptr) {
  687. int i;
  688. int scaletbl[2];
  689. int minboxwidth = dx >> 16;
  690. int boxwidth;
  691. scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  692. scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  693. for (i = 0; i < dst_width; ++i) {
  694. int ix = x >> 16;
  695. x += dx;
  696. boxwidth = MIN1((x >> 16) - ix);
  697. *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
  698. scaletbl[boxwidth - minboxwidth] >>
  699. 16;
  700. }
  701. }
  702. static void ScaleAddCols0_C(int dst_width,
  703. int boxheight,
  704. int x,
  705. int dx,
  706. const uint16_t* src_ptr,
  707. uint8_t* dst_ptr) {
  708. int scaleval = 65536 / boxheight;
  709. int i;
  710. (void)dx;
  711. src_ptr += (x >> 16);
  712. for (i = 0; i < dst_width; ++i) {
  713. *dst_ptr++ = src_ptr[i] * scaleval >> 16;
  714. }
  715. }
  716. static void ScaleAddCols1_C(int dst_width,
  717. int boxheight,
  718. int x,
  719. int dx,
  720. const uint16_t* src_ptr,
  721. uint8_t* dst_ptr) {
  722. int boxwidth = MIN1(dx >> 16);
  723. int scaleval = 65536 / (boxwidth * boxheight);
  724. int i;
  725. x >>= 16;
  726. for (i = 0; i < dst_width; ++i) {
  727. *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
  728. x += boxwidth;
  729. }
  730. }
  731. static void ScaleAddCols1_16_C(int dst_width,
  732. int boxheight,
  733. int x,
  734. int dx,
  735. const uint32_t* src_ptr,
  736. uint16_t* dst_ptr) {
  737. int boxwidth = MIN1(dx >> 16);
  738. int scaleval = 65536 / (boxwidth * boxheight);
  739. int i;
  740. for (i = 0; i < dst_width; ++i) {
  741. *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
  742. x += boxwidth;
  743. }
  744. }
// Scale plane down to any dimensions, with interpolation.
// (boxfilter).
//
// Same method as SimpleScale, which is fixed point, outputting
// one pixel of destination using fixed point (16.16) to step
// through source, sampling a box of pixel with simple
// averaging.
static void ScalePlaneBox(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
                          int src_stride,
                          int dst_stride,
                          const uint8_t* src_ptr,
                          uint8_t* dst_ptr) {
  int j, k;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Clamp limit so the source y walk never passes the bottom of the plane.
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);
  {
    // Allocate a row buffer of uint16_t, used as a per-column accumulator
    // for the rows of the current box.
    align_buffer_64(row16, src_width * 2);
    // Select the column reducer by horizontal step:
    //   fractional dx        -> variable-width boxes (ScaleAddCols2_C)
    //   integer dx != 1.0    -> constant-width boxes (ScaleAddCols1_C)
    //   dx == 1.0 (0x10000)  -> no horizontal scaling (ScaleAddCols0_C)
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
                         const uint16_t* src_ptr, uint8_t* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_C
                      : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
    // Row accumulator; upgraded to a SIMD variant when the CPU supports one
    // (Any_* variants handle widths that are not a full SIMD multiple).
    void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
                        int src_width) = ScaleAddRow_C;
#if defined(HAS_SCALEADDROW_SSE2)
    if (TestCpuFlag(kCpuHasSSE2)) {
      ScaleAddRow = ScaleAddRow_Any_SSE2;
      if (IS_ALIGNED(src_width, 16)) {
        ScaleAddRow = ScaleAddRow_SSE2;
      }
    }
#endif
#if defined(HAS_SCALEADDROW_AVX2)
    if (TestCpuFlag(kCpuHasAVX2)) {
      ScaleAddRow = ScaleAddRow_Any_AVX2;
      if (IS_ALIGNED(src_width, 32)) {
        ScaleAddRow = ScaleAddRow_AVX2;
      }
    }
#endif
#if defined(HAS_SCALEADDROW_NEON)
    if (TestCpuFlag(kCpuHasNEON)) {
      ScaleAddRow = ScaleAddRow_Any_NEON;
      if (IS_ALIGNED(src_width, 16)) {
        ScaleAddRow = ScaleAddRow_NEON;
      }
    }
#endif
#if defined(HAS_SCALEADDROW_MSA)
    if (TestCpuFlag(kCpuHasMSA)) {
      ScaleAddRow = ScaleAddRow_Any_MSA;
      if (IS_ALIGNED(src_width, 16)) {
        ScaleAddRow = ScaleAddRow_MSA;
      }
    }
#endif
    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint8_t* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
      // Number of source rows contributing to this destination row.
      boxheight = MIN1((y >> 16) - iy);
      // Zero the accumulator, sum boxheight source rows into it, then
      // reduce columns (dividing by the box area) into the destination.
      memset(row16, 0, src_width * 2);
      for (k = 0; k < boxheight; ++k) {
        ScaleAddRow(src, (uint16_t*)(row16), src_width);
        src += src_stride;
      }
      ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row16);
  }
}
// 16-bit pixel version of ScalePlaneBox: box-filter downscale to any
// dimensions, accumulating source rows into a uint32_t row buffer.
static void ScalePlaneBox_16(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint16_t* src_ptr,
                             uint16_t* dst_ptr) {
  int j, k;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Clamp limit so the source y walk never passes the bottom of the plane.
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);
  {
    // Allocate a row buffer of uint32_t (per-column accumulator).
    align_buffer_64(row32, src_width * 4);
    // Fractional horizontal step needs variable-width boxes; otherwise use
    // the constant-width column reducer.
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
                         const uint32_t* src_ptr, uint16_t* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
    void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
                        int src_width) = ScaleAddRow_16_C;
#if defined(HAS_SCALEADDROW_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
      ScaleAddRow = ScaleAddRow_16_SSE2;
    }
#endif
    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint16_t* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
      // Number of source rows contributing to this destination row.
      boxheight = MIN1((y >> 16) - iy);
      // Zero the accumulator, sum boxheight source rows into it, then
      // reduce columns (dividing by the box area) into the destination.
      memset(row32, 0, src_width * 4);
      for (k = 0; k < boxheight; ++k) {
        ScaleAddRow(src, (uint32_t*)(row32), src_width);
        src += src_stride;
      }
      ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row32);
  }
}
// Scale plane down with bilinear interpolation.
void ScalePlaneBilinearDown(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_ptr,
                            uint8_t* dst_ptr,
                            enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer (holds one vertically interpolated source row).
  align_buffer_64(row, src_width);
  // Last readable source row in 16.16 fixed point; y is clamped to it so the
  // two-row interpolation never reads past the bottom of the plane.
  const int max_y = (src_height - 1) << 16;
  int j;
  // Wide sources (>= 32768) use the 64-bit column scaler.
  void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                          int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);
  // Upgrade row interpolation / column filtering to SIMD when available;
  // Any_* variants handle widths that are not a full SIMD multiple.
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_MSA)
  if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleFilterCols = ScaleFilterCols_MSA;
    }
  }
#endif
  if (y > max_y) {
    y = max_y;
  }
  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
    const uint8_t* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Linear: horizontal-only filtering from the nearest source row.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Bilinear: blend two adjacent source rows by the fractional part of
      // y (top 8 bits of the fraction), then filter horizontally.
      int yf = (y >> 8) & 255;
      InterpolateRow(row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
// 16-bit pixel version of ScalePlaneBilinearDown.
void ScalePlaneBilinearDown_16(int src_width,
                               int src_height,
                               int dst_width,
                               int dst_height,
                               int src_stride,
                               int dst_stride,
                               const uint16_t* src_ptr,
                               uint16_t* dst_ptr,
                               enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer (uint16_t pixels; holds one interpolated row).
  align_buffer_64(row, src_width * 2);
  // Last readable source row in 16.16 fixed point; y is clamped to it so the
  // two-row interpolation never reads past the bottom of the plane.
  const int max_y = (src_height - 1) << 16;
  int j;
  // Wide sources (>= 32768) use the 64-bit column scaler.
  void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
                          int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
  void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);
  // Upgrade row interpolation / column filtering to SIMD when available.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  if (y > max_y) {
    y = max_y;
  }
  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
    const uint16_t* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Linear: horizontal-only filtering from the nearest source row.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Bilinear: blend two adjacent source rows by the fractional part of
      // y (top 8 bits of the fraction), then filter horizontally.
      int yf = (y >> 8) & 255;
      InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
// Scale plane up with bilinear interpolation.
void ScalePlaneBilinearUp(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
                          int src_stride,
                          int dst_stride,
                          const uint8_t* src_ptr,
                          uint8_t* dst_ptr,
                          enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Last readable source row in 16.16 fixed point; y is clamped to it.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  // kFilterNone uses the unfiltered (nearest) column scaler.
  void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                          int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);
  // Upgrade row interpolation / column filtering to SIMD when available.
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
  // Wide sources (>= 32768) use the 64-bit column scaler.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_MSA)
  if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleFilterCols = ScaleFilterCols_MSA;
    }
  }
#endif
  // Exact 2x horizontal upscale starting in the left half-pixel can use the
  // dedicated pixel-doubling column scaler.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }
  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8_t* src = src_ptr + yi * src_stride;
    // Allocate 2 row buffers. kRowSize rounds dst_width up to a multiple of
    // 32 bytes, keeping both halves of the 64-byte-aligned buffer aligned.
    const int kRowSize = (dst_width + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);
    // rowptr/rowstride form a 2-row ring of horizontally scaled source rows;
    // advancing rowptr by rowstride and negating rowstride swaps which
    // buffer is the "top" row for vertical interpolation.
    uint8_t* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;  // Source row index currently at the front of the ring.
    // Prime the ring with the first two source rows (the same row twice for
    // a single-row source).
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;
    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Crossed into a new source row: clamp y, then scale the next source
        // row into the older half of the ring and flip the ring direction.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Linear: horizontal-only; emit the current row without blending.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        // Blend the two buffered rows by the fractional part of y
        // (top 8 bits of the fraction).
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
// 16-bit pixel version of ScalePlaneBilinearUp.
void ScalePlaneBilinearUp_16(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             int src_stride,
                             int dst_stride,
                             const uint16_t* src_ptr,
                             uint16_t* dst_ptr,
                             enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Last readable source row in 16.16 fixed point; y is clamped to it.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_16_C;
  // kFilterNone uses the unfiltered (nearest) column scaler.
  void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
                          int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);
  // Upgrade row interpolation / column filtering to SIMD when available.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
  // Wide sources (>= 32768) use the 64-bit column scaler.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_16_C;
  }
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  // Exact 2x horizontal upscale starting in the left half-pixel can use the
  // dedicated pixel-doubling column scaler.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
    }
#endif
  }
  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint16_t* src = src_ptr + yi * src_stride;
    // Allocate 2 row buffers. kRowSize rounds dst_width up to a multiple of
    // 32 elements (buffer is sized kRowSize * 4 bytes for two uint16_t rows).
    const int kRowSize = (dst_width + 31) & ~31;
    align_buffer_64(row, kRowSize * 4);
    // rowptr/rowstride form a 2-row ring of horizontally scaled source rows;
    // advancing rowptr by rowstride and negating rowstride swaps which
    // buffer is the "top" row for vertical interpolation.
    uint16_t* rowptr = (uint16_t*)row;
    int rowstride = kRowSize;
    int lasty = yi;  // Source row index currently at the front of the ring.
    // Prime the ring with the first two source rows (the same row twice for
    // a single-row source).
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;
    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Crossed into a new source row: clamp y, then scale the next source
        // row into the older half of the ring and flip the ring direction.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Linear: horizontal-only; emit the current row without blending.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        // Blend the two buffered rows by the fractional part of y
        // (top 8 bits of the fraction).
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
  1318. // Scale Plane to/from any dimensions, without interpolation.
  1319. // Fixed point math is used for performance: The upper 16 bits
  1320. // of x and dx is the integer part of the source position and
  1321. // the lower 16 bits are the fixed decimal part.
  1322. static void ScalePlaneSimple(int src_width,
  1323. int src_height,
  1324. int dst_width,
  1325. int dst_height,
  1326. int src_stride,
  1327. int dst_stride,
  1328. const uint8_t* src_ptr,
  1329. uint8_t* dst_ptr) {
  1330. int i;
  1331. void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
  1332. int x, int dx) = ScaleCols_C;
  1333. // Initial source x/y coordinate and step values as 16.16 fixed point.
  1334. int x = 0;
  1335. int y = 0;
  1336. int dx = 0;
  1337. int dy = 0;
  1338. ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
  1339. &dx, &dy);
  1340. src_width = Abs(src_width);
  1341. if (src_width * 2 == dst_width && x < 0x8000) {
  1342. ScaleCols = ScaleColsUp2_C;
  1343. #if defined(HAS_SCALECOLS_SSE2)
  1344. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  1345. ScaleCols = ScaleColsUp2_SSE2;
  1346. }
  1347. #endif
  1348. }
  1349. for (i = 0; i < dst_height; ++i) {
  1350. ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
  1351. dst_ptr += dst_stride;
  1352. y += dy;
  1353. }
  1354. }
  1355. static void ScalePlaneSimple_16(int src_width,
  1356. int src_height,
  1357. int dst_width,
  1358. int dst_height,
  1359. int src_stride,
  1360. int dst_stride,
  1361. const uint16_t* src_ptr,
  1362. uint16_t* dst_ptr) {
  1363. int i;
  1364. void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width,
  1365. int x, int dx) = ScaleCols_16_C;
  1366. // Initial source x/y coordinate and step values as 16.16 fixed point.
  1367. int x = 0;
  1368. int y = 0;
  1369. int dx = 0;
  1370. int dy = 0;
  1371. ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
  1372. &dx, &dy);
  1373. src_width = Abs(src_width);
  1374. if (src_width * 2 == dst_width && x < 0x8000) {
  1375. ScaleCols = ScaleColsUp2_16_C;
  1376. #if defined(HAS_SCALECOLS_16_SSE2)
  1377. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  1378. ScaleCols = ScaleColsUp2_16_SSE2;
  1379. }
  1380. #endif
  1381. }
  1382. for (i = 0; i < dst_height; ++i) {
  1383. ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
  1384. dst_ptr += dst_stride;
  1385. y += dy;
  1386. }
  1387. }
// Scale a plane.
// This function dispatches to a specialized scaler based on scale factor.
LIBYUV_API
void ScalePlane(const uint8_t* src,
                int src_stride,
                int src_width,
                int src_height,
                uint8_t* dst,
                int dst_stride,
                int dst_width,
                int dst_height,
                enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);
  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }
  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width && filtering != kFilterBox) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst, 0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down. Check the fixed-ratio fast paths in order.
    if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
                      dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
      // optimized, 3/8
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
                       dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        (filtering == kFilterBox || filtering == kFilterNone)) {
      // optimized, 1/4
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
                      dst_stride, src, dst, filtering);
      return;
    }
  }
  // General paths: box filter for downscales of more than 2x vertically,
  // bilinear up/down otherwise, nearest-neighbor when unfiltered.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
                  dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
    return;
  }
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
                   dst_stride, src, dst);
}
// 16-bit pixel version of ScalePlane: dispatches to a specialized scaler
// based on scale factor.
LIBYUV_API
void ScalePlane_16(const uint16_t* src,
                   int src_stride,
                   int src_width,
                   int src_height,
                   uint16_t* dst,
                   int dst_stride,
                   int dst_width,
                   int dst_height,
                   enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);
  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }
  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width && filtering != kFilterBox) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
                          dst_stride, src, dst, 0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down. Check the fixed-ratio fast paths in order.
    if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
      // optimized, 3/8
      ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        (filtering == kFilterBox || filtering == kFilterNone)) {
      // optimized, 1/4
      ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  // General paths: box filter for downscales of more than 2x vertically,
  // bilinear up/down otherwise, nearest-neighbor when unfiltered.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride,
                     dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
                              src_stride, dst_stride, src, dst, filtering);
    return;
  }
  ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
                      dst_stride, src, dst);
}
  1550. // Scale an I420 image.
  1551. // This function in turn calls a scaling function for each plane.
  1552. LIBYUV_API
  1553. int I420Scale(const uint8_t* src_y,
  1554. int src_stride_y,
  1555. const uint8_t* src_u,
  1556. int src_stride_u,
  1557. const uint8_t* src_v,
  1558. int src_stride_v,
  1559. int src_width,
  1560. int src_height,
  1561. uint8_t* dst_y,
  1562. int dst_stride_y,
  1563. uint8_t* dst_u,
  1564. int dst_stride_u,
  1565. uint8_t* dst_v,
  1566. int dst_stride_v,
  1567. int dst_width,
  1568. int dst_height,
  1569. enum FilterMode filtering) {
  1570. int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  1571. int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  1572. int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  1573. int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  1574. if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
  1575. src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
  1576. dst_width <= 0 || dst_height <= 0) {
  1577. return -1;
  1578. }
  1579. ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
  1580. dst_width, dst_height, filtering);
  1581. ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
  1582. dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
  1583. ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
  1584. dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
  1585. return 0;
  1586. }
  1587. LIBYUV_API
  1588. int I420Scale_16(const uint16_t* src_y,
  1589. int src_stride_y,
  1590. const uint16_t* src_u,
  1591. int src_stride_u,
  1592. const uint16_t* src_v,
  1593. int src_stride_v,
  1594. int src_width,
  1595. int src_height,
  1596. uint16_t* dst_y,
  1597. int dst_stride_y,
  1598. uint16_t* dst_u,
  1599. int dst_stride_u,
  1600. uint16_t* dst_v,
  1601. int dst_stride_v,
  1602. int dst_width,
  1603. int dst_height,
  1604. enum FilterMode filtering) {
  1605. int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  1606. int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  1607. int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  1608. int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  1609. if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
  1610. src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
  1611. dst_width <= 0 || dst_height <= 0) {
  1612. return -1;
  1613. }
  1614. ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
  1615. dst_width, dst_height, filtering);
  1616. ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
  1617. dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
  1618. ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
  1619. dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
  1620. return 0;
  1621. }
  1622. // Deprecated api
  1623. LIBYUV_API
  1624. int Scale(const uint8_t* src_y,
  1625. const uint8_t* src_u,
  1626. const uint8_t* src_v,
  1627. int src_stride_y,
  1628. int src_stride_u,
  1629. int src_stride_v,
  1630. int src_width,
  1631. int src_height,
  1632. uint8_t* dst_y,
  1633. uint8_t* dst_u,
  1634. uint8_t* dst_v,
  1635. int dst_stride_y,
  1636. int dst_stride_u,
  1637. int dst_stride_v,
  1638. int dst_width,
  1639. int dst_height,
  1640. LIBYUV_BOOL interpolate) {
  1641. return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v,
  1642. src_stride_v, src_width, src_height, dst_y, dst_stride_y,
  1643. dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width,
  1644. dst_height, interpolate ? kFilterBox : kFilterNone);
  1645. }
  1646. #ifdef __cplusplus
  1647. } // extern "C"
  1648. } // namespace libyuv
  1649. #endif