scale.cc 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896
  1. /*
  2. * Copyright 2011 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "libyuv/scale.h"
  11. #include <assert.h>
  12. #include <string.h>
  13. #include "libyuv/cpu_id.h"
  14. #include "libyuv/planar_functions.h" // For CopyPlane
  15. #include "libyuv/row.h"
  16. #include "libyuv/scale_row.h"
  17. #ifdef __cplusplus
  18. namespace libyuv {
  19. extern "C" {
  20. #endif
  // Returns the absolute value of v.
  static __inline int Abs(int v) {
    if (v < 0) {
      return -v;
    }
    return v;
  }
  // Computes (|v| + a) >> s and re-applies the sign of v, so negative values
  // are reduced symmetrically to positive ones.
  // Every argument and the whole expansion are parenthesized so the macro can
  // be given compound expressions and used inside larger expressions.
  // Note: v is evaluated twice, so it must not have side effects.
  #define SUBSAMPLE(v, a, s) \
    (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
  25. // Scale plane, 1/2
  26. // This is an optimized version for scaling down a plane to 1/2 of
  27. // its original size.
  28. static void ScalePlaneDown2(int src_width,
  29. int src_height,
  30. int dst_width,
  31. int dst_height,
  32. int src_stride,
  33. int dst_stride,
  34. const uint8_t* src_ptr,
  35. uint8_t* dst_ptr,
  36. enum FilterMode filtering) {
  37. int y;
  38. void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
  39. uint8_t* dst_ptr, int dst_width) =
  40. filtering == kFilterNone
  41. ? ScaleRowDown2_C
  42. : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
  43. : ScaleRowDown2Box_C);
  44. int row_stride = src_stride << 1;
  45. (void)src_width;
  46. (void)src_height;
  47. if (!filtering) {
  48. src_ptr += src_stride; // Point to odd rows.
  49. src_stride = 0;
  50. }
  51. #if defined(HAS_SCALEROWDOWN2_NEON)
  52. if (TestCpuFlag(kCpuHasNEON)) {
  53. ScaleRowDown2 =
  54. filtering == kFilterNone
  55. ? ScaleRowDown2_Any_NEON
  56. : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON
  57. : ScaleRowDown2Box_Any_NEON);
  58. if (IS_ALIGNED(dst_width, 16)) {
  59. ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON
  60. : (filtering == kFilterLinear
  61. ? ScaleRowDown2Linear_NEON
  62. : ScaleRowDown2Box_NEON);
  63. }
  64. }
  65. #endif
  66. #if defined(HAS_SCALEROWDOWN2_SSSE3)
  67. if (TestCpuFlag(kCpuHasSSSE3)) {
  68. ScaleRowDown2 =
  69. filtering == kFilterNone
  70. ? ScaleRowDown2_Any_SSSE3
  71. : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
  72. : ScaleRowDown2Box_Any_SSSE3);
  73. if (IS_ALIGNED(dst_width, 16)) {
  74. ScaleRowDown2 =
  75. filtering == kFilterNone
  76. ? ScaleRowDown2_SSSE3
  77. : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
  78. : ScaleRowDown2Box_SSSE3);
  79. }
  80. }
  81. #endif
  82. #if defined(HAS_SCALEROWDOWN2_AVX2)
  83. if (TestCpuFlag(kCpuHasAVX2)) {
  84. ScaleRowDown2 =
  85. filtering == kFilterNone
  86. ? ScaleRowDown2_Any_AVX2
  87. : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
  88. : ScaleRowDown2Box_Any_AVX2);
  89. if (IS_ALIGNED(dst_width, 32)) {
  90. ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
  91. : (filtering == kFilterLinear
  92. ? ScaleRowDown2Linear_AVX2
  93. : ScaleRowDown2Box_AVX2);
  94. }
  95. }
  96. #endif
  97. #if defined(HAS_SCALEROWDOWN2_MSA)
  98. if (TestCpuFlag(kCpuHasMSA)) {
  99. ScaleRowDown2 =
  100. filtering == kFilterNone
  101. ? ScaleRowDown2_Any_MSA
  102. : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA
  103. : ScaleRowDown2Box_Any_MSA);
  104. if (IS_ALIGNED(dst_width, 32)) {
  105. ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA
  106. : (filtering == kFilterLinear
  107. ? ScaleRowDown2Linear_MSA
  108. : ScaleRowDown2Box_MSA);
  109. }
  110. }
  111. #endif
  112. #if defined(HAS_SCALEROWDOWN2_MMI)
  113. if (TestCpuFlag(kCpuHasMMI)) {
  114. ScaleRowDown2 =
  115. filtering == kFilterNone
  116. ? ScaleRowDown2_Any_MMI
  117. : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MMI
  118. : ScaleRowDown2Box_Any_MMI);
  119. if (IS_ALIGNED(dst_width, 8)) {
  120. ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MMI
  121. : (filtering == kFilterLinear
  122. ? ScaleRowDown2Linear_MMI
  123. : ScaleRowDown2Box_MMI);
  124. }
  125. }
  126. #endif
  127. if (filtering == kFilterLinear) {
  128. src_stride = 0;
  129. }
  130. // TODO(fbarchard): Loop through source height to allow odd height.
  131. for (y = 0; y < dst_height; ++y) {
  132. ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
  133. src_ptr += row_stride;
  134. dst_ptr += dst_stride;
  135. }
  136. }
  137. static void ScalePlaneDown2_16(int src_width,
  138. int src_height,
  139. int dst_width,
  140. int dst_height,
  141. int src_stride,
  142. int dst_stride,
  143. const uint16_t* src_ptr,
  144. uint16_t* dst_ptr,
  145. enum FilterMode filtering) {
  146. int y;
  147. void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
  148. uint16_t* dst_ptr, int dst_width) =
  149. filtering == kFilterNone
  150. ? ScaleRowDown2_16_C
  151. : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
  152. : ScaleRowDown2Box_16_C);
  153. int row_stride = src_stride << 1;
  154. (void)src_width;
  155. (void)src_height;
  156. if (!filtering) {
  157. src_ptr += src_stride; // Point to odd rows.
  158. src_stride = 0;
  159. }
  160. #if defined(HAS_SCALEROWDOWN2_16_NEON)
  161. if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
  162. ScaleRowDown2 =
  163. filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2_16_NEON;
  164. }
  165. #endif
  166. #if defined(HAS_SCALEROWDOWN2_16_SSE2)
  167. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
  168. ScaleRowDown2 =
  169. filtering == kFilterNone
  170. ? ScaleRowDown2_16_SSE2
  171. : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2
  172. : ScaleRowDown2Box_16_SSE2);
  173. }
  174. #endif
  175. #if defined(HAS_SCALEROWDOWN2_16_MMI)
  176. if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
  177. ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_MMI
  178. : (filtering == kFilterLinear
  179. ? ScaleRowDown2Linear_16_MMI
  180. : ScaleRowDown2Box_16_MMI);
  181. }
  182. #endif
  183. if (filtering == kFilterLinear) {
  184. src_stride = 0;
  185. }
  186. // TODO(fbarchard): Loop through source height to allow odd height.
  187. for (y = 0; y < dst_height; ++y) {
  188. ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
  189. src_ptr += row_stride;
  190. dst_ptr += dst_stride;
  191. }
  192. }
  193. // Scale plane, 1/4
  194. // This is an optimized version for scaling down a plane to 1/4 of
  195. // its original size.
  196. static void ScalePlaneDown4(int src_width,
  197. int src_height,
  198. int dst_width,
  199. int dst_height,
  200. int src_stride,
  201. int dst_stride,
  202. const uint8_t* src_ptr,
  203. uint8_t* dst_ptr,
  204. enum FilterMode filtering) {
  205. int y;
  206. void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
  207. uint8_t* dst_ptr, int dst_width) =
  208. filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
  209. int row_stride = src_stride << 2;
  210. (void)src_width;
  211. (void)src_height;
  212. if (!filtering) {
  213. src_ptr += src_stride * 2; // Point to row 2.
  214. src_stride = 0;
  215. }
  216. #if defined(HAS_SCALEROWDOWN4_NEON)
  217. if (TestCpuFlag(kCpuHasNEON)) {
  218. ScaleRowDown4 =
  219. filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
  220. if (IS_ALIGNED(dst_width, 8)) {
  221. ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
  222. }
  223. }
  224. #endif
  225. #if defined(HAS_SCALEROWDOWN4_SSSE3)
  226. if (TestCpuFlag(kCpuHasSSSE3)) {
  227. ScaleRowDown4 =
  228. filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
  229. if (IS_ALIGNED(dst_width, 8)) {
  230. ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
  231. }
  232. }
  233. #endif
  234. #if defined(HAS_SCALEROWDOWN4_AVX2)
  235. if (TestCpuFlag(kCpuHasAVX2)) {
  236. ScaleRowDown4 =
  237. filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
  238. if (IS_ALIGNED(dst_width, 16)) {
  239. ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
  240. }
  241. }
  242. #endif
  243. #if defined(HAS_SCALEROWDOWN4_MSA)
  244. if (TestCpuFlag(kCpuHasMSA)) {
  245. ScaleRowDown4 =
  246. filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA;
  247. if (IS_ALIGNED(dst_width, 16)) {
  248. ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA;
  249. }
  250. }
  251. #endif
  252. #if defined(HAS_SCALEROWDOWN4_MMI)
  253. if (TestCpuFlag(kCpuHasMMI)) {
  254. ScaleRowDown4 =
  255. filtering ? ScaleRowDown4Box_Any_MMI : ScaleRowDown4_Any_MMI;
  256. if (IS_ALIGNED(dst_width, 8)) {
  257. ScaleRowDown4 = filtering ? ScaleRowDown4Box_MMI : ScaleRowDown4_MMI;
  258. }
  259. }
  260. #endif
  261. if (filtering == kFilterLinear) {
  262. src_stride = 0;
  263. }
  264. for (y = 0; y < dst_height; ++y) {
  265. ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
  266. src_ptr += row_stride;
  267. dst_ptr += dst_stride;
  268. }
  269. }
  270. static void ScalePlaneDown4_16(int src_width,
  271. int src_height,
  272. int dst_width,
  273. int dst_height,
  274. int src_stride,
  275. int dst_stride,
  276. const uint16_t* src_ptr,
  277. uint16_t* dst_ptr,
  278. enum FilterMode filtering) {
  279. int y;
  280. void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
  281. uint16_t* dst_ptr, int dst_width) =
  282. filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
  283. int row_stride = src_stride << 2;
  284. (void)src_width;
  285. (void)src_height;
  286. if (!filtering) {
  287. src_ptr += src_stride * 2; // Point to row 2.
  288. src_stride = 0;
  289. }
  290. #if defined(HAS_SCALEROWDOWN4_16_NEON)
  291. if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
  292. ScaleRowDown4 =
  293. filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON;
  294. }
  295. #endif
  296. #if defined(HAS_SCALEROWDOWN4_16_SSE2)
  297. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  298. ScaleRowDown4 =
  299. filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
  300. }
  301. #endif
  302. #if defined(HAS_SCALEROWDOWN4_16_MMI)
  303. if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
  304. ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_MMI : ScaleRowDown4_16_MMI;
  305. }
  306. #endif
  307. if (filtering == kFilterLinear) {
  308. src_stride = 0;
  309. }
  310. for (y = 0; y < dst_height; ++y) {
  311. ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
  312. src_ptr += row_stride;
  313. dst_ptr += dst_stride;
  314. }
  315. }
  316. // Scale plane down, 3/4
  317. static void ScalePlaneDown34(int src_width,
  318. int src_height,
  319. int dst_width,
  320. int dst_height,
  321. int src_stride,
  322. int dst_stride,
  323. const uint8_t* src_ptr,
  324. uint8_t* dst_ptr,
  325. enum FilterMode filtering) {
  326. int y;
  327. void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
  328. uint8_t* dst_ptr, int dst_width);
  329. void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
  330. uint8_t* dst_ptr, int dst_width);
  331. const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  332. (void)src_width;
  333. (void)src_height;
  334. assert(dst_width % 3 == 0);
  335. if (!filtering) {
  336. ScaleRowDown34_0 = ScaleRowDown34_C;
  337. ScaleRowDown34_1 = ScaleRowDown34_C;
  338. } else {
  339. ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
  340. ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  341. }
  342. #if defined(HAS_SCALEROWDOWN34_NEON)
  343. if (TestCpuFlag(kCpuHasNEON)) {
  344. if (!filtering) {
  345. ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
  346. ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
  347. } else {
  348. ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
  349. ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
  350. }
  351. if (dst_width % 24 == 0) {
  352. if (!filtering) {
  353. ScaleRowDown34_0 = ScaleRowDown34_NEON;
  354. ScaleRowDown34_1 = ScaleRowDown34_NEON;
  355. } else {
  356. ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
  357. ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
  358. }
  359. }
  360. }
  361. #endif
  362. #if defined(HAS_SCALEROWDOWN34_MSA)
  363. if (TestCpuFlag(kCpuHasMSA)) {
  364. if (!filtering) {
  365. ScaleRowDown34_0 = ScaleRowDown34_Any_MSA;
  366. ScaleRowDown34_1 = ScaleRowDown34_Any_MSA;
  367. } else {
  368. ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA;
  369. ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA;
  370. }
  371. if (dst_width % 48 == 0) {
  372. if (!filtering) {
  373. ScaleRowDown34_0 = ScaleRowDown34_MSA;
  374. ScaleRowDown34_1 = ScaleRowDown34_MSA;
  375. } else {
  376. ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA;
  377. ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA;
  378. }
  379. }
  380. }
  381. #endif
  382. #if defined(HAS_SCALEROWDOWN34_SSSE3)
  383. if (TestCpuFlag(kCpuHasSSSE3)) {
  384. if (!filtering) {
  385. ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
  386. ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
  387. } else {
  388. ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
  389. ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
  390. }
  391. if (dst_width % 24 == 0) {
  392. if (!filtering) {
  393. ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
  394. ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
  395. } else {
  396. ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
  397. ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
  398. }
  399. }
  400. }
  401. #endif
  402. for (y = 0; y < dst_height - 2; y += 3) {
  403. ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
  404. src_ptr += src_stride;
  405. dst_ptr += dst_stride;
  406. ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
  407. src_ptr += src_stride;
  408. dst_ptr += dst_stride;
  409. ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
  410. src_ptr += src_stride * 2;
  411. dst_ptr += dst_stride;
  412. }
  413. // Remainder 1 or 2 rows with last row vertically unfiltered
  414. if ((dst_height % 3) == 2) {
  415. ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
  416. src_ptr += src_stride;
  417. dst_ptr += dst_stride;
  418. ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  419. } else if ((dst_height % 3) == 1) {
  420. ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  421. }
  422. }
  423. static void ScalePlaneDown34_16(int src_width,
  424. int src_height,
  425. int dst_width,
  426. int dst_height,
  427. int src_stride,
  428. int dst_stride,
  429. const uint16_t* src_ptr,
  430. uint16_t* dst_ptr,
  431. enum FilterMode filtering) {
  432. int y;
  433. void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
  434. uint16_t* dst_ptr, int dst_width);
  435. void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
  436. uint16_t* dst_ptr, int dst_width);
  437. const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  438. (void)src_width;
  439. (void)src_height;
  440. assert(dst_width % 3 == 0);
  441. if (!filtering) {
  442. ScaleRowDown34_0 = ScaleRowDown34_16_C;
  443. ScaleRowDown34_1 = ScaleRowDown34_16_C;
  444. } else {
  445. ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
  446. ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
  447. }
  448. #if defined(HAS_SCALEROWDOWN34_16_NEON)
  449. if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
  450. if (!filtering) {
  451. ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
  452. ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
  453. } else {
  454. ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
  455. ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
  456. }
  457. }
  458. #endif
  459. #if defined(HAS_SCALEROWDOWN34_16_SSSE3)
  460. if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
  461. if (!filtering) {
  462. ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
  463. ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
  464. } else {
  465. ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
  466. ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
  467. }
  468. }
  469. #endif
  470. for (y = 0; y < dst_height - 2; y += 3) {
  471. ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
  472. src_ptr += src_stride;
  473. dst_ptr += dst_stride;
  474. ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
  475. src_ptr += src_stride;
  476. dst_ptr += dst_stride;
  477. ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
  478. src_ptr += src_stride * 2;
  479. dst_ptr += dst_stride;
  480. }
  481. // Remainder 1 or 2 rows with last row vertically unfiltered
  482. if ((dst_height % 3) == 2) {
  483. ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
  484. src_ptr += src_stride;
  485. dst_ptr += dst_stride;
  486. ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  487. } else if ((dst_height % 3) == 1) {
  488. ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  489. }
  490. }
  491. // Scale plane, 3/8
  492. // This is an optimized version for scaling down a plane to 3/8
  493. // of its original size.
  494. //
  495. // Uses box filter arranges like this
  496. // aaabbbcc -> abc
  497. // aaabbbcc def
  498. // aaabbbcc ghi
  499. // dddeeeff
  500. // dddeeeff
  501. // dddeeeff
  502. // ggghhhii
  503. // ggghhhii
  504. // Boxes are 3x3, 2x3, 3x2 and 2x2
  505. static void ScalePlaneDown38(int src_width,
  506. int src_height,
  507. int dst_width,
  508. int dst_height,
  509. int src_stride,
  510. int dst_stride,
  511. const uint8_t* src_ptr,
  512. uint8_t* dst_ptr,
  513. enum FilterMode filtering) {
  514. int y;
  515. void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
  516. uint8_t* dst_ptr, int dst_width);
  517. void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
  518. uint8_t* dst_ptr, int dst_width);
  519. const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  520. assert(dst_width % 3 == 0);
  521. (void)src_width;
  522. (void)src_height;
  523. if (!filtering) {
  524. ScaleRowDown38_3 = ScaleRowDown38_C;
  525. ScaleRowDown38_2 = ScaleRowDown38_C;
  526. } else {
  527. ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
  528. ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  529. }
  530. #if defined(HAS_SCALEROWDOWN38_NEON)
  531. if (TestCpuFlag(kCpuHasNEON)) {
  532. if (!filtering) {
  533. ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
  534. ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
  535. } else {
  536. ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
  537. ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
  538. }
  539. if (dst_width % 12 == 0) {
  540. if (!filtering) {
  541. ScaleRowDown38_3 = ScaleRowDown38_NEON;
  542. ScaleRowDown38_2 = ScaleRowDown38_NEON;
  543. } else {
  544. ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
  545. ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
  546. }
  547. }
  548. }
  549. #endif
  550. #if defined(HAS_SCALEROWDOWN38_SSSE3)
  551. if (TestCpuFlag(kCpuHasSSSE3)) {
  552. if (!filtering) {
  553. ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
  554. ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
  555. } else {
  556. ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
  557. ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
  558. }
  559. if (dst_width % 12 == 0 && !filtering) {
  560. ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
  561. ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
  562. }
  563. if (dst_width % 6 == 0 && filtering) {
  564. ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
  565. ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
  566. }
  567. }
  568. #endif
  569. #if defined(HAS_SCALEROWDOWN38_MSA)
  570. if (TestCpuFlag(kCpuHasMSA)) {
  571. if (!filtering) {
  572. ScaleRowDown38_3 = ScaleRowDown38_Any_MSA;
  573. ScaleRowDown38_2 = ScaleRowDown38_Any_MSA;
  574. } else {
  575. ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA;
  576. ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA;
  577. }
  578. if (dst_width % 12 == 0) {
  579. if (!filtering) {
  580. ScaleRowDown38_3 = ScaleRowDown38_MSA;
  581. ScaleRowDown38_2 = ScaleRowDown38_MSA;
  582. } else {
  583. ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA;
  584. ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA;
  585. }
  586. }
  587. }
  588. #endif
  589. for (y = 0; y < dst_height - 2; y += 3) {
  590. ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
  591. src_ptr += src_stride * 3;
  592. dst_ptr += dst_stride;
  593. ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
  594. src_ptr += src_stride * 3;
  595. dst_ptr += dst_stride;
  596. ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
  597. src_ptr += src_stride * 2;
  598. dst_ptr += dst_stride;
  599. }
  600. // Remainder 1 or 2 rows with last row vertically unfiltered
  601. if ((dst_height % 3) == 2) {
  602. ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
  603. src_ptr += src_stride * 3;
  604. dst_ptr += dst_stride;
  605. ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  606. } else if ((dst_height % 3) == 1) {
  607. ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  608. }
  609. }
  610. static void ScalePlaneDown38_16(int src_width,
  611. int src_height,
  612. int dst_width,
  613. int dst_height,
  614. int src_stride,
  615. int dst_stride,
  616. const uint16_t* src_ptr,
  617. uint16_t* dst_ptr,
  618. enum FilterMode filtering) {
  619. int y;
  620. void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
  621. uint16_t* dst_ptr, int dst_width);
  622. void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
  623. uint16_t* dst_ptr, int dst_width);
  624. const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  625. (void)src_width;
  626. (void)src_height;
  627. assert(dst_width % 3 == 0);
  628. if (!filtering) {
  629. ScaleRowDown38_3 = ScaleRowDown38_16_C;
  630. ScaleRowDown38_2 = ScaleRowDown38_16_C;
  631. } else {
  632. ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
  633. ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
  634. }
  635. #if defined(HAS_SCALEROWDOWN38_16_NEON)
  636. if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
  637. if (!filtering) {
  638. ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
  639. ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
  640. } else {
  641. ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
  642. ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
  643. }
  644. }
  645. #endif
  646. #if defined(HAS_SCALEROWDOWN38_16_SSSE3)
  647. if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
  648. if (!filtering) {
  649. ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
  650. ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
  651. } else {
  652. ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
  653. ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
  654. }
  655. }
  656. #endif
  657. for (y = 0; y < dst_height - 2; y += 3) {
  658. ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
  659. src_ptr += src_stride * 3;
  660. dst_ptr += dst_stride;
  661. ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
  662. src_ptr += src_stride * 3;
  663. dst_ptr += dst_stride;
  664. ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
  665. src_ptr += src_stride * 2;
  666. dst_ptr += dst_stride;
  667. }
  668. // Remainder 1 or 2 rows with last row vertically unfiltered
  669. if ((dst_height % 3) == 2) {
  670. ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
  671. src_ptr += src_stride * 3;
  672. dst_ptr += dst_stride;
  673. ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  674. } else if ((dst_height % 3) == 1) {
  675. ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  676. }
  677. }
  // Clamps x to a minimum of 1. x is evaluated twice.
  #define MIN1(x) (((x) >= 1) ? (x) : 1)
  // Returns the sum of the first iboxwidth 16-bit values at src_ptr.
  // iboxwidth must be positive (asserted).
  static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
    uint32_t total = 0u;
    int remaining = iboxwidth;
    assert(iboxwidth > 0);
    while (remaining > 0) {
      total += *src_ptr;
      ++src_ptr;
      --remaining;
    }
    return total;
  }
  // Returns the sum (modulo 2^32) of the first iboxwidth 32-bit values at
  // src_ptr. iboxwidth must be positive (asserted).
  static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
    uint32_t total = 0u;
    int remaining = iboxwidth;
    assert(iboxwidth > 0);
    while (remaining > 0) {
      total += *src_ptr;
      ++src_ptr;
      --remaining;
    }
    return total;
  }
  // Horizontal box pass for a fractional step: x and dx are 16.16 fixed-point
  // source positions, so successive boxes are either (dx >> 16) or
  // (dx >> 16) + 1 columns wide (at least 1). Each output is the sum of the
  // box's 16-bit inputs multiplied by 65536 / (boxwidth * boxheight) and
  // shifted right by 16, stored as 8 bits.
  static void ScaleAddCols2_C(int dst_width,
                              int boxheight,
                              int x,
                              int dx,
                              const uint16_t* src_ptr,
                              uint8_t* dst_ptr) {
  #ifdef __clang_analyzer__
    *dst_ptr = 0;
  #else
    const int minboxwidth = dx >> 16;
    int scaletbl[2];
    int remaining;
    // One reciprocal per possible box width, indexed by width - minboxwidth.
    scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
    scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
    for (remaining = dst_width; remaining > 0; --remaining) {
      const int left = x >> 16;
      int boxwidth;
      x += dx;
      // Width is the number of whole columns stepped over, but never zero.
      boxwidth = MIN1((x >> 16) - left);
      *dst_ptr++ = SumPixels(boxwidth, src_ptr + left) *
                       scaletbl[boxwidth - minboxwidth] >>
                   16;
    }
  #endif
  }
  // 16-bit-output counterpart of ScaleAddCols2_C: x and dx are 16.16
  // fixed-point source positions, so successive boxes are either (dx >> 16) or
  // (dx >> 16) + 1 columns wide (at least 1). Each output is the sum of the
  // box's 32-bit inputs multiplied by 65536 / (boxwidth * boxheight) and
  // shifted right by 16, stored as 16 bits.
  static void ScaleAddCols2_16_C(int dst_width,
                                 int boxheight,
                                 int x,
                                 int dx,
                                 const uint32_t* src_ptr,
                                 uint16_t* dst_ptr) {
  #ifdef __clang_analyzer__
    *dst_ptr = 0;
  #else
    const int minboxwidth = dx >> 16;
    int scaletbl[2];
    int remaining;
    // One reciprocal per possible box width, indexed by width - minboxwidth.
    scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
    scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
    for (remaining = dst_width; remaining > 0; --remaining) {
      const int left = x >> 16;
      int boxwidth;
      x += dx;
      // Width is the number of whole columns stepped over, but never zero.
      boxwidth = MIN1((x >> 16) - left);
      *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + left) *
                       scaletbl[boxwidth - minboxwidth] >>
                   16;
    }
  #endif
  }
  747. static void ScaleAddCols0_C(int dst_width,
  748. int boxheight,
  749. int x,
  750. int dx,
  751. const uint16_t* src_ptr,
  752. uint8_t* dst_ptr) {
  753. int scaleval = 65536 / boxheight;
  754. int i;
  755. (void)dx;
  756. src_ptr += (x >> 16);
  757. for (i = 0; i < dst_width; ++i) {
  758. *dst_ptr++ = src_ptr[i] * scaleval >> 16;
  759. }
  760. }
  761. static void ScaleAddCols1_C(int dst_width,
  762. int boxheight,
  763. int x,
  764. int dx,
  765. const uint16_t* src_ptr,
  766. uint8_t* dst_ptr) {
  767. int boxwidth = MIN1(dx >> 16);
  768. int scaleval = 65536 / (boxwidth * boxheight);
  769. int i;
  770. x >>= 16;
  771. for (i = 0; i < dst_width; ++i) {
  772. *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
  773. x += boxwidth;
  774. }
  775. }
  776. static void ScaleAddCols1_16_C(int dst_width,
  777. int boxheight,
  778. int x,
  779. int dx,
  780. const uint32_t* src_ptr,
  781. uint16_t* dst_ptr) {
  782. int boxwidth = MIN1(dx >> 16);
  783. int scaleval = 65536 / (boxwidth * boxheight);
  784. int i;
  785. for (i = 0; i < dst_width; ++i) {
  786. *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
  787. x += boxwidth;
  788. }
  789. }
  790. // Scale plane down to any dimensions, with interpolation.
  791. // (boxfilter).
  792. //
  793. // Same method as SimpleScale, which is fixed point, outputting
  794. // one pixel of destination using fixed point (16.16) to step
  795. // through source, sampling a box of pixel with simple
  796. // averaging.
  797. static void ScalePlaneBox(int src_width,
  798. int src_height,
  799. int dst_width,
  800. int dst_height,
  801. int src_stride,
  802. int dst_stride,
  803. const uint8_t* src_ptr,
  804. uint8_t* dst_ptr) {
  805. int j, k;
  806. // Initial source x/y coordinate and step values as 16.16 fixed point.
  807. int x = 0;
  808. int y = 0;
  809. int dx = 0;
  810. int dy = 0;
  811. const int max_y = (src_height << 16);
  812. ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
  813. &dx, &dy);
  814. src_width = Abs(src_width);
  815. {
  816. // Allocate a row buffer of uint16_t.
  817. align_buffer_64(row16, src_width * 2);
  818. void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
  819. const uint16_t* src_ptr, uint8_t* dst_ptr) =
  820. (dx & 0xffff) ? ScaleAddCols2_C
  821. : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
  822. void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
  823. int src_width) = ScaleAddRow_C;
  824. #if defined(HAS_SCALEADDROW_SSE2)
  825. if (TestCpuFlag(kCpuHasSSE2)) {
  826. ScaleAddRow = ScaleAddRow_Any_SSE2;
  827. if (IS_ALIGNED(src_width, 16)) {
  828. ScaleAddRow = ScaleAddRow_SSE2;
  829. }
  830. }
  831. #endif
  832. #if defined(HAS_SCALEADDROW_AVX2)
  833. if (TestCpuFlag(kCpuHasAVX2)) {
  834. ScaleAddRow = ScaleAddRow_Any_AVX2;
  835. if (IS_ALIGNED(src_width, 32)) {
  836. ScaleAddRow = ScaleAddRow_AVX2;
  837. }
  838. }
  839. #endif
  840. #if defined(HAS_SCALEADDROW_NEON)
  841. if (TestCpuFlag(kCpuHasNEON)) {
  842. ScaleAddRow = ScaleAddRow_Any_NEON;
  843. if (IS_ALIGNED(src_width, 16)) {
  844. ScaleAddRow = ScaleAddRow_NEON;
  845. }
  846. }
  847. #endif
  848. #if defined(HAS_SCALEADDROW_MSA)
  849. if (TestCpuFlag(kCpuHasMSA)) {
  850. ScaleAddRow = ScaleAddRow_Any_MSA;
  851. if (IS_ALIGNED(src_width, 16)) {
  852. ScaleAddRow = ScaleAddRow_MSA;
  853. }
  854. }
  855. #endif
  856. #if defined(HAS_SCALEADDROW_MMI)
  857. if (TestCpuFlag(kCpuHasMMI)) {
  858. ScaleAddRow = ScaleAddRow_Any_MMI;
  859. if (IS_ALIGNED(src_width, 8)) {
  860. ScaleAddRow = ScaleAddRow_MMI;
  861. }
  862. }
  863. #endif
  864. for (j = 0; j < dst_height; ++j) {
  865. int boxheight;
  866. int iy = y >> 16;
  867. const uint8_t* src = src_ptr + iy * src_stride;
  868. y += dy;
  869. if (y > max_y) {
  870. y = max_y;
  871. }
  872. boxheight = MIN1((y >> 16) - iy);
  873. memset(row16, 0, src_width * 2);
  874. for (k = 0; k < boxheight; ++k) {
  875. ScaleAddRow(src, (uint16_t*)(row16), src_width);
  876. src += src_stride;
  877. }
  878. ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr);
  879. dst_ptr += dst_stride;
  880. }
  881. free_aligned_buffer_64(row16);
  882. }
  883. }
  884. static void ScalePlaneBox_16(int src_width,
  885. int src_height,
  886. int dst_width,
  887. int dst_height,
  888. int src_stride,
  889. int dst_stride,
  890. const uint16_t* src_ptr,
  891. uint16_t* dst_ptr) {
  892. int j, k;
  893. // Initial source x/y coordinate and step values as 16.16 fixed point.
  894. int x = 0;
  895. int y = 0;
  896. int dx = 0;
  897. int dy = 0;
  898. const int max_y = (src_height << 16);
  899. ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
  900. &dx, &dy);
  901. src_width = Abs(src_width);
  902. {
  903. // Allocate a row buffer of uint32_t.
  904. align_buffer_64(row32, src_width * 4);
  905. void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
  906. const uint32_t* src_ptr, uint16_t* dst_ptr) =
  907. (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
  908. void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
  909. int src_width) = ScaleAddRow_16_C;
  910. #if defined(HAS_SCALEADDROW_16_SSE2)
  911. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
  912. ScaleAddRow = ScaleAddRow_16_SSE2;
  913. }
  914. #endif
  915. #if defined(HAS_SCALEADDROW_16_MMI)
  916. if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(src_width, 4)) {
  917. ScaleAddRow = ScaleAddRow_16_MMI;
  918. }
  919. #endif
  920. for (j = 0; j < dst_height; ++j) {
  921. int boxheight;
  922. int iy = y >> 16;
  923. const uint16_t* src = src_ptr + iy * src_stride;
  924. y += dy;
  925. if (y > max_y) {
  926. y = max_y;
  927. }
  928. boxheight = MIN1((y >> 16) - iy);
  929. memset(row32, 0, src_width * 4);
  930. for (k = 0; k < boxheight; ++k) {
  931. ScaleAddRow(src, (uint32_t*)(row32), src_width);
  932. src += src_stride;
  933. }
  934. ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr);
  935. dst_ptr += dst_stride;
  936. }
  937. free_aligned_buffer_64(row32);
  938. }
  939. }
  940. // Scale plane down with bilinear interpolation.
  941. void ScalePlaneBilinearDown(int src_width,
  942. int src_height,
  943. int dst_width,
  944. int dst_height,
  945. int src_stride,
  946. int dst_stride,
  947. const uint8_t* src_ptr,
  948. uint8_t* dst_ptr,
  949. enum FilterMode filtering) {
  950. // Initial source x/y coordinate and step values as 16.16 fixed point.
  951. int x = 0;
  952. int y = 0;
  953. int dx = 0;
  954. int dy = 0;
  955. // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  956. // Allocate a row buffer.
  957. align_buffer_64(row, src_width);
  958. const int max_y = (src_height - 1) << 16;
  959. int j;
  960. void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
  961. int dst_width, int x, int dx) =
  962. (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
  963. void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
  964. ptrdiff_t src_stride, int dst_width,
  965. int source_y_fraction) = InterpolateRow_C;
  966. ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
  967. &dx, &dy);
  968. src_width = Abs(src_width);
  969. #if defined(HAS_INTERPOLATEROW_SSSE3)
  970. if (TestCpuFlag(kCpuHasSSSE3)) {
  971. InterpolateRow = InterpolateRow_Any_SSSE3;
  972. if (IS_ALIGNED(src_width, 16)) {
  973. InterpolateRow = InterpolateRow_SSSE3;
  974. }
  975. }
  976. #endif
  977. #if defined(HAS_INTERPOLATEROW_AVX2)
  978. if (TestCpuFlag(kCpuHasAVX2)) {
  979. InterpolateRow = InterpolateRow_Any_AVX2;
  980. if (IS_ALIGNED(src_width, 32)) {
  981. InterpolateRow = InterpolateRow_AVX2;
  982. }
  983. }
  984. #endif
  985. #if defined(HAS_INTERPOLATEROW_NEON)
  986. if (TestCpuFlag(kCpuHasNEON)) {
  987. InterpolateRow = InterpolateRow_Any_NEON;
  988. if (IS_ALIGNED(src_width, 16)) {
  989. InterpolateRow = InterpolateRow_NEON;
  990. }
  991. }
  992. #endif
  993. #if defined(HAS_INTERPOLATEROW_MSA)
  994. if (TestCpuFlag(kCpuHasMSA)) {
  995. InterpolateRow = InterpolateRow_Any_MSA;
  996. if (IS_ALIGNED(src_width, 32)) {
  997. InterpolateRow = InterpolateRow_MSA;
  998. }
  999. }
  1000. #endif
  1001. #if defined(HAS_INTERPOLATEROW_MMI)
  1002. if (TestCpuFlag(kCpuHasMMI)) {
  1003. InterpolateRow = InterpolateRow_Any_MMI;
  1004. if (IS_ALIGNED(src_width, 16)) {
  1005. InterpolateRow = InterpolateRow_MMI;
  1006. }
  1007. }
  1008. #endif
  1009. #if defined(HAS_SCALEFILTERCOLS_SSSE3)
  1010. if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
  1011. ScaleFilterCols = ScaleFilterCols_SSSE3;
  1012. }
  1013. #endif
  1014. #if defined(HAS_SCALEFILTERCOLS_NEON)
  1015. if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
  1016. ScaleFilterCols = ScaleFilterCols_Any_NEON;
  1017. if (IS_ALIGNED(dst_width, 8)) {
  1018. ScaleFilterCols = ScaleFilterCols_NEON;
  1019. }
  1020. }
  1021. #endif
  1022. #if defined(HAS_SCALEFILTERCOLS_MSA)
  1023. if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
  1024. ScaleFilterCols = ScaleFilterCols_Any_MSA;
  1025. if (IS_ALIGNED(dst_width, 16)) {
  1026. ScaleFilterCols = ScaleFilterCols_MSA;
  1027. }
  1028. }
  1029. #endif
  1030. if (y > max_y) {
  1031. y = max_y;
  1032. }
  1033. for (j = 0; j < dst_height; ++j) {
  1034. int yi = y >> 16;
  1035. const uint8_t* src = src_ptr + yi * src_stride;
  1036. if (filtering == kFilterLinear) {
  1037. ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
  1038. } else {
  1039. int yf = (y >> 8) & 255;
  1040. InterpolateRow(row, src, src_stride, src_width, yf);
  1041. ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
  1042. }
  1043. dst_ptr += dst_stride;
  1044. y += dy;
  1045. if (y > max_y) {
  1046. y = max_y;
  1047. }
  1048. }
  1049. free_aligned_buffer_64(row);
  1050. }
  1051. void ScalePlaneBilinearDown_16(int src_width,
  1052. int src_height,
  1053. int dst_width,
  1054. int dst_height,
  1055. int src_stride,
  1056. int dst_stride,
  1057. const uint16_t* src_ptr,
  1058. uint16_t* dst_ptr,
  1059. enum FilterMode filtering) {
  1060. // Initial source x/y coordinate and step values as 16.16 fixed point.
  1061. int x = 0;
  1062. int y = 0;
  1063. int dx = 0;
  1064. int dy = 0;
  1065. // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  1066. // Allocate a row buffer.
  1067. align_buffer_64(row, src_width * 2);
  1068. const int max_y = (src_height - 1) << 16;
  1069. int j;
  1070. void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
  1071. int dst_width, int x, int dx) =
  1072. (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
  1073. void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
  1074. ptrdiff_t src_stride, int dst_width,
  1075. int source_y_fraction) = InterpolateRow_16_C;
  1076. ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
  1077. &dx, &dy);
  1078. src_width = Abs(src_width);
  1079. #if defined(HAS_INTERPOLATEROW_16_SSE2)
  1080. if (TestCpuFlag(kCpuHasSSE2)) {
  1081. InterpolateRow = InterpolateRow_Any_16_SSE2;
  1082. if (IS_ALIGNED(src_width, 16)) {
  1083. InterpolateRow = InterpolateRow_16_SSE2;
  1084. }
  1085. }
  1086. #endif
  1087. #if defined(HAS_INTERPOLATEROW_16_SSSE3)
  1088. if (TestCpuFlag(kCpuHasSSSE3)) {
  1089. InterpolateRow = InterpolateRow_Any_16_SSSE3;
  1090. if (IS_ALIGNED(src_width, 16)) {
  1091. InterpolateRow = InterpolateRow_16_SSSE3;
  1092. }
  1093. }
  1094. #endif
  1095. #if defined(HAS_INTERPOLATEROW_16_AVX2)
  1096. if (TestCpuFlag(kCpuHasAVX2)) {
  1097. InterpolateRow = InterpolateRow_Any_16_AVX2;
  1098. if (IS_ALIGNED(src_width, 32)) {
  1099. InterpolateRow = InterpolateRow_16_AVX2;
  1100. }
  1101. }
  1102. #endif
  1103. #if defined(HAS_INTERPOLATEROW_16_NEON)
  1104. if (TestCpuFlag(kCpuHasNEON)) {
  1105. InterpolateRow = InterpolateRow_Any_16_NEON;
  1106. if (IS_ALIGNED(src_width, 16)) {
  1107. InterpolateRow = InterpolateRow_16_NEON;
  1108. }
  1109. }
  1110. #endif
  1111. #if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  1112. if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
  1113. ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  1114. }
  1115. #endif
  1116. if (y > max_y) {
  1117. y = max_y;
  1118. }
  1119. for (j = 0; j < dst_height; ++j) {
  1120. int yi = y >> 16;
  1121. const uint16_t* src = src_ptr + yi * src_stride;
  1122. if (filtering == kFilterLinear) {
  1123. ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
  1124. } else {
  1125. int yf = (y >> 8) & 255;
  1126. InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf);
  1127. ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx);
  1128. }
  1129. dst_ptr += dst_stride;
  1130. y += dy;
  1131. if (y > max_y) {
  1132. y = max_y;
  1133. }
  1134. }
  1135. free_aligned_buffer_64(row);
  1136. }
  1137. // Scale up down with bilinear interpolation.
  1138. void ScalePlaneBilinearUp(int src_width,
  1139. int src_height,
  1140. int dst_width,
  1141. int dst_height,
  1142. int src_stride,
  1143. int dst_stride,
  1144. const uint8_t* src_ptr,
  1145. uint8_t* dst_ptr,
  1146. enum FilterMode filtering) {
  1147. int j;
  1148. // Initial source x/y coordinate and step values as 16.16 fixed point.
  1149. int x = 0;
  1150. int y = 0;
  1151. int dx = 0;
  1152. int dy = 0;
  1153. const int max_y = (src_height - 1) << 16;
  1154. void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
  1155. ptrdiff_t src_stride, int dst_width,
  1156. int source_y_fraction) = InterpolateRow_C;
  1157. void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
  1158. int dst_width, int x, int dx) =
  1159. filtering ? ScaleFilterCols_C : ScaleCols_C;
  1160. ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
  1161. &dx, &dy);
  1162. src_width = Abs(src_width);
  1163. #if defined(HAS_INTERPOLATEROW_SSSE3)
  1164. if (TestCpuFlag(kCpuHasSSSE3)) {
  1165. InterpolateRow = InterpolateRow_Any_SSSE3;
  1166. if (IS_ALIGNED(dst_width, 16)) {
  1167. InterpolateRow = InterpolateRow_SSSE3;
  1168. }
  1169. }
  1170. #endif
  1171. #if defined(HAS_INTERPOLATEROW_AVX2)
  1172. if (TestCpuFlag(kCpuHasAVX2)) {
  1173. InterpolateRow = InterpolateRow_Any_AVX2;
  1174. if (IS_ALIGNED(dst_width, 32)) {
  1175. InterpolateRow = InterpolateRow_AVX2;
  1176. }
  1177. }
  1178. #endif
  1179. #if defined(HAS_INTERPOLATEROW_NEON)
  1180. if (TestCpuFlag(kCpuHasNEON)) {
  1181. InterpolateRow = InterpolateRow_Any_NEON;
  1182. if (IS_ALIGNED(dst_width, 16)) {
  1183. InterpolateRow = InterpolateRow_NEON;
  1184. }
  1185. }
  1186. #endif
  1187. if (filtering && src_width >= 32768) {
  1188. ScaleFilterCols = ScaleFilterCols64_C;
  1189. }
  1190. #if defined(HAS_SCALEFILTERCOLS_SSSE3)
  1191. if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
  1192. ScaleFilterCols = ScaleFilterCols_SSSE3;
  1193. }
  1194. #endif
  1195. #if defined(HAS_SCALEFILTERCOLS_NEON)
  1196. if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
  1197. ScaleFilterCols = ScaleFilterCols_Any_NEON;
  1198. if (IS_ALIGNED(dst_width, 8)) {
  1199. ScaleFilterCols = ScaleFilterCols_NEON;
  1200. }
  1201. }
  1202. #endif
  1203. #if defined(HAS_SCALEFILTERCOLS_MSA)
  1204. if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
  1205. ScaleFilterCols = ScaleFilterCols_Any_MSA;
  1206. if (IS_ALIGNED(dst_width, 16)) {
  1207. ScaleFilterCols = ScaleFilterCols_MSA;
  1208. }
  1209. }
  1210. #endif
  1211. if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
  1212. ScaleFilterCols = ScaleColsUp2_C;
  1213. #if defined(HAS_SCALECOLS_SSE2)
  1214. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  1215. ScaleFilterCols = ScaleColsUp2_SSE2;
  1216. }
  1217. #endif
  1218. #if defined(HAS_SCALECOLS_MMI)
  1219. if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
  1220. ScaleFilterCols = ScaleColsUp2_MMI;
  1221. }
  1222. #endif
  1223. }
  1224. if (y > max_y) {
  1225. y = max_y;
  1226. }
  1227. {
  1228. int yi = y >> 16;
  1229. const uint8_t* src = src_ptr + yi * src_stride;
  1230. // Allocate 2 row buffers.
  1231. const int kRowSize = (dst_width + 31) & ~31;
  1232. align_buffer_64(row, kRowSize * 2);
  1233. uint8_t* rowptr = row;
  1234. int rowstride = kRowSize;
  1235. int lasty = yi;
  1236. ScaleFilterCols(rowptr, src, dst_width, x, dx);
  1237. if (src_height > 1) {
  1238. src += src_stride;
  1239. }
  1240. ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
  1241. src += src_stride;
  1242. for (j = 0; j < dst_height; ++j) {
  1243. yi = y >> 16;
  1244. if (yi != lasty) {
  1245. if (y > max_y) {
  1246. y = max_y;
  1247. yi = y >> 16;
  1248. src = src_ptr + yi * src_stride;
  1249. }
  1250. if (yi != lasty) {
  1251. ScaleFilterCols(rowptr, src, dst_width, x, dx);
  1252. rowptr += rowstride;
  1253. rowstride = -rowstride;
  1254. lasty = yi;
  1255. src += src_stride;
  1256. }
  1257. }
  1258. if (filtering == kFilterLinear) {
  1259. InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
  1260. } else {
  1261. int yf = (y >> 8) & 255;
  1262. InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
  1263. }
  1264. dst_ptr += dst_stride;
  1265. y += dy;
  1266. }
  1267. free_aligned_buffer_64(row);
  1268. }
  1269. }
  1270. void ScalePlaneBilinearUp_16(int src_width,
  1271. int src_height,
  1272. int dst_width,
  1273. int dst_height,
  1274. int src_stride,
  1275. int dst_stride,
  1276. const uint16_t* src_ptr,
  1277. uint16_t* dst_ptr,
  1278. enum FilterMode filtering) {
  1279. int j;
  1280. // Initial source x/y coordinate and step values as 16.16 fixed point.
  1281. int x = 0;
  1282. int y = 0;
  1283. int dx = 0;
  1284. int dy = 0;
  1285. const int max_y = (src_height - 1) << 16;
  1286. void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
  1287. ptrdiff_t src_stride, int dst_width,
  1288. int source_y_fraction) = InterpolateRow_16_C;
  1289. void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
  1290. int dst_width, int x, int dx) =
  1291. filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
  1292. ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
  1293. &dx, &dy);
  1294. src_width = Abs(src_width);
  1295. #if defined(HAS_INTERPOLATEROW_16_SSE2)
  1296. if (TestCpuFlag(kCpuHasSSE2)) {
  1297. InterpolateRow = InterpolateRow_Any_16_SSE2;
  1298. if (IS_ALIGNED(dst_width, 16)) {
  1299. InterpolateRow = InterpolateRow_16_SSE2;
  1300. }
  1301. }
  1302. #endif
  1303. #if defined(HAS_INTERPOLATEROW_16_SSSE3)
  1304. if (TestCpuFlag(kCpuHasSSSE3)) {
  1305. InterpolateRow = InterpolateRow_Any_16_SSSE3;
  1306. if (IS_ALIGNED(dst_width, 16)) {
  1307. InterpolateRow = InterpolateRow_16_SSSE3;
  1308. }
  1309. }
  1310. #endif
  1311. #if defined(HAS_INTERPOLATEROW_16_AVX2)
  1312. if (TestCpuFlag(kCpuHasAVX2)) {
  1313. InterpolateRow = InterpolateRow_Any_16_AVX2;
  1314. if (IS_ALIGNED(dst_width, 32)) {
  1315. InterpolateRow = InterpolateRow_16_AVX2;
  1316. }
  1317. }
  1318. #endif
  1319. #if defined(HAS_INTERPOLATEROW_16_NEON)
  1320. if (TestCpuFlag(kCpuHasNEON)) {
  1321. InterpolateRow = InterpolateRow_Any_16_NEON;
  1322. if (IS_ALIGNED(dst_width, 16)) {
  1323. InterpolateRow = InterpolateRow_16_NEON;
  1324. }
  1325. }
  1326. #endif
  1327. if (filtering && src_width >= 32768) {
  1328. ScaleFilterCols = ScaleFilterCols64_16_C;
  1329. }
  1330. #if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  1331. if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
  1332. ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  1333. }
  1334. #endif
  1335. if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
  1336. ScaleFilterCols = ScaleColsUp2_16_C;
  1337. #if defined(HAS_SCALECOLS_16_SSE2)
  1338. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  1339. ScaleFilterCols = ScaleColsUp2_16_SSE2;
  1340. }
  1341. #endif
  1342. #if defined(HAS_SCALECOLS_16_MMI)
  1343. if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
  1344. ScaleFilterCols = ScaleColsUp2_16_MMI;
  1345. }
  1346. #endif
  1347. }
  1348. if (y > max_y) {
  1349. y = max_y;
  1350. }
  1351. {
  1352. int yi = y >> 16;
  1353. const uint16_t* src = src_ptr + yi * src_stride;
  1354. // Allocate 2 row buffers.
  1355. const int kRowSize = (dst_width + 31) & ~31;
  1356. align_buffer_64(row, kRowSize * 4);
  1357. uint16_t* rowptr = (uint16_t*)row;
  1358. int rowstride = kRowSize;
  1359. int lasty = yi;
  1360. ScaleFilterCols(rowptr, src, dst_width, x, dx);
  1361. if (src_height > 1) {
  1362. src += src_stride;
  1363. }
  1364. ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
  1365. src += src_stride;
  1366. for (j = 0; j < dst_height; ++j) {
  1367. yi = y >> 16;
  1368. if (yi != lasty) {
  1369. if (y > max_y) {
  1370. y = max_y;
  1371. yi = y >> 16;
  1372. src = src_ptr + yi * src_stride;
  1373. }
  1374. if (yi != lasty) {
  1375. ScaleFilterCols(rowptr, src, dst_width, x, dx);
  1376. rowptr += rowstride;
  1377. rowstride = -rowstride;
  1378. lasty = yi;
  1379. src += src_stride;
  1380. }
  1381. }
  1382. if (filtering == kFilterLinear) {
  1383. InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
  1384. } else {
  1385. int yf = (y >> 8) & 255;
  1386. InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
  1387. }
  1388. dst_ptr += dst_stride;
  1389. y += dy;
  1390. }
  1391. free_aligned_buffer_64(row);
  1392. }
  1393. }
  1394. // Scale Plane to/from any dimensions, without interpolation.
  1395. // Fixed point math is used for performance: The upper 16 bits
  1396. // of x and dx is the integer part of the source position and
  1397. // the lower 16 bits are the fixed decimal part.
  1398. static void ScalePlaneSimple(int src_width,
  1399. int src_height,
  1400. int dst_width,
  1401. int dst_height,
  1402. int src_stride,
  1403. int dst_stride,
  1404. const uint8_t* src_ptr,
  1405. uint8_t* dst_ptr) {
  1406. int i;
  1407. void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
  1408. int x, int dx) = ScaleCols_C;
  1409. // Initial source x/y coordinate and step values as 16.16 fixed point.
  1410. int x = 0;
  1411. int y = 0;
  1412. int dx = 0;
  1413. int dy = 0;
  1414. ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
  1415. &dx, &dy);
  1416. src_width = Abs(src_width);
  1417. if (src_width * 2 == dst_width && x < 0x8000) {
  1418. ScaleCols = ScaleColsUp2_C;
  1419. #if defined(HAS_SCALECOLS_SSE2)
  1420. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  1421. ScaleCols = ScaleColsUp2_SSE2;
  1422. }
  1423. #endif
  1424. #if defined(HAS_SCALECOLS_MMI)
  1425. if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
  1426. ScaleCols = ScaleColsUp2_MMI;
  1427. }
  1428. #endif
  1429. }
  1430. for (i = 0; i < dst_height; ++i) {
  1431. ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
  1432. dst_ptr += dst_stride;
  1433. y += dy;
  1434. }
  1435. }
  1436. static void ScalePlaneSimple_16(int src_width,
  1437. int src_height,
  1438. int dst_width,
  1439. int dst_height,
  1440. int src_stride,
  1441. int dst_stride,
  1442. const uint16_t* src_ptr,
  1443. uint16_t* dst_ptr) {
  1444. int i;
  1445. void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width,
  1446. int x, int dx) = ScaleCols_16_C;
  1447. // Initial source x/y coordinate and step values as 16.16 fixed point.
  1448. int x = 0;
  1449. int y = 0;
  1450. int dx = 0;
  1451. int dy = 0;
  1452. ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
  1453. &dx, &dy);
  1454. src_width = Abs(src_width);
  1455. if (src_width * 2 == dst_width && x < 0x8000) {
  1456. ScaleCols = ScaleColsUp2_16_C;
  1457. #if defined(HAS_SCALECOLS_16_SSE2)
  1458. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  1459. ScaleCols = ScaleColsUp2_16_SSE2;
  1460. }
  1461. #endif
  1462. #if defined(HAS_SCALECOLS_16_MMI)
  1463. if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
  1464. ScaleCols = ScaleColsUp2_16_MMI;
  1465. }
  1466. #endif
  1467. }
  1468. for (i = 0; i < dst_height; ++i) {
  1469. ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
  1470. dst_ptr += dst_stride;
  1471. y += dy;
  1472. }
  1473. }
  1474. // Scale a plane.
  1475. // This function dispatches to a specialized scaler based on scale factor.
  1476. LIBYUV_API
  1477. void ScalePlane(const uint8_t* src,
  1478. int src_stride,
  1479. int src_width,
  1480. int src_height,
  1481. uint8_t* dst,
  1482. int dst_stride,
  1483. int dst_width,
  1484. int dst_height,
  1485. enum FilterMode filtering) {
  1486. // Simplify filtering when possible.
  1487. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
  1488. filtering);
  1489. // Negative height means invert the image.
  1490. if (src_height < 0) {
  1491. src_height = -src_height;
  1492. src = src + (src_height - 1) * src_stride;
  1493. src_stride = -src_stride;
  1494. }
  1495. // Use specialized scales to improve performance for common resolutions.
  1496. // For example, all the 1/2 scalings will use ScalePlaneDown2()
  1497. if (dst_width == src_width && dst_height == src_height) {
  1498. // Straight copy.
  1499. CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
  1500. return;
  1501. }
  1502. if (dst_width == src_width && filtering != kFilterBox) {
  1503. int dy = FixedDiv(src_height, dst_height);
  1504. // Arbitrary scale vertically, but unscaled horizontally.
  1505. ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
  1506. dst_stride, src, dst, 0, 0, dy, 1, filtering);
  1507. return;
  1508. }
  1509. if (dst_width <= Abs(src_width) && dst_height <= src_height) {
  1510. // Scale down.
  1511. if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
  1512. // optimized, 3/4
  1513. ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
  1514. dst_stride, src, dst, filtering);
  1515. return;
  1516. }
  1517. if (2 * dst_width == src_width && 2 * dst_height == src_height) {
  1518. // optimized, 1/2
  1519. ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
  1520. dst_stride, src, dst, filtering);
  1521. return;
  1522. }
  1523. // 3/8 rounded up for odd sized chroma height.
  1524. if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
  1525. // optimized, 3/8
  1526. ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
  1527. dst_stride, src, dst, filtering);
  1528. return;
  1529. }
  1530. if (4 * dst_width == src_width && 4 * dst_height == src_height &&
  1531. (filtering == kFilterBox || filtering == kFilterNone)) {
  1532. // optimized, 1/4
  1533. ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
  1534. dst_stride, src, dst, filtering);
  1535. return;
  1536. }
  1537. }
  1538. if (filtering == kFilterBox && dst_height * 2 < src_height) {
  1539. ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
  1540. dst_stride, src, dst);
  1541. return;
  1542. }
  1543. if (filtering && dst_height > src_height) {
  1544. ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
  1545. src_stride, dst_stride, src, dst, filtering);
  1546. return;
  1547. }
  1548. if (filtering) {
  1549. ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
  1550. src_stride, dst_stride, src, dst, filtering);
  1551. return;
  1552. }
  1553. ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
  1554. dst_stride, src, dst);
  1555. }
  1556. LIBYUV_API
  1557. void ScalePlane_16(const uint16_t* src,
  1558. int src_stride,
  1559. int src_width,
  1560. int src_height,
  1561. uint16_t* dst,
  1562. int dst_stride,
  1563. int dst_width,
  1564. int dst_height,
  1565. enum FilterMode filtering) {
  1566. // Simplify filtering when possible.
  1567. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
  1568. filtering);
  1569. // Negative height means invert the image.
  1570. if (src_height < 0) {
  1571. src_height = -src_height;
  1572. src = src + (src_height - 1) * src_stride;
  1573. src_stride = -src_stride;
  1574. }
  1575. // Use specialized scales to improve performance for common resolutions.
  1576. // For example, all the 1/2 scalings will use ScalePlaneDown2()
  1577. if (dst_width == src_width && dst_height == src_height) {
  1578. // Straight copy.
  1579. CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
  1580. return;
  1581. }
  1582. if (dst_width == src_width && filtering != kFilterBox) {
  1583. int dy = FixedDiv(src_height, dst_height);
  1584. // Arbitrary scale vertically, but unscaled vertically.
  1585. ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
  1586. dst_stride, src, dst, 0, 0, dy, 1, filtering);
  1587. return;
  1588. }
  1589. if (dst_width <= Abs(src_width) && dst_height <= src_height) {
  1590. // Scale down.
  1591. if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
  1592. // optimized, 3/4
  1593. ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
  1594. src_stride, dst_stride, src, dst, filtering);
  1595. return;
  1596. }
  1597. if (2 * dst_width == src_width && 2 * dst_height == src_height) {
  1598. // optimized, 1/2
  1599. ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
  1600. src_stride, dst_stride, src, dst, filtering);
  1601. return;
  1602. }
  1603. // 3/8 rounded up for odd sized chroma height.
  1604. if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
  1605. // optimized, 3/8
  1606. ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
  1607. src_stride, dst_stride, src, dst, filtering);
  1608. return;
  1609. }
  1610. if (4 * dst_width == src_width && 4 * dst_height == src_height &&
  1611. (filtering == kFilterBox || filtering == kFilterNone)) {
  1612. // optimized, 1/4
  1613. ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
  1614. src_stride, dst_stride, src, dst, filtering);
  1615. return;
  1616. }
  1617. }
  1618. if (filtering == kFilterBox && dst_height * 2 < src_height) {
  1619. ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride,
  1620. dst_stride, src, dst);
  1621. return;
  1622. }
  1623. if (filtering && dst_height > src_height) {
  1624. ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
  1625. src_stride, dst_stride, src, dst, filtering);
  1626. return;
  1627. }
  1628. if (filtering) {
  1629. ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
  1630. src_stride, dst_stride, src, dst, filtering);
  1631. return;
  1632. }
  1633. ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
  1634. dst_stride, src, dst);
  1635. }
  1636. // Scale an I420 image.
  1637. // This function in turn calls a scaling function for each plane.
  1638. LIBYUV_API
  1639. int I420Scale(const uint8_t* src_y,
  1640. int src_stride_y,
  1641. const uint8_t* src_u,
  1642. int src_stride_u,
  1643. const uint8_t* src_v,
  1644. int src_stride_v,
  1645. int src_width,
  1646. int src_height,
  1647. uint8_t* dst_y,
  1648. int dst_stride_y,
  1649. uint8_t* dst_u,
  1650. int dst_stride_u,
  1651. uint8_t* dst_v,
  1652. int dst_stride_v,
  1653. int dst_width,
  1654. int dst_height,
  1655. enum FilterMode filtering) {
  1656. int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  1657. int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  1658. int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  1659. int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  1660. if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
  1661. src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
  1662. dst_width <= 0 || dst_height <= 0) {
  1663. return -1;
  1664. }
  1665. ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
  1666. dst_width, dst_height, filtering);
  1667. ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
  1668. dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
  1669. ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
  1670. dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
  1671. return 0;
  1672. }
  1673. LIBYUV_API
  1674. int I420Scale_16(const uint16_t* src_y,
  1675. int src_stride_y,
  1676. const uint16_t* src_u,
  1677. int src_stride_u,
  1678. const uint16_t* src_v,
  1679. int src_stride_v,
  1680. int src_width,
  1681. int src_height,
  1682. uint16_t* dst_y,
  1683. int dst_stride_y,
  1684. uint16_t* dst_u,
  1685. int dst_stride_u,
  1686. uint16_t* dst_v,
  1687. int dst_stride_v,
  1688. int dst_width,
  1689. int dst_height,
  1690. enum FilterMode filtering) {
  1691. int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  1692. int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  1693. int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  1694. int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  1695. if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
  1696. src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
  1697. dst_width <= 0 || dst_height <= 0) {
  1698. return -1;
  1699. }
  1700. ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
  1701. dst_width, dst_height, filtering);
  1702. ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
  1703. dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
  1704. ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
  1705. dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
  1706. return 0;
  1707. }
  1708. // Scale an I444 image.
  1709. // This function in turn calls a scaling function for each plane.
  1710. LIBYUV_API
  1711. int I444Scale(const uint8_t* src_y,
  1712. int src_stride_y,
  1713. const uint8_t* src_u,
  1714. int src_stride_u,
  1715. const uint8_t* src_v,
  1716. int src_stride_v,
  1717. int src_width,
  1718. int src_height,
  1719. uint8_t* dst_y,
  1720. int dst_stride_y,
  1721. uint8_t* dst_u,
  1722. int dst_stride_u,
  1723. uint8_t* dst_v,
  1724. int dst_stride_v,
  1725. int dst_width,
  1726. int dst_height,
  1727. enum FilterMode filtering) {
  1728. if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
  1729. src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
  1730. dst_width <= 0 || dst_height <= 0) {
  1731. return -1;
  1732. }
  1733. ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
  1734. dst_width, dst_height, filtering);
  1735. ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
  1736. dst_width, dst_height, filtering);
  1737. ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
  1738. dst_width, dst_height, filtering);
  1739. return 0;
  1740. }
  1741. LIBYUV_API
  1742. int I444Scale_16(const uint16_t* src_y,
  1743. int src_stride_y,
  1744. const uint16_t* src_u,
  1745. int src_stride_u,
  1746. const uint16_t* src_v,
  1747. int src_stride_v,
  1748. int src_width,
  1749. int src_height,
  1750. uint16_t* dst_y,
  1751. int dst_stride_y,
  1752. uint16_t* dst_u,
  1753. int dst_stride_u,
  1754. uint16_t* dst_v,
  1755. int dst_stride_v,
  1756. int dst_width,
  1757. int dst_height,
  1758. enum FilterMode filtering) {
  1759. if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
  1760. src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
  1761. dst_width <= 0 || dst_height <= 0) {
  1762. return -1;
  1763. }
  1764. ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
  1765. dst_width, dst_height, filtering);
  1766. ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
  1767. dst_width, dst_height, filtering);
  1768. ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
  1769. dst_width, dst_height, filtering);
  1770. return 0;
  1771. }
  1772. // Deprecated api
  1773. LIBYUV_API
  1774. int Scale(const uint8_t* src_y,
  1775. const uint8_t* src_u,
  1776. const uint8_t* src_v,
  1777. int src_stride_y,
  1778. int src_stride_u,
  1779. int src_stride_v,
  1780. int src_width,
  1781. int src_height,
  1782. uint8_t* dst_y,
  1783. uint8_t* dst_u,
  1784. uint8_t* dst_v,
  1785. int dst_stride_y,
  1786. int dst_stride_u,
  1787. int dst_stride_v,
  1788. int dst_width,
  1789. int dst_height,
  1790. LIBYUV_BOOL interpolate) {
  1791. return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v,
  1792. src_stride_v, src_width, src_height, dst_y, dst_stride_y,
  1793. dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width,
  1794. dst_height, interpolate ? kFilterBox : kFilterNone);
  1795. }
  1796. #ifdef __cplusplus
  1797. } // extern "C"
  1798. } // namespace libyuv
  1799. #endif