/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>
#include <stdlib.h>
#include <time.h>

#include "../unit_test/unit_test.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
#include "libyuv/convert_argb.h"
#include "libyuv/convert_from.h"
#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"

#ifdef ENABLE_ROW_TESTS
// row.h defines SIMD_ALIGNED, overriding unit_test.h
// TODO(fbarchard): Remove row.h from unittests. Test public functions.
#include "libyuv/row.h" /* For ScaleSumSamples_Neon */
#endif

namespace libyuv {

TEST_F(LibYUVPlanarTest, TestAttenuate) {
  const int kSize = 1280 * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(atten_pixels, kSize);
  align_buffer_page_end(unatten_pixels, kSize);
  align_buffer_page_end(atten2_pixels, kSize);

  // Test unattenuation clamps
  orig_pixels[0 * 4 + 0] = 200u;
  orig_pixels[0 * 4 + 1] = 129u;
  orig_pixels[0 * 4 + 2] = 127u;
  orig_pixels[0 * 4 + 3] = 128u;
  // Test unattenuation transparent and opaque are unaffected
  orig_pixels[1 * 4 + 0] = 16u;
  orig_pixels[1 * 4 + 1] = 64u;
  orig_pixels[1 * 4 + 2] = 192u;
  orig_pixels[1 * 4 + 3] = 0u;
  orig_pixels[2 * 4 + 0] = 16u;
  orig_pixels[2 * 4 + 1] = 64u;
  orig_pixels[2 * 4 + 2] = 192u;
  orig_pixels[2 * 4 + 3] = 255u;
  orig_pixels[3 * 4 + 0] = 16u;
  orig_pixels[3 * 4 + 1] = 64u;
  orig_pixels[3 * 4 + 2] = 192u;
  orig_pixels[3 * 4 + 3] = 128u;
  ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1);
  EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]);
  EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]);
  EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]);
  EXPECT_EQ(128u, unatten_pixels[0 * 4 + 3]);
  EXPECT_EQ(0u, unatten_pixels[1 * 4 + 0]);
  EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]);
  EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]);
  EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]);
  EXPECT_EQ(16u, unatten_pixels[2 * 4 + 0]);
  EXPECT_EQ(64u, unatten_pixels[2 * 4 + 1]);
  EXPECT_EQ(192u, unatten_pixels[2 * 4 + 2]);
  EXPECT_EQ(255u, unatten_pixels[2 * 4 + 3]);
  EXPECT_EQ(32u, unatten_pixels[3 * 4 + 0]);
  EXPECT_EQ(128u, unatten_pixels[3 * 4 + 1]);
  EXPECT_EQ(255u, unatten_pixels[3 * 4 + 2]);
  EXPECT_EQ(128u, unatten_pixels[3 * 4 + 3]);

  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i * 4 + 0] = i;
    orig_pixels[i * 4 + 1] = i / 2;
    orig_pixels[i * 4 + 2] = i / 3;
    orig_pixels[i * 4 + 3] = i;
  }
  ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 1280, 1);
  ARGBUnattenuate(atten_pixels, 0, unatten_pixels, 0, 1280, 1);
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1);
  }
  for (int i = 0; i < 1280; ++i) {
    EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 2);
    EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 2);
    EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 2);
    EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 2);
  }
  // Make sure transparent, 50% and opaque are fully accurate.
  EXPECT_EQ(0, atten_pixels[0 * 4 + 0]);
  EXPECT_EQ(0, atten_pixels[0 * 4 + 1]);
  EXPECT_EQ(0, atten_pixels[0 * 4 + 2]);
  EXPECT_EQ(0, atten_pixels[0 * 4 + 3]);
  EXPECT_EQ(64, atten_pixels[128 * 4 + 0]);
  EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
  EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
  EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
  EXPECT_NEAR(255, atten_pixels[255 * 4 + 0], 1);
  EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], 1);
  EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1);
  EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);

  free_aligned_buffer_page_end(atten2_pixels);
  free_aligned_buffer_page_end(unatten_pixels);
  free_aligned_buffer_page_end(atten_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
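
// TestAttenuateI compares the C reference path against the optimized
// (typically SIMD) path for ARGBAttenuate: CPU features are masked off for
// the first call, restored for the benchmark loop, and the maximum per-byte
// difference is returned so callers can assert a small tolerance.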
static int TestAttenuateI(int width,
                          int height,
                          int benchmark_iterations,
                          int disable_cpu_flags,
                          int benchmark_cpu_info,
                          int invert,
                          int off) {
  if (width < 1) {
    width = 1;
  }
  const int kBpp = 4;
  const int kStride = width * kBpp;
  align_buffer_page_end(src_argb, kStride * height + off);
  align_buffer_page_end(dst_argb_c, kStride * height);
  align_buffer_page_end(dst_argb_opt, kStride * height);
  for (int i = 0; i < kStride * height; ++i) {
    src_argb[i + off] = (fastrand() & 0xff);
  }
  memset(dst_argb_c, 0, kStride * height);
  memset(dst_argb_opt, 0, kStride * height);

  MaskCpuFlags(disable_cpu_flags);
  ARGBAttenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
                invert * height);
  MaskCpuFlags(benchmark_cpu_info);
  for (int i = 0; i < benchmark_iterations; ++i) {
    ARGBAttenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
                  invert * height);
  }
  int max_diff = 0;
  for (int i = 0; i < kStride * height; ++i) {
    int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
                       static_cast<int>(dst_argb_opt[i]));
    if (abs_diff > max_diff) {
      max_diff = abs_diff;
    }
  }
  free_aligned_buffer_page_end(src_argb);
  free_aligned_buffer_page_end(dst_argb_c);
  free_aligned_buffer_page_end(dst_argb_opt);
  return max_diff;
}

TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
  int max_diff = TestAttenuateI(benchmark_width_ - 1, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, +1, 0);
  EXPECT_LE(max_diff, 2);
}

TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
  int max_diff =
      TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  EXPECT_LE(max_diff, 2);
}

TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
  int max_diff =
      TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  EXPECT_LE(max_diff, 2);
}

TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
  int max_diff =
      TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(max_diff, 2);
}
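
// TestUnattenuateI follows the same C-vs-optimized pattern, but attenuates
// the random source in place first so that ARGBUnattenuate operates on valid
// premultiplied pixels.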
static int TestUnattenuateI(int width,
                            int height,
                            int benchmark_iterations,
                            int disable_cpu_flags,
                            int benchmark_cpu_info,
                            int invert,
                            int off) {
  if (width < 1) {
    width = 1;
  }
  const int kBpp = 4;
  const int kStride = width * kBpp;
  align_buffer_page_end(src_argb, kStride * height + off);
  align_buffer_page_end(dst_argb_c, kStride * height);
  align_buffer_page_end(dst_argb_opt, kStride * height);
  for (int i = 0; i < kStride * height; ++i) {
    src_argb[i + off] = (fastrand() & 0xff);
  }
  ARGBAttenuate(src_argb + off, kStride, src_argb + off, kStride, width,
                height);
  memset(dst_argb_c, 0, kStride * height);
  memset(dst_argb_opt, 0, kStride * height);

  MaskCpuFlags(disable_cpu_flags);
  ARGBUnattenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
                  invert * height);
  MaskCpuFlags(benchmark_cpu_info);
  for (int i = 0; i < benchmark_iterations; ++i) {
    ARGBUnattenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
                    invert * height);
  }
  int max_diff = 0;
  for (int i = 0; i < kStride * height; ++i) {
    int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
                       static_cast<int>(dst_argb_opt[i]));
    if (abs_diff > max_diff) {
      max_diff = abs_diff;
    }
  }
  free_aligned_buffer_page_end(src_argb);
  free_aligned_buffer_page_end(dst_argb_c);
  free_aligned_buffer_page_end(dst_argb_opt);
  return max_diff;
}

TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
  int max_diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, +1, 0);
  EXPECT_LE(max_diff, 2);
}

TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
  int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, +1, 1);
  EXPECT_LE(max_diff, 2);
}

TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
  int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, -1, 0);
  EXPECT_LE(max_diff, 2);
}

TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
  int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, +1, 0);
  EXPECT_LE(max_diff, 2);
}
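
// With every input pixel set to (1, 2, 3, 255), the cumulative sum at (x, y)
// is the number of pixels in the inclusive rectangle from (0, 0), i.e.
// (x + 1) * (y + 1), scaled by each channel's value.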
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
  SIMD_ALIGNED(uint8_t orig_pixels[16][16][4]);
  SIMD_ALIGNED(int32_t added_pixels[16][16][4]);

  for (int y = 0; y < 16; ++y) {
    for (int x = 0; x < 16; ++x) {
      orig_pixels[y][x][0] = 1u;
      orig_pixels[y][x][1] = 2u;
      orig_pixels[y][x][2] = 3u;
      orig_pixels[y][x][3] = 255u;
    }
  }

  ARGBComputeCumulativeSum(&orig_pixels[0][0][0], 16 * 4,
                           &added_pixels[0][0][0], 16 * 4, 16, 16);

  for (int y = 0; y < 16; ++y) {
    for (int x = 0; x < 16; ++x) {
      EXPECT_EQ((x + 1) * (y + 1), added_pixels[y][x][0]);
      EXPECT_EQ((x + 1) * (y + 1) * 2, added_pixels[y][x][1]);
      EXPECT_EQ((x + 1) * (y + 1) * 3, added_pixels[y][x][2]);
      EXPECT_EQ((x + 1) * (y + 1) * 255, added_pixels[y][x][3]);
    }
  }
}

TEST_F(LibYUVPlanarTest, TestARGBGray) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));
  // Test blue
  orig_pixels[0][0] = 255u;
  orig_pixels[0][1] = 0u;
  orig_pixels[0][2] = 0u;
  orig_pixels[0][3] = 128u;
  // Test green
  orig_pixels[1][0] = 0u;
  orig_pixels[1][1] = 255u;
  orig_pixels[1][2] = 0u;
  orig_pixels[1][3] = 0u;
  // Test red
  orig_pixels[2][0] = 0u;
  orig_pixels[2][1] = 0u;
  orig_pixels[2][2] = 255u;
  orig_pixels[2][3] = 255u;
  // Test black
  orig_pixels[3][0] = 0u;
  orig_pixels[3][1] = 0u;
  orig_pixels[3][2] = 0u;
  orig_pixels[3][3] = 255u;
  // Test white
  orig_pixels[4][0] = 255u;
  orig_pixels[4][1] = 255u;
  orig_pixels[4][2] = 255u;
  orig_pixels[4][3] = 255u;
  // Test color
  orig_pixels[5][0] = 16u;
  orig_pixels[5][1] = 64u;
  orig_pixels[5][2] = 192u;
  orig_pixels[5][3] = 224u;
  // Do 16 to test asm version.
  ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
  EXPECT_EQ(30u, orig_pixels[0][0]);
  EXPECT_EQ(30u, orig_pixels[0][1]);
  EXPECT_EQ(30u, orig_pixels[0][2]);
  EXPECT_EQ(128u, orig_pixels[0][3]);
  EXPECT_EQ(149u, orig_pixels[1][0]);
  EXPECT_EQ(149u, orig_pixels[1][1]);
  EXPECT_EQ(149u, orig_pixels[1][2]);
  EXPECT_EQ(0u, orig_pixels[1][3]);
  EXPECT_EQ(76u, orig_pixels[2][0]);
  EXPECT_EQ(76u, orig_pixels[2][1]);
  EXPECT_EQ(76u, orig_pixels[2][2]);
  EXPECT_EQ(255u, orig_pixels[2][3]);
  EXPECT_EQ(0u, orig_pixels[3][0]);
  EXPECT_EQ(0u, orig_pixels[3][1]);
  EXPECT_EQ(0u, orig_pixels[3][2]);
  EXPECT_EQ(255u, orig_pixels[3][3]);
  EXPECT_EQ(255u, orig_pixels[4][0]);
  EXPECT_EQ(255u, orig_pixels[4][1]);
  EXPECT_EQ(255u, orig_pixels[4][2]);
  EXPECT_EQ(255u, orig_pixels[4][3]);
  EXPECT_EQ(96u, orig_pixels[5][0]);
  EXPECT_EQ(96u, orig_pixels[5][1]);
  EXPECT_EQ(96u, orig_pixels[5][2]);
  EXPECT_EQ(224u, orig_pixels[5][3]);
  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBGray(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
  }
}

TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t gray_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));
  // Test blue
  orig_pixels[0][0] = 255u;
  orig_pixels[0][1] = 0u;
  orig_pixels[0][2] = 0u;
  orig_pixels[0][3] = 128u;
  // Test green
  orig_pixels[1][0] = 0u;
  orig_pixels[1][1] = 255u;
  orig_pixels[1][2] = 0u;
  orig_pixels[1][3] = 0u;
  // Test red
  orig_pixels[2][0] = 0u;
  orig_pixels[2][1] = 0u;
  orig_pixels[2][2] = 255u;
  orig_pixels[2][3] = 255u;
  // Test black
  orig_pixels[3][0] = 0u;
  orig_pixels[3][1] = 0u;
  orig_pixels[3][2] = 0u;
  orig_pixels[3][3] = 255u;
  // Test white
  orig_pixels[4][0] = 255u;
  orig_pixels[4][1] = 255u;
  orig_pixels[4][2] = 255u;
  orig_pixels[4][3] = 255u;
  // Test color
  orig_pixels[5][0] = 16u;
  orig_pixels[5][1] = 64u;
  orig_pixels[5][2] = 192u;
  orig_pixels[5][3] = 224u;
  // Do 16 to test asm version.
  ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
  EXPECT_EQ(30u, gray_pixels[0][0]);
  EXPECT_EQ(30u, gray_pixels[0][1]);
  EXPECT_EQ(30u, gray_pixels[0][2]);
  EXPECT_EQ(128u, gray_pixels[0][3]);
  EXPECT_EQ(149u, gray_pixels[1][0]);
  EXPECT_EQ(149u, gray_pixels[1][1]);
  EXPECT_EQ(149u, gray_pixels[1][2]);
  EXPECT_EQ(0u, gray_pixels[1][3]);
  EXPECT_EQ(76u, gray_pixels[2][0]);
  EXPECT_EQ(76u, gray_pixels[2][1]);
  EXPECT_EQ(76u, gray_pixels[2][2]);
  EXPECT_EQ(255u, gray_pixels[2][3]);
  EXPECT_EQ(0u, gray_pixels[3][0]);
  EXPECT_EQ(0u, gray_pixels[3][1]);
  EXPECT_EQ(0u, gray_pixels[3][2]);
  EXPECT_EQ(255u, gray_pixels[3][3]);
  EXPECT_EQ(255u, gray_pixels[4][0]);
  EXPECT_EQ(255u, gray_pixels[4][1]);
  EXPECT_EQ(255u, gray_pixels[4][2]);
  EXPECT_EQ(255u, gray_pixels[4][3]);
  EXPECT_EQ(96u, gray_pixels[5][0]);
  EXPECT_EQ(96u, gray_pixels[5][1]);
  EXPECT_EQ(96u, gray_pixels[5][2]);
  EXPECT_EQ(224u, gray_pixels[5][3]);
  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1);
  }
}

TEST_F(LibYUVPlanarTest, TestARGBSepia) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));
  // Test blue
  orig_pixels[0][0] = 255u;
  orig_pixels[0][1] = 0u;
  orig_pixels[0][2] = 0u;
  orig_pixels[0][3] = 128u;
  // Test green
  orig_pixels[1][0] = 0u;
  orig_pixels[1][1] = 255u;
  orig_pixels[1][2] = 0u;
  orig_pixels[1][3] = 0u;
  // Test red
  orig_pixels[2][0] = 0u;
  orig_pixels[2][1] = 0u;
  orig_pixels[2][2] = 255u;
  orig_pixels[2][3] = 255u;
  // Test black
  orig_pixels[3][0] = 0u;
  orig_pixels[3][1] = 0u;
  orig_pixels[3][2] = 0u;
  orig_pixels[3][3] = 255u;
  // Test white
  orig_pixels[4][0] = 255u;
  orig_pixels[4][1] = 255u;
  orig_pixels[4][2] = 255u;
  orig_pixels[4][3] = 255u;
  // Test color
  orig_pixels[5][0] = 16u;
  orig_pixels[5][1] = 64u;
  orig_pixels[5][2] = 192u;
  orig_pixels[5][3] = 224u;
  // Do 16 to test asm version.
  ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1);
  EXPECT_EQ(33u, orig_pixels[0][0]);
  EXPECT_EQ(43u, orig_pixels[0][1]);
  EXPECT_EQ(47u, orig_pixels[0][2]);
  EXPECT_EQ(128u, orig_pixels[0][3]);
  EXPECT_EQ(135u, orig_pixels[1][0]);
  EXPECT_EQ(175u, orig_pixels[1][1]);
  EXPECT_EQ(195u, orig_pixels[1][2]);
  EXPECT_EQ(0u, orig_pixels[1][3]);
  EXPECT_EQ(69u, orig_pixels[2][0]);
  EXPECT_EQ(89u, orig_pixels[2][1]);
  EXPECT_EQ(99u, orig_pixels[2][2]);
  EXPECT_EQ(255u, orig_pixels[2][3]);
  EXPECT_EQ(0u, orig_pixels[3][0]);
  EXPECT_EQ(0u, orig_pixels[3][1]);
  EXPECT_EQ(0u, orig_pixels[3][2]);
  EXPECT_EQ(255u, orig_pixels[3][3]);
  EXPECT_EQ(239u, orig_pixels[4][0]);
  EXPECT_EQ(255u, orig_pixels[4][1]);
  EXPECT_EQ(255u, orig_pixels[4][2]);
  EXPECT_EQ(255u, orig_pixels[4][3]);
  EXPECT_EQ(88u, orig_pixels[5][0]);
  EXPECT_EQ(114u, orig_pixels[5][1]);
  EXPECT_EQ(127u, orig_pixels[5][2]);
  EXPECT_EQ(224u, orig_pixels[5][3]);
  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
  }
}
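
// Note: ARGBColorMatrix takes signed byte coefficients; the halved sepia
// constants and the alpha weight of 64 below suggest a fixed-point scale of
// 1/64, so 64 in the last row passes alpha through unchanged (hence the
// "Copy alpha" comment).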
TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0,
      24 / 2, 98 / 2, 50 / 2, 0, 0, 0, 0, 64,  // Copy alpha.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));
  // Test blue
  orig_pixels[0][0] = 255u;
  orig_pixels[0][1] = 0u;
  orig_pixels[0][2] = 0u;
  orig_pixels[0][3] = 128u;
  // Test green
  orig_pixels[1][0] = 0u;
  orig_pixels[1][1] = 255u;
  orig_pixels[1][2] = 0u;
  orig_pixels[1][3] = 0u;
  // Test red
  orig_pixels[2][0] = 0u;
  orig_pixels[2][1] = 0u;
  orig_pixels[2][2] = 255u;
  orig_pixels[2][3] = 255u;
  // Test color
  orig_pixels[3][0] = 16u;
  orig_pixels[3][1] = 64u;
  orig_pixels[3][2] = 192u;
  orig_pixels[3][3] = 224u;
  // Do 16 to test asm version.
  ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                  &kRGBToSepia[0], 16, 1);
  EXPECT_EQ(31u, dst_pixels_opt[0][0]);
  EXPECT_EQ(43u, dst_pixels_opt[0][1]);
  EXPECT_EQ(47u, dst_pixels_opt[0][2]);
  EXPECT_EQ(128u, dst_pixels_opt[0][3]);
  EXPECT_EQ(135u, dst_pixels_opt[1][0]);
  EXPECT_EQ(175u, dst_pixels_opt[1][1]);
  EXPECT_EQ(195u, dst_pixels_opt[1][2]);
  EXPECT_EQ(0u, dst_pixels_opt[1][3]);
  EXPECT_EQ(67u, dst_pixels_opt[2][0]);
  EXPECT_EQ(87u, dst_pixels_opt[2][1]);
  EXPECT_EQ(99u, dst_pixels_opt[2][2]);
  EXPECT_EQ(255u, dst_pixels_opt[2][3]);
  EXPECT_EQ(87u, dst_pixels_opt[3][0]);
  EXPECT_EQ(112u, dst_pixels_opt[3][1]);
  EXPECT_EQ(127u, dst_pixels_opt[3][2]);
  EXPECT_EQ(224u, dst_pixels_opt[3][3]);

  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }
  MaskCpuFlags(disable_cpu_flags_);
  ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                  &kRGBToSepia[0], 1280, 1);
  MaskCpuFlags(benchmark_cpu_info_);
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                    &kRGBToSepia[0], 1280, 1);
  }
  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
    EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
    EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
    EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
  }
}

TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17, 68, 35, 0, 22, 88, 45, 0,
      24, 98, 50, 0, 0, 0, 0, 0,  // Unused but makes matrix 16 bytes.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));
  // Test blue
  orig_pixels[0][0] = 255u;
  orig_pixels[0][1] = 0u;
  orig_pixels[0][2] = 0u;
  orig_pixels[0][3] = 128u;
  // Test green
  orig_pixels[1][0] = 0u;
  orig_pixels[1][1] = 255u;
  orig_pixels[1][2] = 0u;
  orig_pixels[1][3] = 0u;
  // Test red
  orig_pixels[2][0] = 0u;
  orig_pixels[2][1] = 0u;
  orig_pixels[2][2] = 255u;
  orig_pixels[2][3] = 255u;
  // Test color
  orig_pixels[3][0] = 16u;
  orig_pixels[3][1] = 64u;
  orig_pixels[3][2] = 192u;
  orig_pixels[3][3] = 224u;
  // Do 16 to test asm version.
  RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1);
  EXPECT_EQ(31u, orig_pixels[0][0]);
  EXPECT_EQ(43u, orig_pixels[0][1]);
  EXPECT_EQ(47u, orig_pixels[0][2]);
  EXPECT_EQ(128u, orig_pixels[0][3]);
  EXPECT_EQ(135u, orig_pixels[1][0]);
  EXPECT_EQ(175u, orig_pixels[1][1]);
  EXPECT_EQ(195u, orig_pixels[1][2]);
  EXPECT_EQ(0u, orig_pixels[1][3]);
  EXPECT_EQ(67u, orig_pixels[2][0]);
  EXPECT_EQ(87u, orig_pixels[2][1]);
  EXPECT_EQ(99u, orig_pixels[2][2]);
  EXPECT_EQ(255u, orig_pixels[2][3]);
  EXPECT_EQ(87u, orig_pixels[3][0]);
  EXPECT_EQ(112u, orig_pixels[3][1]);
  EXPECT_EQ(127u, orig_pixels[3][2]);
  EXPECT_EQ(224u, orig_pixels[3][3]);
  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 1280, 1);
  }
}

TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Color lookup table indexed per channel: new value = table[old * 4 + channel].
  // Only the first 16 entries are set; the rest are zero-initialized.
  static const uint8_t kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  orig_pixels[0][0] = 0u;
  orig_pixels[0][1] = 0u;
  orig_pixels[0][2] = 0u;
  orig_pixels[0][3] = 0u;
  orig_pixels[1][0] = 1u;
  orig_pixels[1][1] = 1u;
  orig_pixels[1][2] = 1u;
  orig_pixels[1][3] = 1u;
  orig_pixels[2][0] = 2u;
  orig_pixels[2][1] = 2u;
  orig_pixels[2][2] = 2u;
  orig_pixels[2][3] = 2u;
  orig_pixels[3][0] = 0u;
  orig_pixels[3][1] = 1u;
  orig_pixels[3][2] = 2u;
  orig_pixels[3][3] = 3u;
  // Do 16 to test asm version.
  ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  EXPECT_EQ(1u, orig_pixels[0][0]);
  EXPECT_EQ(2u, orig_pixels[0][1]);
  EXPECT_EQ(3u, orig_pixels[0][2]);
  EXPECT_EQ(4u, orig_pixels[0][3]);
  EXPECT_EQ(5u, orig_pixels[1][0]);
  EXPECT_EQ(6u, orig_pixels[1][1]);
  EXPECT_EQ(7u, orig_pixels[1][2]);
  EXPECT_EQ(8u, orig_pixels[1][3]);
  EXPECT_EQ(9u, orig_pixels[2][0]);
  EXPECT_EQ(10u, orig_pixels[2][1]);
  EXPECT_EQ(11u, orig_pixels[2][2]);
  EXPECT_EQ(12u, orig_pixels[2][3]);
  EXPECT_EQ(1u, orig_pixels[3][0]);
  EXPECT_EQ(6u, orig_pixels[3][1]);
  EXPECT_EQ(11u, orig_pixels[3][2]);
  EXPECT_EQ(16u, orig_pixels[3][3]);
  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
  }
}

// Same as TestARGBColorTable except alpha does not change.
TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Color lookup table; only the first 16 entries are set, the rest are
  // zero-initialized.
  static const uint8_t kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  orig_pixels[0][0] = 0u;
  orig_pixels[0][1] = 0u;
  orig_pixels[0][2] = 0u;
  orig_pixels[0][3] = 0u;
  orig_pixels[1][0] = 1u;
  orig_pixels[1][1] = 1u;
  orig_pixels[1][2] = 1u;
  orig_pixels[1][3] = 1u;
  orig_pixels[2][0] = 2u;
  orig_pixels[2][1] = 2u;
  orig_pixels[2][2] = 2u;
  orig_pixels[2][3] = 2u;
  orig_pixels[3][0] = 0u;
  orig_pixels[3][1] = 1u;
  orig_pixels[3][2] = 2u;
  orig_pixels[3][3] = 3u;
  // Do 16 to test asm version.
  RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  EXPECT_EQ(1u, orig_pixels[0][0]);
  EXPECT_EQ(2u, orig_pixels[0][1]);
  EXPECT_EQ(3u, orig_pixels[0][2]);
  EXPECT_EQ(0u, orig_pixels[0][3]);  // Alpha unchanged.
  EXPECT_EQ(5u, orig_pixels[1][0]);
  EXPECT_EQ(6u, orig_pixels[1][1]);
  EXPECT_EQ(7u, orig_pixels[1][2]);
  EXPECT_EQ(1u, orig_pixels[1][3]);  // Alpha unchanged.
  EXPECT_EQ(9u, orig_pixels[2][0]);
  EXPECT_EQ(10u, orig_pixels[2][1]);
  EXPECT_EQ(11u, orig_pixels[2][2]);
  EXPECT_EQ(2u, orig_pixels[2][3]);  // Alpha unchanged.
  EXPECT_EQ(1u, orig_pixels[3][0]);
  EXPECT_EQ(6u, orig_pixels[3][1]);
  EXPECT_EQ(11u, orig_pixels[3][2]);
  EXPECT_EQ(3u, orig_pixels[3][3]);  // Alpha unchanged.
  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
  }
}
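
// ARGBQuantize with scale (65536 + 4) / 8, interval size 8 and offset 4
// snaps each color channel down to a multiple of 8 and then adds 4, matching
// the expectations below; the alpha channel is left untouched.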
TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);

  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }
  ARGBQuantize(&orig_pixels[0][0], 0, (65536 + (8 / 2)) / 8, 8, 8 / 2, 0, 0,
               1280, 1);

  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ((i / 8 * 8 + 8 / 2) & 255, orig_pixels[i][0]);
    EXPECT_EQ((i / 2 / 8 * 8 + 8 / 2) & 255, orig_pixels[i][1]);
    EXPECT_EQ((i / 3 / 8 * 8 + 8 / 2) & 255, orig_pixels[i][2]);
    EXPECT_EQ(i & 255, orig_pixels[i][3]);
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBQuantize(&orig_pixels[0][0], 0, (65536 + (8 / 2)) / 8, 8, 8 / 2, 0, 0,
                 1280, 1);
  }
}

TEST_F(LibYUVPlanarTest, TestARGBMirror) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels[1280][4]);

  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i / 4;
  }
  ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1);

  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ(i & 255, dst_pixels[1280 - 1 - i][0]);
    EXPECT_EQ((i / 2) & 255, dst_pixels[1280 - 1 - i][1]);
    EXPECT_EQ((i / 3) & 255, dst_pixels[1280 - 1 - i][2]);
    EXPECT_EQ((i / 4) & 255, dst_pixels[1280 - 1 - i][3]);
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1);
  }
}

TEST_F(LibYUVPlanarTest, TestShade) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t shade_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  orig_pixels[0][0] = 10u;
  orig_pixels[0][1] = 20u;
  orig_pixels[0][2] = 40u;
  orig_pixels[0][3] = 80u;
  orig_pixels[1][0] = 0u;
  orig_pixels[1][1] = 0u;
  orig_pixels[1][2] = 0u;
  orig_pixels[1][3] = 255u;
  orig_pixels[2][0] = 0u;
  orig_pixels[2][1] = 0u;
  orig_pixels[2][2] = 0u;
  orig_pixels[2][3] = 0u;
  orig_pixels[3][0] = 0u;
  orig_pixels[3][1] = 0u;
  orig_pixels[3][2] = 0u;
  orig_pixels[3][3] = 0u;
  // Do 8 pixels to allow opt version to be used.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80ffffff);
  EXPECT_EQ(10u, shade_pixels[0][0]);
  EXPECT_EQ(20u, shade_pixels[0][1]);
  EXPECT_EQ(40u, shade_pixels[0][2]);
  EXPECT_EQ(40u, shade_pixels[0][3]);
  EXPECT_EQ(0u, shade_pixels[1][0]);
  EXPECT_EQ(0u, shade_pixels[1][1]);
  EXPECT_EQ(0u, shade_pixels[1][2]);
  EXPECT_EQ(128u, shade_pixels[1][3]);
  EXPECT_EQ(0u, shade_pixels[2][0]);
  EXPECT_EQ(0u, shade_pixels[2][1]);
  EXPECT_EQ(0u, shade_pixels[2][2]);
  EXPECT_EQ(0u, shade_pixels[2][3]);
  EXPECT_EQ(0u, shade_pixels[3][0]);
  EXPECT_EQ(0u, shade_pixels[3][1]);
  EXPECT_EQ(0u, shade_pixels[3][2]);
  EXPECT_EQ(0u, shade_pixels[3][3]);

  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80808080);
  EXPECT_EQ(5u, shade_pixels[0][0]);
  EXPECT_EQ(10u, shade_pixels[0][1]);
  EXPECT_EQ(20u, shade_pixels[0][2]);
  EXPECT_EQ(40u, shade_pixels[0][3]);

  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x10204080);
  EXPECT_EQ(5u, shade_pixels[0][0]);
  EXPECT_EQ(5u, shade_pixels[0][1]);
  EXPECT_EQ(5u, shade_pixels[0][2]);
  EXPECT_EQ(5u, shade_pixels[0][3]);

  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 1280, 1,
              0x80808080);
  }
}

TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
  SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
  SIMD_ALIGNED(uint8_t orig_pixels_1[1280][4]);
  SIMD_ALIGNED(uint8_t interpolate_pixels[1280][4]);
  memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  memset(orig_pixels_1, 0, sizeof(orig_pixels_1));

  orig_pixels_0[0][0] = 16u;
  orig_pixels_0[0][1] = 32u;
  orig_pixels_0[0][2] = 64u;
  orig_pixels_0[0][3] = 128u;
  orig_pixels_0[1][0] = 0u;
  orig_pixels_0[1][1] = 0u;
  orig_pixels_0[1][2] = 0u;
  orig_pixels_0[1][3] = 255u;
  orig_pixels_0[2][0] = 0u;
  orig_pixels_0[2][1] = 0u;
  orig_pixels_0[2][2] = 0u;
  orig_pixels_0[2][3] = 0u;
  orig_pixels_0[3][0] = 0u;
  orig_pixels_0[3][1] = 0u;
  orig_pixels_0[3][2] = 0u;
  orig_pixels_0[3][3] = 0u;

  orig_pixels_1[0][0] = 0u;
  orig_pixels_1[0][1] = 0u;
  orig_pixels_1[0][2] = 0u;
  orig_pixels_1[0][3] = 0u;
  orig_pixels_1[1][0] = 0u;
  orig_pixels_1[1][1] = 0u;
  orig_pixels_1[1][2] = 0u;
  orig_pixels_1[1][3] = 0u;
  orig_pixels_1[2][0] = 0u;
  orig_pixels_1[2][1] = 0u;
  orig_pixels_1[2][2] = 0u;
  orig_pixels_1[2][3] = 0u;
  orig_pixels_1[3][0] = 255u;
  orig_pixels_1[3][1] = 255u;
  orig_pixels_1[3][2] = 255u;
  orig_pixels_1[3][3] = 255u;

  ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                  &interpolate_pixels[0][0], 0, 4, 1, 128);
  EXPECT_EQ(8u, interpolate_pixels[0][0]);
  EXPECT_EQ(16u, interpolate_pixels[0][1]);
  EXPECT_EQ(32u, interpolate_pixels[0][2]);
  EXPECT_EQ(64u, interpolate_pixels[0][3]);
  EXPECT_EQ(0u, interpolate_pixels[1][0]);
  EXPECT_EQ(0u, interpolate_pixels[1][1]);
  EXPECT_EQ(0u, interpolate_pixels[1][2]);
  EXPECT_EQ(128u, interpolate_pixels[1][3]);
  EXPECT_EQ(0u, interpolate_pixels[2][0]);
  EXPECT_EQ(0u, interpolate_pixels[2][1]);
  EXPECT_EQ(0u, interpolate_pixels[2][2]);
  EXPECT_EQ(0u, interpolate_pixels[2][3]);
  EXPECT_EQ(128u, interpolate_pixels[3][0]);
  EXPECT_EQ(128u, interpolate_pixels[3][1]);
  EXPECT_EQ(128u, interpolate_pixels[3][2]);
  EXPECT_EQ(128u, interpolate_pixels[3][3]);

  ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                  &interpolate_pixels[0][0], 0, 4, 1, 0);
  EXPECT_EQ(16u, interpolate_pixels[0][0]);
  EXPECT_EQ(32u, interpolate_pixels[0][1]);
  EXPECT_EQ(64u, interpolate_pixels[0][2]);
  EXPECT_EQ(128u, interpolate_pixels[0][3]);

  ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                  &interpolate_pixels[0][0], 0, 4, 1, 192);
  EXPECT_EQ(4u, interpolate_pixels[0][0]);
  EXPECT_EQ(8u, interpolate_pixels[0][1]);
  EXPECT_EQ(16u, interpolate_pixels[0][2]);
  EXPECT_EQ(32u, interpolate_pixels[0][3]);

  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                    &interpolate_pixels[0][0], 0, 1280, 1, 128);
  }
}
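
// InterpolatePlane uses the same interpolation fraction convention as
// ARGBInterpolate above: 0 returns the first source unchanged, 128 averages
// the two, and larger values weight the second source more heavily.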
TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
  SIMD_ALIGNED(uint8_t orig_pixels_0[1280]);
  SIMD_ALIGNED(uint8_t orig_pixels_1[1280]);
  SIMD_ALIGNED(uint8_t interpolate_pixels[1280]);
  memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  memset(orig_pixels_1, 0, sizeof(orig_pixels_1));

  orig_pixels_0[0] = 16u;
  orig_pixels_0[1] = 32u;
  orig_pixels_0[2] = 64u;
  orig_pixels_0[3] = 128u;
  orig_pixels_0[4] = 0u;
  orig_pixels_0[5] = 0u;
  orig_pixels_0[6] = 0u;
  orig_pixels_0[7] = 255u;
  orig_pixels_0[8] = 0u;
  orig_pixels_0[9] = 0u;
  orig_pixels_0[10] = 0u;
  orig_pixels_0[11] = 0u;
  orig_pixels_0[12] = 0u;
  orig_pixels_0[13] = 0u;
  orig_pixels_0[14] = 0u;
  orig_pixels_0[15] = 0u;

  orig_pixels_1[0] = 0u;
  orig_pixels_1[1] = 0u;
  orig_pixels_1[2] = 0u;
  orig_pixels_1[3] = 0u;
  orig_pixels_1[4] = 0u;
  orig_pixels_1[5] = 0u;
  orig_pixels_1[6] = 0u;
  orig_pixels_1[7] = 0u;
  orig_pixels_1[8] = 0u;
  orig_pixels_1[9] = 0u;
  orig_pixels_1[10] = 0u;
  orig_pixels_1[11] = 0u;
  orig_pixels_1[12] = 255u;
  orig_pixels_1[13] = 255u;
  orig_pixels_1[14] = 255u;
  orig_pixels_1[15] = 255u;

  InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
                   &interpolate_pixels[0], 0, 16, 1, 128);
  EXPECT_EQ(8u, interpolate_pixels[0]);
  EXPECT_EQ(16u, interpolate_pixels[1]);
  EXPECT_EQ(32u, interpolate_pixels[2]);
  EXPECT_EQ(64u, interpolate_pixels[3]);
  EXPECT_EQ(0u, interpolate_pixels[4]);
  EXPECT_EQ(0u, interpolate_pixels[5]);
  EXPECT_EQ(0u, interpolate_pixels[6]);
  EXPECT_EQ(128u, interpolate_pixels[7]);
  EXPECT_EQ(0u, interpolate_pixels[8]);
  EXPECT_EQ(0u, interpolate_pixels[9]);
  EXPECT_EQ(0u, interpolate_pixels[10]);
  EXPECT_EQ(0u, interpolate_pixels[11]);
  EXPECT_EQ(128u, interpolate_pixels[12]);
  EXPECT_EQ(128u, interpolate_pixels[13]);
  EXPECT_EQ(128u, interpolate_pixels[14]);
  EXPECT_EQ(128u, interpolate_pixels[15]);

  InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
                   &interpolate_pixels[0], 0, 16, 1, 0);
  EXPECT_EQ(16u, interpolate_pixels[0]);
  EXPECT_EQ(32u, interpolate_pixels[1]);
  EXPECT_EQ(64u, interpolate_pixels[2]);
  EXPECT_EQ(128u, interpolate_pixels[3]);

  InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
                   &interpolate_pixels[0], 0, 16, 1, 192);
  EXPECT_EQ(4u, interpolate_pixels[0]);
  EXPECT_EQ(8u, interpolate_pixels[1]);
  EXPECT_EQ(16u, interpolate_pixels[2]);
  EXPECT_EQ(32u, interpolate_pixels[3]);

  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
                     &interpolate_pixels[0], 0, 1280, 1, 123);
  }
}
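
// TESTTERP instantiates an ARGBInterpolate comparison test for a fixed
// interpolation fraction TERP (0 favors the first source, 255 the second).
// W1280 selects the test width, NEG flips the sign of the height to exercise
// inversion, and OFF offsets the buffers by one byte for unaligned variants.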
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
                 N, NEG, OFF) \
  TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \
    const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
    const int kHeight = benchmark_height_; \
    const int kStrideA = \
        (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
    const int kStrideB = \
        (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
    align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \
    align_buffer_page_end(src_argb_b, kStrideA* kHeight + OFF); \
    align_buffer_page_end(dst_argb_c, kStrideB* kHeight); \
    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight); \
    for (int i = 0; i < kStrideA * kHeight; ++i) { \
      src_argb_a[i + OFF] = (fastrand() & 0xff); \
      src_argb_b[i + OFF] = (fastrand() & 0xff); \
    } \
    MaskCpuFlags(disable_cpu_flags_); \
    ARGBInterpolate(src_argb_a + OFF, kStrideA, src_argb_b + OFF, kStrideA, \
                    dst_argb_c, kStrideB, kWidth, NEG kHeight, TERP); \
    MaskCpuFlags(benchmark_cpu_info_); \
    for (int i = 0; i < benchmark_iterations_; ++i) { \
      ARGBInterpolate(src_argb_a + OFF, kStrideA, src_argb_b + OFF, kStrideA, \
                      dst_argb_opt, kStrideB, kWidth, NEG kHeight, TERP); \
    } \
    for (int i = 0; i < kStrideB * kHeight; ++i) { \
      EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
    } \
    free_aligned_buffer_page_end(src_argb_a); \
    free_aligned_buffer_page_end(src_argb_b); \
    free_aligned_buffer_page_end(dst_argb_c); \
    free_aligned_buffer_page_end(dst_argb_opt); \
  }

#define TESTINTERPOLATE(TERP) \
  TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ - 1, TERP, _Any, +, 0) \
  TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
  TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0) \
  TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)

TESTINTERPOLATE(0)
TESTINTERPOLATE(64)
TESTINTERPOLATE(128)
TESTINTERPOLATE(192)
TESTINTERPOLATE(255)
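
// TestBlend compares the C and optimized ARGBBlend paths on random data.
// Both sources are attenuated first, consistent with ARGBBlend operating on
// premultiplied (attenuated) pixels, and the maximum per-byte difference is
// returned for the callers' tolerance checks.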
static int TestBlend(int width,
                     int height,
                     int benchmark_iterations,
                     int disable_cpu_flags,
                     int benchmark_cpu_info,
                     int invert,
                     int off) {
  if (width < 1) {
    width = 1;
  }
  const int kBpp = 4;
  const int kStride = width * kBpp;
  align_buffer_page_end(src_argb_a, kStride * height + off);
  align_buffer_page_end(src_argb_b, kStride * height + off);
  align_buffer_page_end(dst_argb_c, kStride * height);
  align_buffer_page_end(dst_argb_opt, kStride * height);
  for (int i = 0; i < kStride * height; ++i) {
    src_argb_a[i + off] = (fastrand() & 0xff);
    src_argb_b[i + off] = (fastrand() & 0xff);
  }
  ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
                height);
  ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width,
                height);
  memset(dst_argb_c, 255, kStride * height);
  memset(dst_argb_opt, 255, kStride * height);

  MaskCpuFlags(disable_cpu_flags);
  ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
            kStride, width, invert * height);
  MaskCpuFlags(benchmark_cpu_info);
  for (int i = 0; i < benchmark_iterations; ++i) {
    ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride,
              dst_argb_opt, kStride, width, invert * height);
  }
  int max_diff = 0;
  for (int i = 0; i < kStride * height; ++i) {
    int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
                       static_cast<int>(dst_argb_opt[i]));
    if (abs_diff > max_diff) {
      max_diff = abs_diff;
    }
  }
  free_aligned_buffer_page_end(src_argb_a);
  free_aligned_buffer_page_end(src_argb_b);
  free_aligned_buffer_page_end(dst_argb_c);
  free_aligned_buffer_page_end(dst_argb_opt);
  return max_diff;
}

TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
  int max_diff =
      TestBlend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(max_diff, 1);
}

TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
  int max_diff =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  EXPECT_LE(max_diff, 1);
}

TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
  int max_diff =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  EXPECT_LE(max_diff, 1);
}

TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
  int max_diff =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(max_diff, 1);
}
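
// TestBlendPlane first checks the two exact edge cases (alpha 255 must return
// the first source byte-for-byte, alpha 0 the second), then compares the C
// and optimized paths on random data.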
static void TestBlendPlane(int width,
                           int height,
                           int benchmark_iterations,
                           int disable_cpu_flags,
                           int benchmark_cpu_info,
                           int invert,
                           int off) {
  if (width < 1) {
    width = 1;
  }
  const int kBpp = 1;
  const int kStride = width * kBpp;
  align_buffer_page_end(src_argb_a, kStride * height + off);
  align_buffer_page_end(src_argb_b, kStride * height + off);
  align_buffer_page_end(src_argb_alpha, kStride * height + off);
  align_buffer_page_end(dst_argb_c, kStride * height + off);
  align_buffer_page_end(dst_argb_opt, kStride * height + off);
  memset(dst_argb_c, 255, kStride * height + off);
  memset(dst_argb_opt, 255, kStride * height + off);

  // Test source is maintained exactly if alpha is 255.
  for (int i = 0; i < width; ++i) {
    src_argb_a[i + off] = i & 255;
    src_argb_b[i + off] = 255 - (i & 255);
  }
  memset(src_argb_alpha + off, 255, width);
  BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
             src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
  for (int i = 0; i < width; ++i) {
    EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
  }
  // Test destination is maintained exactly if alpha is 0.
  memset(src_argb_alpha + off, 0, width);
  BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
             src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
  for (int i = 0; i < width; ++i) {
    EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
  }

  for (int i = 0; i < kStride * height; ++i) {
    src_argb_a[i + off] = (fastrand() & 0xff);
    src_argb_b[i + off] = (fastrand() & 0xff);
    src_argb_alpha[i + off] = (fastrand() & 0xff);
  }

  MaskCpuFlags(disable_cpu_flags);
  BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
             src_argb_alpha + off, width, dst_argb_c + off, width, width,
             invert * height);
  MaskCpuFlags(benchmark_cpu_info);
  for (int i = 0; i < benchmark_iterations; ++i) {
    BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
               src_argb_alpha + off, width, dst_argb_opt + off, width, width,
               invert * height);
  }
  for (int i = 0; i < kStride * height; ++i) {
    EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
  }
  free_aligned_buffer_page_end(src_argb_a);
  free_aligned_buffer_page_end(src_argb_b);
  free_aligned_buffer_page_end(src_argb_alpha);
  free_aligned_buffer_page_end(dst_argb_c);
  free_aligned_buffer_page_end(dst_argb_opt);
}

TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
}

TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
}

TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
  TestBlendPlane(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
}

TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, -1, 1);
}
  1127. #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
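// SUBSAMPLE rounds up, e.g. SUBSAMPLE(5, 2) == 3, so chroma planes cover odd
// widths and heights. TestI420Blend blends two random I420 images through an
// alpha plane with the C path and then the optimized path and requires
// bit-exact Y, U and V planes.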
  1128. static void TestI420Blend(int width,
  1129. int height,
  1130. int benchmark_iterations,
  1131. int disable_cpu_flags,
  1132. int benchmark_cpu_info,
  1133. int invert,
  1134. int off) {
  1135. width = ((width) > 0) ? (width) : 1;
  1136. const int kStrideUV = SUBSAMPLE(width, 2);
  1137. const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
  1138. align_buffer_page_end(src_y0, width * height + off);
  1139. align_buffer_page_end(src_u0, kSizeUV + off);
  1140. align_buffer_page_end(src_v0, kSizeUV + off);
  1141. align_buffer_page_end(src_y1, width * height + off);
  1142. align_buffer_page_end(src_u1, kSizeUV + off);
  1143. align_buffer_page_end(src_v1, kSizeUV + off);
  1144. align_buffer_page_end(src_a, width * height + off);
  1145. align_buffer_page_end(dst_y_c, width * height + off);
  1146. align_buffer_page_end(dst_u_c, kSizeUV + off);
  1147. align_buffer_page_end(dst_v_c, kSizeUV + off);
  1148. align_buffer_page_end(dst_y_opt, width * height + off);
  1149. align_buffer_page_end(dst_u_opt, kSizeUV + off);
  1150. align_buffer_page_end(dst_v_opt, kSizeUV + off);
  1151. MemRandomize(src_y0, width * height + off);
  1152. MemRandomize(src_u0, kSizeUV + off);
  1153. MemRandomize(src_v0, kSizeUV + off);
  1154. MemRandomize(src_y1, width * height + off);
  1155. MemRandomize(src_u1, kSizeUV + off);
  1156. MemRandomize(src_v1, kSizeUV + off);
  1157. MemRandomize(src_a, width * height + off);
  1158. memset(dst_y_c, 255, width * height + off);
  1159. memset(dst_u_c, 255, kSizeUV + off);
  1160. memset(dst_v_c, 255, kSizeUV + off);
  1161. memset(dst_y_opt, 255, width * height + off);
  1162. memset(dst_u_opt, 255, kSizeUV + off);
  1163. memset(dst_v_opt, 255, kSizeUV + off);
  1164. MaskCpuFlags(disable_cpu_flags);
  1165. I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
  1166. kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
  1167. src_v1 + off, kStrideUV, src_a + off, width, dst_y_c + off, width,
  1168. dst_u_c + off, kStrideUV, dst_v_c + off, kStrideUV, width,
  1169. invert * height);
  1170. MaskCpuFlags(benchmark_cpu_info);
  1171. for (int i = 0; i < benchmark_iterations; ++i) {
  1172. I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
  1173. kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
  1174. src_v1 + off, kStrideUV, src_a + off, width, dst_y_opt + off,
  1175. width, dst_u_opt + off, kStrideUV, dst_v_opt + off, kStrideUV,
  1176. width, invert * height);
  1177. }
  1178. for (int i = 0; i < width * height; ++i) {
  1179. EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
  1180. }
  1181. for (int i = 0; i < kSizeUV; ++i) {
  1182. EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]);
  1183. EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]);
  1184. }
  1185. free_aligned_buffer_page_end(src_y0);
  1186. free_aligned_buffer_page_end(src_u0);
  1187. free_aligned_buffer_page_end(src_v0);
  1188. free_aligned_buffer_page_end(src_y1);
  1189. free_aligned_buffer_page_end(src_u1);
  1190. free_aligned_buffer_page_end(src_v1);
  1191. free_aligned_buffer_page_end(src_a);
  1192. free_aligned_buffer_page_end(dst_y_c);
  1193. free_aligned_buffer_page_end(dst_u_c);
  1194. free_aligned_buffer_page_end(dst_v_c);
  1195. free_aligned_buffer_page_end(dst_y_opt);
  1196. free_aligned_buffer_page_end(dst_u_opt);
  1197. free_aligned_buffer_page_end(dst_v_opt);
  1198. }
  1199. TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
  1200. TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1201. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1202. }
  1203. TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
  1204. TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1205. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1206. }
  1207. // TODO(fbarchard): DISABLED because _Any uses C. Avoid C and re-enable.
  1208. TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
  1209. TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
  1210. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1211. }
  1212. TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
  1213. TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1214. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  1215. }
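// TestAffine checks ARGBAffineRow_C against known values; when built with
// HAS_ARGBAFFINEROW_SSE2 it also requires ARGBAffineRow_SSE2 to match the C
// row byte for byte and benchmarks it if the CPU reports SSE2.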
  1216. TEST_F(LibYUVPlanarTest, TestAffine) {
  1217. SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
  1218. SIMD_ALIGNED(uint8_t interpolate_pixels_C[1280][4]);
  1219. for (int i = 0; i < 1280; ++i) {
  1220. for (int j = 0; j < 4; ++j) {
  1221. orig_pixels_0[i][j] = i;
  1222. }
  1223. }
  1224. float uv_step[4] = {0.f, 0.f, 0.75f, 0.f};
  1225. ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], uv_step,
  1226. 1280);
  1227. EXPECT_EQ(0u, interpolate_pixels_C[0][0]);
  1228. EXPECT_EQ(96u, interpolate_pixels_C[128][0]);
  1229. EXPECT_EQ(191u, interpolate_pixels_C[255][3]);
  1230. #if defined(HAS_ARGBAFFINEROW_SSE2)
  1231. SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]);
  1232. ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
  1233. uv_step, 1280);
  1234. EXPECT_EQ(0, memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4));
  1235. int has_sse2 = TestCpuFlag(kCpuHasSSE2);
  1236. if (has_sse2) {
  1237. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  1238. ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
  1239. uv_step, 1280);
  1240. }
  1241. }
  1242. #endif
  1243. }
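// TestCopyPlane copies the same source region with optimizations disabled and
// then enabled and counts any bytes that differ between the two destinations.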
  1244. TEST_F(LibYUVPlanarTest, TestCopyPlane) {
  1245. int err = 0;
  1246. int yw = benchmark_width_;
  1247. int yh = benchmark_height_;
  1248. int b = 12;
  1249. int i, j;
  1250. int y_plane_size = (yw + b * 2) * (yh + b * 2);
  1251. align_buffer_page_end(orig_y, y_plane_size);
  1252. align_buffer_page_end(dst_c, y_plane_size);
  1253. align_buffer_page_end(dst_opt, y_plane_size);
  1254. memset(orig_y, 0, y_plane_size);
  1255. memset(dst_c, 0, y_plane_size);
  1256. memset(dst_opt, 0, y_plane_size);
  1257. // Fill image buffers with random data.
  1258. for (i = b; i < (yh + b); ++i) {
  1259. for (j = b; j < (yw + b); ++j) {
  1260. orig_y[i * (yw + b * 2) + j] = fastrand() & 0xff;
  1261. }
  1262. }
  1263. // Fill destination buffers with random data.
  1264. for (i = 0; i < y_plane_size; ++i) {
  1265. uint8_t random_number = fastrand() & 0x7f;
  1266. dst_c[i] = random_number;
  1267. dst_opt[i] = dst_c[i];
  1268. }
  1269. int y_off = b * (yw + b * 2) + b;
  1270. int y_st = yw + b * 2;
  1271. int stride = 8;
  1272. // Disable all optimizations.
  1273. MaskCpuFlags(disable_cpu_flags_);
  1274. for (j = 0; j < benchmark_iterations_; j++) {
  1275. CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh);
  1276. }
  1277. // Enable optimizations.
  1278. MaskCpuFlags(benchmark_cpu_info_);
  1279. for (j = 0; j < benchmark_iterations_; j++) {
  1280. CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh);
  1281. }
  1282. for (i = 0; i < y_plane_size; ++i) {
  1283. if (dst_c[i] != dst_opt[i]) {
  1284. ++err;
  1285. }
  1286. }
  1287. free_aligned_buffer_page_end(orig_y);
  1288. free_aligned_buffer_page_end(dst_c);
  1289. free_aligned_buffer_page_end(dst_opt);
  1290. EXPECT_EQ(0, err);
  1291. }
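// TestMultiply runs ARGBMultiply once with the C path and repeatedly with the
// optimized path over random ARGB data and returns the largest per-byte
// difference. TestAdd and TestSubtract below follow the same pattern for
// ARGBAdd and ARGBSubtract.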
  1292. static int TestMultiply(int width,
  1293. int height,
  1294. int benchmark_iterations,
  1295. int disable_cpu_flags,
  1296. int benchmark_cpu_info,
  1297. int invert,
  1298. int off) {
  1299. if (width < 1) {
  1300. width = 1;
  1301. }
  1302. const int kBpp = 4;
  1303. const int kStride = width * kBpp;
  1304. align_buffer_page_end(src_argb_a, kStride * height + off);
  1305. align_buffer_page_end(src_argb_b, kStride * height + off);
  1306. align_buffer_page_end(dst_argb_c, kStride * height);
  1307. align_buffer_page_end(dst_argb_opt, kStride * height);
  1308. for (int i = 0; i < kStride * height; ++i) {
  1309. src_argb_a[i + off] = (fastrand() & 0xff);
  1310. src_argb_b[i + off] = (fastrand() & 0xff);
  1311. }
  1312. memset(dst_argb_c, 0, kStride * height);
  1313. memset(dst_argb_opt, 0, kStride * height);
  1314. MaskCpuFlags(disable_cpu_flags);
  1315. ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
  1316. kStride, width, invert * height);
  1317. MaskCpuFlags(benchmark_cpu_info);
  1318. for (int i = 0; i < benchmark_iterations; ++i) {
  1319. ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride,
  1320. dst_argb_opt, kStride, width, invert * height);
  1321. }
  1322. int max_diff = 0;
  1323. for (int i = 0; i < kStride * height; ++i) {
  1324. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1325. static_cast<int>(dst_argb_opt[i]));
  1326. if (abs_diff > max_diff) {
  1327. max_diff = abs_diff;
  1328. }
  1329. }
  1330. free_aligned_buffer_page_end(src_argb_a);
  1331. free_aligned_buffer_page_end(src_argb_b);
  1332. free_aligned_buffer_page_end(dst_argb_c);
  1333. free_aligned_buffer_page_end(dst_argb_opt);
  1334. return max_diff;
  1335. }
  1336. TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
  1337. int max_diff = TestMultiply(benchmark_width_ - 1, benchmark_height_,
  1338. benchmark_iterations_, disable_cpu_flags_,
  1339. benchmark_cpu_info_, +1, 0);
  1340. EXPECT_LE(max_diff, 1);
  1341. }
  1342. TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
  1343. int max_diff =
  1344. TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1345. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1346. EXPECT_LE(max_diff, 1);
  1347. }
  1348. TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
  1349. int max_diff =
  1350. TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1351. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  1352. EXPECT_LE(max_diff, 1);
  1353. }
  1354. TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
  1355. int max_diff =
  1356. TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1357. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1358. EXPECT_LE(max_diff, 1);
  1359. }
  1360. static int TestAdd(int width,
  1361. int height,
  1362. int benchmark_iterations,
  1363. int disable_cpu_flags,
  1364. int benchmark_cpu_info,
  1365. int invert,
  1366. int off) {
  1367. if (width < 1) {
  1368. width = 1;
  1369. }
  1370. const int kBpp = 4;
  1371. const int kStride = width * kBpp;
  1372. align_buffer_page_end(src_argb_a, kStride * height + off);
  1373. align_buffer_page_end(src_argb_b, kStride * height + off);
  1374. align_buffer_page_end(dst_argb_c, kStride * height);
  1375. align_buffer_page_end(dst_argb_opt, kStride * height);
  1376. for (int i = 0; i < kStride * height; ++i) {
  1377. src_argb_a[i + off] = (fastrand() & 0xff);
  1378. src_argb_b[i + off] = (fastrand() & 0xff);
  1379. }
  1380. memset(dst_argb_c, 0, kStride * height);
  1381. memset(dst_argb_opt, 0, kStride * height);
  1382. MaskCpuFlags(disable_cpu_flags);
  1383. ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
  1384. kStride, width, invert * height);
  1385. MaskCpuFlags(benchmark_cpu_info);
  1386. for (int i = 0; i < benchmark_iterations; ++i) {
  1387. ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_opt,
  1388. kStride, width, invert * height);
  1389. }
  1390. int max_diff = 0;
  1391. for (int i = 0; i < kStride * height; ++i) {
  1392. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1393. static_cast<int>(dst_argb_opt[i]));
  1394. if (abs_diff > max_diff) {
  1395. max_diff = abs_diff;
  1396. }
  1397. }
  1398. free_aligned_buffer_page_end(src_argb_a);
  1399. free_aligned_buffer_page_end(src_argb_b);
  1400. free_aligned_buffer_page_end(dst_argb_c);
  1401. free_aligned_buffer_page_end(dst_argb_opt);
  1402. return max_diff;
  1403. }
  1404. TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
  1405. int max_diff =
  1406. TestAdd(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
  1407. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1408. EXPECT_LE(max_diff, 1);
  1409. }
  1410. TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) {
  1411. int max_diff =
  1412. TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1413. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1414. EXPECT_LE(max_diff, 1);
  1415. }
  1416. TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) {
  1417. int max_diff =
  1418. TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1419. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  1420. EXPECT_LE(max_diff, 1);
  1421. }
  1422. TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) {
  1423. int max_diff =
  1424. TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1425. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1426. EXPECT_LE(max_diff, 1);
  1427. }
  1428. static int TestSubtract(int width,
  1429. int height,
  1430. int benchmark_iterations,
  1431. int disable_cpu_flags,
  1432. int benchmark_cpu_info,
  1433. int invert,
  1434. int off) {
  1435. if (width < 1) {
  1436. width = 1;
  1437. }
  1438. const int kBpp = 4;
  1439. const int kStride = width * kBpp;
  1440. align_buffer_page_end(src_argb_a, kStride * height + off);
  1441. align_buffer_page_end(src_argb_b, kStride * height + off);
  1442. align_buffer_page_end(dst_argb_c, kStride * height);
  1443. align_buffer_page_end(dst_argb_opt, kStride * height);
  1444. for (int i = 0; i < kStride * height; ++i) {
  1445. src_argb_a[i + off] = (fastrand() & 0xff);
  1446. src_argb_b[i + off] = (fastrand() & 0xff);
  1447. }
  1448. memset(dst_argb_c, 0, kStride * height);
  1449. memset(dst_argb_opt, 0, kStride * height);
  1450. MaskCpuFlags(disable_cpu_flags);
  1451. ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
  1452. kStride, width, invert * height);
  1453. MaskCpuFlags(benchmark_cpu_info);
  1454. for (int i = 0; i < benchmark_iterations; ++i) {
  1455. ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride,
  1456. dst_argb_opt, kStride, width, invert * height);
  1457. }
  1458. int max_diff = 0;
  1459. for (int i = 0; i < kStride * height; ++i) {
  1460. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1461. static_cast<int>(dst_argb_opt[i]));
  1462. if (abs_diff > max_diff) {
  1463. max_diff = abs_diff;
  1464. }
  1465. }
  1466. free_aligned_buffer_page_end(src_argb_a);
  1467. free_aligned_buffer_page_end(src_argb_b);
  1468. free_aligned_buffer_page_end(dst_argb_c);
  1469. free_aligned_buffer_page_end(dst_argb_opt);
  1470. return max_diff;
  1471. }
  1472. TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
  1473. int max_diff = TestSubtract(benchmark_width_ - 1, benchmark_height_,
  1474. benchmark_iterations_, disable_cpu_flags_,
  1475. benchmark_cpu_info_, +1, 0);
  1476. EXPECT_LE(max_diff, 1);
  1477. }
  1478. TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) {
  1479. int max_diff =
  1480. TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1481. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1482. EXPECT_LE(max_diff, 1);
  1483. }
  1484. TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) {
  1485. int max_diff =
  1486. TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1487. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  1488. EXPECT_LE(max_diff, 1);
  1489. }
  1490. TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) {
  1491. int max_diff =
  1492. TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1493. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1494. EXPECT_LE(max_diff, 1);
  1495. }
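// TestSobel applies ARGBSobel with the C path and the optimized path to random
// input and returns the largest per-byte difference; the Sobel tests expect an
// exact match. TestSobelToPlane and TestSobelXY below follow the same pattern.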
  1496. static int TestSobel(int width,
  1497. int height,
  1498. int benchmark_iterations,
  1499. int disable_cpu_flags,
  1500. int benchmark_cpu_info,
  1501. int invert,
  1502. int off) {
  1503. if (width < 1) {
  1504. width = 1;
  1505. }
  1506. const int kBpp = 4;
  1507. const int kStride = width * kBpp;
  1508. align_buffer_page_end(src_argb_a, kStride * height + off);
  1509. align_buffer_page_end(dst_argb_c, kStride * height);
  1510. align_buffer_page_end(dst_argb_opt, kStride * height);
  1511. memset(src_argb_a, 0, kStride * height + off);
  1512. for (int i = 0; i < kStride * height; ++i) {
  1513. src_argb_a[i + off] = (fastrand() & 0xff);
  1514. }
  1515. memset(dst_argb_c, 0, kStride * height);
  1516. memset(dst_argb_opt, 0, kStride * height);
  1517. MaskCpuFlags(disable_cpu_flags);
  1518. ARGBSobel(src_argb_a + off, kStride, dst_argb_c, kStride, width,
  1519. invert * height);
  1520. MaskCpuFlags(benchmark_cpu_info);
  1521. for (int i = 0; i < benchmark_iterations; ++i) {
  1522. ARGBSobel(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
  1523. invert * height);
  1524. }
  1525. int max_diff = 0;
  1526. for (int i = 0; i < kStride * height; ++i) {
  1527. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1528. static_cast<int>(dst_argb_opt[i]));
  1529. if (abs_diff > max_diff) {
  1530. max_diff = abs_diff;
  1531. }
  1532. }
  1533. free_aligned_buffer_page_end(src_argb_a);
  1534. free_aligned_buffer_page_end(dst_argb_c);
  1535. free_aligned_buffer_page_end(dst_argb_opt);
  1536. return max_diff;
  1537. }
  1538. TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
  1539. int max_diff =
  1540. TestSobel(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
  1541. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1542. EXPECT_EQ(0, max_diff);
  1543. }
  1544. TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) {
  1545. int max_diff =
  1546. TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1547. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1548. EXPECT_EQ(0, max_diff);
  1549. }
  1550. TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) {
  1551. int max_diff =
  1552. TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1553. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  1554. EXPECT_EQ(0, max_diff);
  1555. }
  1556. TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) {
  1557. int max_diff =
  1558. TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1559. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1560. EXPECT_EQ(0, max_diff);
  1561. }
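// TestSobelToPlane writes a single-channel plane; source and destination
// strides are rounded up to a multiple of 16 bytes.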
  1562. static int TestSobelToPlane(int width,
  1563. int height,
  1564. int benchmark_iterations,
  1565. int disable_cpu_flags,
  1566. int benchmark_cpu_info,
  1567. int invert,
  1568. int off) {
  1569. if (width < 1) {
  1570. width = 1;
  1571. }
  1572. const int kSrcBpp = 4;
  1573. const int kDstBpp = 1;
  1574. const int kSrcStride = (width * kSrcBpp + 15) & ~15;
  1575. const int kDstStride = (width * kDstBpp + 15) & ~15;
  1576. align_buffer_page_end(src_argb_a, kSrcStride * height + off);
  1577. align_buffer_page_end(dst_argb_c, kDstStride * height);
  1578. align_buffer_page_end(dst_argb_opt, kDstStride * height);
  1579. memset(src_argb_a, 0, kSrcStride * height + off);
  1580. for (int i = 0; i < kSrcStride * height; ++i) {
  1581. src_argb_a[i + off] = (fastrand() & 0xff);
  1582. }
  1583. memset(dst_argb_c, 0, kDstStride * height);
  1584. memset(dst_argb_opt, 0, kDstStride * height);
  1585. MaskCpuFlags(disable_cpu_flags);
  1586. ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_c, kDstStride, width,
  1587. invert * height);
  1588. MaskCpuFlags(benchmark_cpu_info);
  1589. for (int i = 0; i < benchmark_iterations; ++i) {
  1590. ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_opt, kDstStride,
  1591. width, invert * height);
  1592. }
  1593. int max_diff = 0;
  1594. for (int i = 0; i < kDstStride * height; ++i) {
  1595. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1596. static_cast<int>(dst_argb_opt[i]));
  1597. if (abs_diff > max_diff) {
  1598. max_diff = abs_diff;
  1599. }
  1600. }
  1601. free_aligned_buffer_page_end(src_argb_a);
  1602. free_aligned_buffer_page_end(dst_argb_c);
  1603. free_aligned_buffer_page_end(dst_argb_opt);
  1604. return max_diff;
  1605. }
  1606. TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
  1607. int max_diff = TestSobelToPlane(benchmark_width_ - 1, benchmark_height_,
  1608. benchmark_iterations_, disable_cpu_flags_,
  1609. benchmark_cpu_info_, +1, 0);
  1610. EXPECT_EQ(0, max_diff);
  1611. }
  1612. TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) {
  1613. int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_,
  1614. benchmark_iterations_, disable_cpu_flags_,
  1615. benchmark_cpu_info_, +1, 1);
  1616. EXPECT_EQ(0, max_diff);
  1617. }
  1618. TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) {
  1619. int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_,
  1620. benchmark_iterations_, disable_cpu_flags_,
  1621. benchmark_cpu_info_, -1, 0);
  1622. EXPECT_EQ(0, max_diff);
  1623. }
  1624. TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) {
  1625. int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_,
  1626. benchmark_iterations_, disable_cpu_flags_,
  1627. benchmark_cpu_info_, +1, 0);
  1628. EXPECT_EQ(0, max_diff);
  1629. }
  1630. static int TestSobelXY(int width,
  1631. int height,
  1632. int benchmark_iterations,
  1633. int disable_cpu_flags,
  1634. int benchmark_cpu_info,
  1635. int invert,
  1636. int off) {
  1637. if (width < 1) {
  1638. width = 1;
  1639. }
  1640. const int kBpp = 4;
  1641. const int kStride = width * kBpp;
  1642. align_buffer_page_end(src_argb_a, kStride * height + off);
  1643. align_buffer_page_end(dst_argb_c, kStride * height);
  1644. align_buffer_page_end(dst_argb_opt, kStride * height);
  1645. memset(src_argb_a, 0, kStride * height + off);
  1646. for (int i = 0; i < kStride * height; ++i) {
  1647. src_argb_a[i + off] = (fastrand() & 0xff);
  1648. }
  1649. memset(dst_argb_c, 0, kStride * height);
  1650. memset(dst_argb_opt, 0, kStride * height);
  1651. MaskCpuFlags(disable_cpu_flags);
  1652. ARGBSobelXY(src_argb_a + off, kStride, dst_argb_c, kStride, width,
  1653. invert * height);
  1654. MaskCpuFlags(benchmark_cpu_info);
  1655. for (int i = 0; i < benchmark_iterations; ++i) {
  1656. ARGBSobelXY(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
  1657. invert * height);
  1658. }
  1659. int max_diff = 0;
  1660. for (int i = 0; i < kStride * height; ++i) {
  1661. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1662. static_cast<int>(dst_argb_opt[i]));
  1663. if (abs_diff > max_diff) {
  1664. max_diff = abs_diff;
  1665. }
  1666. }
  1667. free_aligned_buffer_page_end(src_argb_a);
  1668. free_aligned_buffer_page_end(dst_argb_c);
  1669. free_aligned_buffer_page_end(dst_argb_opt);
  1670. return max_diff;
  1671. }
  1672. TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
  1673. int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_,
  1674. benchmark_iterations_, disable_cpu_flags_,
  1675. benchmark_cpu_info_, +1, 0);
  1676. EXPECT_EQ(0, max_diff);
  1677. }
  1678. TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) {
  1679. int max_diff =
  1680. TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1681. disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  1682. EXPECT_EQ(0, max_diff);
  1683. }
  1684. TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) {
  1685. int max_diff =
  1686. TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1687. disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  1688. EXPECT_EQ(0, max_diff);
  1689. }
  1690. TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) {
  1691. int max_diff =
  1692. TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1693. disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  1694. EXPECT_EQ(0, max_diff);
  1695. }
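// TestBlur exercises ARGBBlur, which needs a cumulative-sum scratch buffer of
// 16 bytes (4 x int32) per pixel; dst_cumsum provides it with a row stride of
// width * 4 int32 values.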
  1696. static int TestBlur(int width,
  1697. int height,
  1698. int benchmark_iterations,
  1699. int disable_cpu_flags,
  1700. int benchmark_cpu_info,
  1701. int invert,
  1702. int off,
  1703. int radius) {
  1704. if (width < 1) {
  1705. width = 1;
  1706. }
  1707. const int kBpp = 4;
  1708. const int kStride = width * kBpp;
  1709. align_buffer_page_end(src_argb_a, kStride * height + off);
  1710. align_buffer_page_end(dst_cumsum, width * height * 16);
  1711. align_buffer_page_end(dst_argb_c, kStride * height);
  1712. align_buffer_page_end(dst_argb_opt, kStride * height);
  1713. for (int i = 0; i < kStride * height; ++i) {
  1714. src_argb_a[i + off] = (fastrand() & 0xff);
  1715. }
  1716. memset(dst_cumsum, 0, width * height * 16);
  1717. memset(dst_argb_c, 0, kStride * height);
  1718. memset(dst_argb_opt, 0, kStride * height);
  1719. MaskCpuFlags(disable_cpu_flags);
  1720. ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
  1721. reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
  1722. invert * height, radius);
  1723. MaskCpuFlags(benchmark_cpu_info);
  1724. for (int i = 0; i < benchmark_iterations; ++i) {
  1725. ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
  1726. reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
  1727. invert * height, radius);
  1728. }
  1729. int max_diff = 0;
  1730. for (int i = 0; i < kStride * height; ++i) {
  1731. int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
  1732. static_cast<int>(dst_argb_opt[i]));
  1733. if (abs_diff > max_diff) {
  1734. max_diff = abs_diff;
  1735. }
  1736. }
  1737. free_aligned_buffer_page_end(src_argb_a);
  1738. free_aligned_buffer_page_end(dst_cumsum);
  1739. free_aligned_buffer_page_end(dst_argb_c);
  1740. free_aligned_buffer_page_end(dst_argb_opt);
  1741. return max_diff;
  1742. }
  1743. static const int kBlurSize = 55;
  1744. TEST_F(LibYUVPlanarTest, ARGBBlur_Any) {
  1745. int max_diff =
  1746. TestBlur(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
  1747. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize);
  1748. EXPECT_LE(max_diff, 1);
  1749. }
  1750. TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) {
  1751. int max_diff =
  1752. TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1753. disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSize);
  1754. EXPECT_LE(max_diff, 1);
  1755. }
  1756. TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) {
  1757. int max_diff =
  1758. TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1759. disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSize);
  1760. EXPECT_LE(max_diff, 1);
  1761. }
  1762. TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) {
  1763. int max_diff =
  1764. TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1765. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize);
  1766. EXPECT_LE(max_diff, 1);
  1767. }
  1768. static const int kBlurSmallSize = 5;
  1769. TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) {
  1770. int max_diff =
  1771. TestBlur(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
  1772. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize);
  1773. EXPECT_LE(max_diff, 1);
  1774. }
  1775. TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) {
  1776. int max_diff =
  1777. TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1778. disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSmallSize);
  1779. EXPECT_LE(max_diff, 1);
  1780. }
  1781. TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) {
  1782. int max_diff =
  1783. TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1784. disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSmallSize);
  1785. EXPECT_LE(max_diff, 1);
  1786. }
  1787. TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) {
  1788. int max_diff =
  1789. TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
  1790. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize);
  1791. EXPECT_LE(max_diff, 1);
  1792. }
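// TestARGBPolynomial applies a cubic per-channel polynomial (a "warmify"
// grade) to a few known pixels and checks the expected results, then compares
// the C and optimized paths over 1280 pixels.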
  1793. TEST_F(LibYUVPlanarTest, TestARGBPolynomial) {
  1794. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  1795. SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  1796. SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
  1797. memset(orig_pixels, 0, sizeof(orig_pixels));
  1798. SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
  1799. 0.94230f, -3.03300f, -2.92500f, 0.f, // C0
  1800. 0.584500f, 1.112000f, 1.535000f, 1.f, // C1 x
  1801. 0.001313f, -0.002503f, -0.004496f, 0.f, // C2 x * x
  1802. 0.0f, 0.000006965f, 0.000008781f, 0.f, // C3 x * x * x
  1803. };
  1804. // Test blue
  1805. orig_pixels[0][0] = 255u;
  1806. orig_pixels[0][1] = 0u;
  1807. orig_pixels[0][2] = 0u;
  1808. orig_pixels[0][3] = 128u;
  1809. // Test green
  1810. orig_pixels[1][0] = 0u;
  1811. orig_pixels[1][1] = 255u;
  1812. orig_pixels[1][2] = 0u;
  1813. orig_pixels[1][3] = 0u;
  1814. // Test red
  1815. orig_pixels[2][0] = 0u;
  1816. orig_pixels[2][1] = 0u;
  1817. orig_pixels[2][2] = 255u;
  1818. orig_pixels[2][3] = 255u;
  1819. // Test white
  1820. orig_pixels[3][0] = 255u;
  1821. orig_pixels[3][1] = 255u;
  1822. orig_pixels[3][2] = 255u;
  1823. orig_pixels[3][3] = 255u;
  1824. // Test color
  1825. orig_pixels[4][0] = 16u;
  1826. orig_pixels[4][1] = 64u;
  1827. orig_pixels[4][2] = 192u;
  1828. orig_pixels[4][3] = 224u;
  1829. // Do 16 to test asm version.
  1830. ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
  1831. &kWarmifyPolynomial[0], 16, 1);
  1832. EXPECT_EQ(235u, dst_pixels_opt[0][0]);
  1833. EXPECT_EQ(0u, dst_pixels_opt[0][1]);
  1834. EXPECT_EQ(0u, dst_pixels_opt[0][2]);
  1835. EXPECT_EQ(128u, dst_pixels_opt[0][3]);
  1836. EXPECT_EQ(0u, dst_pixels_opt[1][0]);
  1837. EXPECT_EQ(233u, dst_pixels_opt[1][1]);
  1838. EXPECT_EQ(0u, dst_pixels_opt[1][2]);
  1839. EXPECT_EQ(0u, dst_pixels_opt[1][3]);
  1840. EXPECT_EQ(0u, dst_pixels_opt[2][0]);
  1841. EXPECT_EQ(0u, dst_pixels_opt[2][1]);
  1842. EXPECT_EQ(241u, dst_pixels_opt[2][2]);
  1843. EXPECT_EQ(255u, dst_pixels_opt[2][3]);
  1844. EXPECT_EQ(235u, dst_pixels_opt[3][0]);
  1845. EXPECT_EQ(233u, dst_pixels_opt[3][1]);
  1846. EXPECT_EQ(241u, dst_pixels_opt[3][2]);
  1847. EXPECT_EQ(255u, dst_pixels_opt[3][3]);
  1848. EXPECT_EQ(10u, dst_pixels_opt[4][0]);
  1849. EXPECT_EQ(59u, dst_pixels_opt[4][1]);
  1850. EXPECT_EQ(188u, dst_pixels_opt[4][2]);
  1851. EXPECT_EQ(224u, dst_pixels_opt[4][3]);
  1852. for (int i = 0; i < 1280; ++i) {
  1853. orig_pixels[i][0] = i;
  1854. orig_pixels[i][1] = i / 2;
  1855. orig_pixels[i][2] = i / 3;
  1856. orig_pixels[i][3] = i;
  1857. }
  1858. MaskCpuFlags(disable_cpu_flags_);
  1859. ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
  1860. &kWarmifyPolynomial[0], 1280, 1);
  1861. MaskCpuFlags(benchmark_cpu_info_);
  1862. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  1863. ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
  1864. &kWarmifyPolynomial[0], 1280, 1);
  1865. }
  1866. for (int i = 0; i < 1280; ++i) {
  1867. EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
  1868. EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
  1869. EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
  1870. EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
  1871. }
  1872. }
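// TestHalfFloatPlane converts a random 16-bit plane, masked to the requested
// bit depth, to half floats with the C and optimized paths and returns the
// largest difference between the two raw 16-bit outputs.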
  1873. int TestHalfFloatPlane(int benchmark_width,
  1874. int benchmark_height,
  1875. int benchmark_iterations,
  1876. int disable_cpu_flags,
  1877. int benchmark_cpu_info,
  1878. float scale,
  1879. int mask) {
  1880. int i, j;
  1881. const int y_plane_size = benchmark_width * benchmark_height * 2;
  1882. align_buffer_page_end(orig_y, y_plane_size * 3);
  1883. uint8_t* dst_opt = orig_y + y_plane_size;
  1884. uint8_t* dst_c = orig_y + y_plane_size * 2;
  1885. MemRandomize(orig_y, y_plane_size);
  1886. memset(dst_c, 0, y_plane_size);
  1887. memset(dst_opt, 1, y_plane_size);
  1888. for (i = 0; i < y_plane_size / 2; ++i) {
  1889. reinterpret_cast<uint16_t*>(orig_y)[i] &= mask;
  1890. }
  1891. // Disable all optimizations.
  1892. MaskCpuFlags(disable_cpu_flags);
  1893. for (j = 0; j < benchmark_iterations; j++) {
  1894. HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
  1895. reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
  1896. scale, benchmark_width, benchmark_height);
  1897. }
  1898. // Enable optimizations.
  1899. MaskCpuFlags(benchmark_cpu_info);
  1900. for (j = 0; j < benchmark_iterations; j++) {
  1901. HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
  1902. reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2,
  1903. scale, benchmark_width, benchmark_height);
  1904. }
  1905. int max_diff = 0;
  1906. for (i = 0; i < y_plane_size / 2; ++i) {
  1907. int abs_diff =
  1908. abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
  1909. static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
  1910. if (abs_diff > max_diff) {
  1911. max_diff = abs_diff;
  1912. }
  1913. }
  1914. free_aligned_buffer_page_end(orig_y);
  1915. return max_diff;
  1916. }
  1917. #if defined(__arm__)
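// Sets the FZ (flush-to-zero) bit, bit 24 of the VFP FPSCR, so denormal
// results are flushed to zero.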
  1918. static void EnableFlushDenormalToZero(void) {
  1919. uint32_t cw;
  1920. __asm__ __volatile__(
  1921. "vmrs %0, fpscr \n"
  1922. "orr %0, %0, #0x1000000 \n"
  1923. "vmsr fpscr, %0 \n"
  1924. : "=r"(cw)::"memory");
  1925. }
  1926. #endif
1927. // A 5 bit exponent with a bias of 15 will underflow to a denormal if the scale
1928. // causes the exponent to drop below 0: 15 - log2(65536) = -1. This shouldn't
1929. // normally happen, since scale is 1/(1<<bits) where bits is 9, 10 or 12.
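// For example, an input of 1 scaled by 1/65536 is 2^-16; half floats are
// normal only down to 2^-14, so the result lands in the subnormal range.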
  1930. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
1931. // 32-bit Arm rounding in the denormal case is off by 1 compared to C.
  1932. #if defined(__arm__)
  1933. EnableFlushDenormalToZero();
  1934. #endif
  1935. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  1936. benchmark_iterations_, disable_cpu_flags_,
  1937. benchmark_cpu_info_, 1.0f / 65536.0f, 65535);
  1938. EXPECT_EQ(0, diff);
  1939. }
  1940. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
  1941. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  1942. benchmark_iterations_, disable_cpu_flags_,
  1943. benchmark_cpu_info_, 1.0f, 65535);
  1944. EXPECT_LE(diff, 1);
  1945. }
  1946. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
  1947. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  1948. benchmark_iterations_, disable_cpu_flags_,
  1949. benchmark_cpu_info_, 1.0f / 4096.0f, 65535);
  1950. EXPECT_EQ(0, diff);
  1951. }
  1952. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
  1953. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  1954. benchmark_iterations_, disable_cpu_flags_,
  1955. benchmark_cpu_info_, 1.0f / 1024.0f, 1023);
  1956. EXPECT_EQ(0, diff);
  1957. }
  1958. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
  1959. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  1960. benchmark_iterations_, disable_cpu_flags_,
  1961. benchmark_cpu_info_, 1.0f / 512.0f, 511);
  1962. EXPECT_EQ(0, diff);
  1963. }
  1964. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
  1965. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  1966. benchmark_iterations_, disable_cpu_flags_,
  1967. benchmark_cpu_info_, 1.0f / 4096.0f, 4095);
  1968. EXPECT_EQ(0, diff);
  1969. }
  1970. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
  1971. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  1972. benchmark_iterations_, disable_cpu_flags_,
  1973. benchmark_cpu_info_, 1.0f / 4095.0f, 4095);
  1974. EXPECT_EQ(0, diff);
  1975. }
  1976. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
  1977. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  1978. benchmark_iterations_, disable_cpu_flags_,
  1979. benchmark_cpu_info_, 1.0f, 2047);
  1980. EXPECT_EQ(0, diff);
  1981. }
  1982. TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
  1983. int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
  1984. benchmark_iterations_, disable_cpu_flags_,
  1985. benchmark_cpu_info_, 1.0f, 4095);
  1986. EXPECT_LE(diff, 1);
  1987. }
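// TestByteToFloat converts a random byte plane to float with the C and
// optimized paths and returns the largest absolute difference.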
  1988. float TestByteToFloat(int benchmark_width,
  1989. int benchmark_height,
  1990. int benchmark_iterations,
  1991. int disable_cpu_flags,
  1992. int benchmark_cpu_info,
  1993. float scale) {
  1994. int i, j;
  1995. const int y_plane_size = benchmark_width * benchmark_height;
  1996. align_buffer_page_end(orig_y, y_plane_size * (1 + 4 + 4));
  1997. float* dst_opt = reinterpret_cast<float*>(orig_y + y_plane_size);
  1998. float* dst_c = reinterpret_cast<float*>(orig_y + y_plane_size * 5);
  1999. MemRandomize(orig_y, y_plane_size);
  2000. memset(dst_c, 0, y_plane_size * 4);
  2001. memset(dst_opt, 1, y_plane_size * 4);
  2002. // Disable all optimizations.
  2003. MaskCpuFlags(disable_cpu_flags);
  2004. ByteToFloat(orig_y, dst_c, scale, y_plane_size);
  2005. // Enable optimizations.
  2006. MaskCpuFlags(benchmark_cpu_info);
  2007. for (j = 0; j < benchmark_iterations; j++) {
  2008. ByteToFloat(orig_y, dst_opt, scale, y_plane_size);
  2009. }
  2010. float max_diff = 0;
  2011. for (i = 0; i < y_plane_size; ++i) {
  2012. float abs_diff = fabs(dst_c[i] - dst_opt[i]);
  2013. if (abs_diff > max_diff) {
  2014. max_diff = abs_diff;
  2015. }
  2016. }
  2017. free_aligned_buffer_page_end(orig_y);
  2018. return max_diff;
  2019. }
  2020. TEST_F(LibYUVPlanarTest, TestByteToFloat) {
  2021. float diff = TestByteToFloat(benchmark_width_, benchmark_height_,
  2022. benchmark_iterations_, disable_cpu_flags_,
  2023. benchmark_cpu_info_, 1.0f);
  2024. EXPECT_EQ(0.f, diff);
  2025. }
  2026. TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
  2027. SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  2028. SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  2029. SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
  2030. memset(orig_pixels, 0, sizeof(orig_pixels));
  2031. align_buffer_page_end(lumacolortable, 32768);
  2032. int v = 0;
  2033. for (int i = 0; i < 32768; ++i) {
  2034. lumacolortable[i] = v;
  2035. v += 3;
  2036. }
  2037. // Test blue
  2038. orig_pixels[0][0] = 255u;
  2039. orig_pixels[0][1] = 0u;
  2040. orig_pixels[0][2] = 0u;
  2041. orig_pixels[0][3] = 128u;
  2042. // Test green
  2043. orig_pixels[1][0] = 0u;
  2044. orig_pixels[1][1] = 255u;
  2045. orig_pixels[1][2] = 0u;
  2046. orig_pixels[1][3] = 0u;
  2047. // Test red
  2048. orig_pixels[2][0] = 0u;
  2049. orig_pixels[2][1] = 0u;
  2050. orig_pixels[2][2] = 255u;
  2051. orig_pixels[2][3] = 255u;
  2052. // Test color
  2053. orig_pixels[3][0] = 16u;
  2054. orig_pixels[3][1] = 64u;
  2055. orig_pixels[3][2] = 192u;
  2056. orig_pixels[3][3] = 224u;
  2057. // Do 16 to test asm version.
  2058. ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
  2059. &lumacolortable[0], 16, 1);
  2060. EXPECT_EQ(253u, dst_pixels_opt[0][0]);
  2061. EXPECT_EQ(0u, dst_pixels_opt[0][1]);
  2062. EXPECT_EQ(0u, dst_pixels_opt[0][2]);
  2063. EXPECT_EQ(128u, dst_pixels_opt[0][3]);
  2064. EXPECT_EQ(0u, dst_pixels_opt[1][0]);
  2065. EXPECT_EQ(253u, dst_pixels_opt[1][1]);
  2066. EXPECT_EQ(0u, dst_pixels_opt[1][2]);
  2067. EXPECT_EQ(0u, dst_pixels_opt[1][3]);
  2068. EXPECT_EQ(0u, dst_pixels_opt[2][0]);
  2069. EXPECT_EQ(0u, dst_pixels_opt[2][1]);
  2070. EXPECT_EQ(253u, dst_pixels_opt[2][2]);
  2071. EXPECT_EQ(255u, dst_pixels_opt[2][3]);
  2072. EXPECT_EQ(48u, dst_pixels_opt[3][0]);
  2073. EXPECT_EQ(192u, dst_pixels_opt[3][1]);
  2074. EXPECT_EQ(64u, dst_pixels_opt[3][2]);
  2075. EXPECT_EQ(224u, dst_pixels_opt[3][3]);
  2076. for (int i = 0; i < 1280; ++i) {
  2077. orig_pixels[i][0] = i;
  2078. orig_pixels[i][1] = i / 2;
  2079. orig_pixels[i][2] = i / 3;
  2080. orig_pixels[i][3] = i;
  2081. }
  2082. MaskCpuFlags(disable_cpu_flags_);
  2083. ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
  2084. lumacolortable, 1280, 1);
  2085. MaskCpuFlags(benchmark_cpu_info_);
  2086. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  2087. ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
  2088. lumacolortable, 1280, 1);
  2089. }
  2090. for (int i = 0; i < 1280; ++i) {
  2091. EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
  2092. EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
  2093. EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
  2094. EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
  2095. }
  2096. free_aligned_buffer_page_end(lumacolortable);
  2097. }
  2098. TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
  2099. const int kSize = benchmark_width_ * benchmark_height_ * 4;
  2100. align_buffer_page_end(orig_pixels, kSize);
  2101. align_buffer_page_end(dst_pixels_opt, kSize);
  2102. align_buffer_page_end(dst_pixels_c, kSize);
  2103. MemRandomize(orig_pixels, kSize);
  2104. MemRandomize(dst_pixels_opt, kSize);
  2105. memcpy(dst_pixels_c, dst_pixels_opt, kSize);
  2106. MaskCpuFlags(disable_cpu_flags_);
  2107. ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, dst_pixels_c,
  2108. benchmark_width_ * 4, benchmark_width_, benchmark_height_);
  2109. MaskCpuFlags(benchmark_cpu_info_);
  2110. for (int i = 0; i < benchmark_iterations_; ++i) {
  2111. ARGBCopyAlpha(orig_pixels, benchmark_width_ * 4, dst_pixels_opt,
  2112. benchmark_width_ * 4, benchmark_width_, benchmark_height_);
  2113. }
  2114. for (int i = 0; i < kSize; ++i) {
  2115. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2116. }
  2117. free_aligned_buffer_page_end(dst_pixels_c);
  2118. free_aligned_buffer_page_end(dst_pixels_opt);
  2119. free_aligned_buffer_page_end(orig_pixels);
  2120. }
  2121. TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
  2122. // Round count up to multiple of 16
  2123. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2124. align_buffer_page_end(src_pixels, kPixels * 4);
  2125. align_buffer_page_end(dst_pixels_opt, kPixels);
  2126. align_buffer_page_end(dst_pixels_c, kPixels);
  2127. MemRandomize(src_pixels, kPixels * 4);
  2128. MemRandomize(dst_pixels_opt, kPixels);
  2129. memcpy(dst_pixels_c, dst_pixels_opt, kPixels);
  2130. MaskCpuFlags(disable_cpu_flags_);
  2131. ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c,
  2132. benchmark_width_, benchmark_width_, benchmark_height_);
  2133. MaskCpuFlags(benchmark_cpu_info_);
  2134. for (int i = 0; i < benchmark_iterations_; ++i) {
  2135. ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt,
  2136. benchmark_width_, benchmark_width_, benchmark_height_);
  2137. }
  2138. for (int i = 0; i < kPixels; ++i) {
  2139. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2140. }
  2141. free_aligned_buffer_page_end(dst_pixels_c);
  2142. free_aligned_buffer_page_end(dst_pixels_opt);
  2143. free_aligned_buffer_page_end(src_pixels);
  2144. }
  2145. TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
  2146. // Round count up to multiple of 16
  2147. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2148. align_buffer_page_end(orig_pixels, kPixels);
  2149. align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  2150. align_buffer_page_end(dst_pixels_c, kPixels * 4);
  2151. MemRandomize(orig_pixels, kPixels);
  2152. MemRandomize(dst_pixels_opt, kPixels * 4);
  2153. memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4);
  2154. MaskCpuFlags(disable_cpu_flags_);
  2155. ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c,
  2156. benchmark_width_ * 4, benchmark_width_, benchmark_height_);
  2157. MaskCpuFlags(benchmark_cpu_info_);
  2158. for (int i = 0; i < benchmark_iterations_; ++i) {
  2159. ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt,
  2160. benchmark_width_ * 4, benchmark_width_, benchmark_height_);
  2161. }
  2162. for (int i = 0; i < kPixels * 4; ++i) {
  2163. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2164. }
  2165. free_aligned_buffer_page_end(dst_pixels_c);
  2166. free_aligned_buffer_page_end(dst_pixels_opt);
  2167. free_aligned_buffer_page_end(orig_pixels);
  2168. }
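// TestARGBRect fills a rectangle with a random value using ARGBRect when
// bpp == 4 and SetPlane when bpp == 1, comparing the C and optimized paths.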
  2169. static int TestARGBRect(int width,
  2170. int height,
  2171. int benchmark_iterations,
  2172. int disable_cpu_flags,
  2173. int benchmark_cpu_info,
  2174. int invert,
  2175. int off,
  2176. int bpp) {
  2177. if (width < 1) {
  2178. width = 1;
  2179. }
  2180. const int kStride = width * bpp;
  2181. const int kSize = kStride * height;
  2182. const uint32_t v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
  2183. align_buffer_page_end(dst_argb_c, kSize + off);
  2184. align_buffer_page_end(dst_argb_opt, kSize + off);
  2185. MemRandomize(dst_argb_c + off, kSize);
  2186. memcpy(dst_argb_opt + off, dst_argb_c + off, kSize);
  2187. MaskCpuFlags(disable_cpu_flags);
  2188. if (bpp == 4) {
  2189. ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32);
  2190. } else {
  2191. SetPlane(dst_argb_c + off, kStride, width, invert * height, v32);
  2192. }
  2193. MaskCpuFlags(benchmark_cpu_info);
  2194. for (int i = 0; i < benchmark_iterations; ++i) {
  2195. if (bpp == 4) {
  2196. ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32);
  2197. } else {
  2198. SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32);
  2199. }
  2200. }
  2201. int max_diff = 0;
  2202. for (int i = 0; i < kStride * height; ++i) {
  2203. int abs_diff = abs(static_cast<int>(dst_argb_c[i + off]) -
  2204. static_cast<int>(dst_argb_opt[i + off]));
  2205. if (abs_diff > max_diff) {
  2206. max_diff = abs_diff;
  2207. }
  2208. }
  2209. free_aligned_buffer_page_end(dst_argb_c);
  2210. free_aligned_buffer_page_end(dst_argb_opt);
  2211. return max_diff;
  2212. }
  2213. TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
  2214. int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_,
  2215. benchmark_iterations_, disable_cpu_flags_,
  2216. benchmark_cpu_info_, +1, 0, 4);
  2217. EXPECT_EQ(0, max_diff);
  2218. }
  2219. TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) {
  2220. int max_diff =
  2221. TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
  2222. disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 4);
  2223. EXPECT_EQ(0, max_diff);
  2224. }
  2225. TEST_F(LibYUVPlanarTest, ARGBRect_Invert) {
  2226. int max_diff =
  2227. TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
  2228. disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 4);
  2229. EXPECT_EQ(0, max_diff);
  2230. }
  2231. TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
  2232. int max_diff =
  2233. TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
  2234. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 4);
  2235. EXPECT_EQ(0, max_diff);
  2236. }
  2237. TEST_F(LibYUVPlanarTest, SetPlane_Any) {
  2238. int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_,
  2239. benchmark_iterations_, disable_cpu_flags_,
  2240. benchmark_cpu_info_, +1, 0, 1);
  2241. EXPECT_EQ(0, max_diff);
  2242. }
  2243. TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) {
  2244. int max_diff =
  2245. TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
  2246. disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 1);
  2247. EXPECT_EQ(0, max_diff);
  2248. }
  2249. TEST_F(LibYUVPlanarTest, SetPlane_Invert) {
  2250. int max_diff =
  2251. TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
  2252. disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 1);
  2253. EXPECT_EQ(0, max_diff);
  2254. }
  2255. TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
  2256. int max_diff =
  2257. TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
  2258. disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1);
  2259. EXPECT_EQ(0, max_diff);
  2260. }
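// The UV/RGB plane tests below round-trip interleaved data through the split
// and merge functions; the result built with optimizations masked off must
// match the optimized result byte for byte.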
  2261. TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
  2262. // Round count up to multiple of 16
  2263. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2264. align_buffer_page_end(src_pixels, kPixels * 2);
  2265. align_buffer_page_end(tmp_pixels_u, kPixels);
  2266. align_buffer_page_end(tmp_pixels_v, kPixels);
  2267. align_buffer_page_end(dst_pixels_opt, kPixels * 2);
  2268. align_buffer_page_end(dst_pixels_c, kPixels * 2);
  2269. MemRandomize(src_pixels, kPixels * 2);
  2270. MemRandomize(tmp_pixels_u, kPixels);
  2271. MemRandomize(tmp_pixels_v, kPixels);
  2272. MemRandomize(dst_pixels_opt, kPixels * 2);
  2273. MemRandomize(dst_pixels_c, kPixels * 2);
  2274. MaskCpuFlags(disable_cpu_flags_);
  2275. SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
  2276. tmp_pixels_v, benchmark_width_, benchmark_width_,
  2277. benchmark_height_);
  2278. MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
  2279. dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
  2280. benchmark_height_);
  2281. MaskCpuFlags(benchmark_cpu_info_);
  2282. SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
  2283. tmp_pixels_v, benchmark_width_, benchmark_width_,
  2284. benchmark_height_);
  2285. for (int i = 0; i < benchmark_iterations_; ++i) {
  2286. MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
  2287. dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
  2288. benchmark_height_);
  2289. }
  2290. for (int i = 0; i < kPixels * 2; ++i) {
  2291. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2292. }
  2293. free_aligned_buffer_page_end(src_pixels);
  2294. free_aligned_buffer_page_end(tmp_pixels_u);
  2295. free_aligned_buffer_page_end(tmp_pixels_v);
  2296. free_aligned_buffer_page_end(dst_pixels_opt);
  2297. free_aligned_buffer_page_end(dst_pixels_c);
  2298. }
  2299. TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
  2300. // Round count up to multiple of 16
  2301. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2302. align_buffer_page_end(src_pixels, kPixels * 2);
  2303. align_buffer_page_end(tmp_pixels_u, kPixels);
  2304. align_buffer_page_end(tmp_pixels_v, kPixels);
  2305. align_buffer_page_end(dst_pixels_opt, kPixels * 2);
  2306. align_buffer_page_end(dst_pixels_c, kPixels * 2);
  2307. MemRandomize(src_pixels, kPixels * 2);
  2308. MemRandomize(tmp_pixels_u, kPixels);
  2309. MemRandomize(tmp_pixels_v, kPixels);
  2310. MemRandomize(dst_pixels_opt, kPixels * 2);
  2311. MemRandomize(dst_pixels_c, kPixels * 2);
  2312. MaskCpuFlags(disable_cpu_flags_);
  2313. SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u, benchmark_width_,
  2314. tmp_pixels_v, benchmark_width_, benchmark_width_,
  2315. benchmark_height_);
  2316. MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
  2317. dst_pixels_c, benchmark_width_ * 2, benchmark_width_,
  2318. benchmark_height_);
  2319. MaskCpuFlags(benchmark_cpu_info_);
  2320. for (int i = 0; i < benchmark_iterations_; ++i) {
  2321. SplitUVPlane(src_pixels, benchmark_width_ * 2, tmp_pixels_u,
  2322. benchmark_width_, tmp_pixels_v, benchmark_width_,
  2323. benchmark_width_, benchmark_height_);
  2324. }
  2325. MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
  2326. dst_pixels_opt, benchmark_width_ * 2, benchmark_width_,
  2327. benchmark_height_);
  2328. for (int i = 0; i < kPixels * 2; ++i) {
  2329. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2330. }
  2331. free_aligned_buffer_page_end(src_pixels);
  2332. free_aligned_buffer_page_end(tmp_pixels_u);
  2333. free_aligned_buffer_page_end(tmp_pixels_v);
  2334. free_aligned_buffer_page_end(dst_pixels_opt);
  2335. free_aligned_buffer_page_end(dst_pixels_c);
  2336. }
  2337. TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
  2338. // Round count up to multiple of 16
  2339. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2340. align_buffer_page_end(src_pixels, kPixels * 2);
  2341. align_buffer_page_end(dst_pixels_opt, kPixels * 2);
  2342. align_buffer_page_end(dst_pixels_c, kPixels * 2);
  2343. MemRandomize(src_pixels, kPixels * 2);
  2344. MemRandomize(dst_pixels_opt, kPixels * 2);
  2345. MemRandomize(dst_pixels_c, kPixels * 2);
  2346. MaskCpuFlags(disable_cpu_flags_);
  2347. SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c,
  2348. benchmark_width_ * 2, benchmark_width_, benchmark_height_);
  2349. MaskCpuFlags(benchmark_cpu_info_);
  2350. for (int i = 0; i < benchmark_iterations_; ++i) {
  2351. SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt,
  2352. benchmark_width_ * 2, benchmark_width_, benchmark_height_);
  2353. }
  2354. for (int i = 0; i < kPixels * 2; ++i) {
  2355. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2356. }
  2357. free_aligned_buffer_page_end(src_pixels);
  2358. free_aligned_buffer_page_end(dst_pixels_opt);
  2359. free_aligned_buffer_page_end(dst_pixels_c);
  2360. }
  2361. TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
  2362. // Round count up to multiple of 16
  2363. const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  2364. align_buffer_page_end(src_pixels, kPixels * 3);
  2365. align_buffer_page_end(tmp_pixels_r, kPixels);
  2366. align_buffer_page_end(tmp_pixels_g, kPixels);
  2367. align_buffer_page_end(tmp_pixels_b, kPixels);
  2368. align_buffer_page_end(dst_pixels_opt, kPixels * 3);
  2369. align_buffer_page_end(dst_pixels_c, kPixels * 3);
  2370. MemRandomize(src_pixels, kPixels * 3);
  2371. MemRandomize(tmp_pixels_r, kPixels);
  2372. MemRandomize(tmp_pixels_g, kPixels);
  2373. MemRandomize(tmp_pixels_b, kPixels);
  2374. MemRandomize(dst_pixels_opt, kPixels * 3);
  2375. MemRandomize(dst_pixels_c, kPixels * 3);
  2376. MaskCpuFlags(disable_cpu_flags_);
  2377. SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r,
  2378. benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
  2379. benchmark_width_, benchmark_width_, benchmark_height_);
  2380. MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
  2381. tmp_pixels_b, benchmark_width_, dst_pixels_c,
  2382. benchmark_width_ * 3, benchmark_width_, benchmark_height_);
  2383. MaskCpuFlags(benchmark_cpu_info_);
  2384. SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r,
  2385. benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
  2386. benchmark_width_, benchmark_width_, benchmark_height_);
  2387. for (int i = 0; i < benchmark_iterations_; ++i) {
  2388. MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g,
  2389. benchmark_width_, tmp_pixels_b, benchmark_width_,
  2390. dst_pixels_opt, benchmark_width_ * 3, benchmark_width_,
  2391. benchmark_height_);
  2392. }
  2393. for (int i = 0; i < kPixels * 3; ++i) {
  2394. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  2395. }
  2396. free_aligned_buffer_page_end(src_pixels);
  2397. free_aligned_buffer_page_end(tmp_pixels_r);
  2398. free_aligned_buffer_page_end(tmp_pixels_g);
  2399. free_aligned_buffer_page_end(tmp_pixels_b);
  2400. free_aligned_buffer_page_end(dst_pixels_opt);
  2401. free_aligned_buffer_page_end(dst_pixels_c);
  2402. }
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
  // Round count up to multiple of 16
  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  align_buffer_page_end(src_pixels, kPixels * 3);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 3);
  align_buffer_page_end(dst_pixels_c, kPixels * 3);
  MemRandomize(src_pixels, kPixels * 3);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 3);
  MemRandomize(dst_pixels_c, kPixels * 3);
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r,
                benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
                benchmark_width_, benchmark_width_, benchmark_height_);
  MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
                tmp_pixels_b, benchmark_width_, dst_pixels_c,
                benchmark_width_ * 3, benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r,
                  benchmark_width_, tmp_pixels_g, benchmark_width_,
                  tmp_pixels_b, benchmark_width_, benchmark_width_,
                  benchmark_height_);
  }
  MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
                tmp_pixels_b, benchmark_width_, dst_pixels_opt,
                benchmark_width_ * 3, benchmark_width_, benchmark_height_);
  for (int i = 0; i < kPixels * 3; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
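
// The *_16 row tests below call the row-level kernels directly instead of a
// plane API, so they do their own dispatch with TestCpuFlag() inside HAS_*
// guards.  The 64 passed to the kernels is the scale/depth argument of the
// 16-bit row functions declared in row.h; 64 == 1 << 6 would promote 10-bit
// samples toward 16-bit range, but the exact interpretation is defined by the
// kernels themselves.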
// TODO(fbarchard): Improve test for platforms and cpu detect.
#ifdef HAS_MERGEUVROW_16_AVX2
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
  // Round count up to multiple of 16
  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  align_buffer_page_end(src_pixels_u, kPixels * 2);
  align_buffer_page_end(src_pixels_v, kPixels * 2);
  align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
  align_buffer_page_end(dst_pixels_uv_c, kPixels * 2 * 2);
  MemRandomize(src_pixels_u, kPixels * 2);
  MemRandomize(src_pixels_v, kPixels * 2);
  memset(dst_pixels_uv_opt, 0, kPixels * 2 * 2);
  memset(dst_pixels_uv_c, 1, kPixels * 2 * 2);
  MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u),
                  reinterpret_cast<const uint16_t*>(src_pixels_v),
                  reinterpret_cast<uint16_t*>(dst_pixels_uv_c), 64, kPixels);
  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    if (has_avx2) {
      MergeUVRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_u),
                         reinterpret_cast<const uint16_t*>(src_pixels_v),
                         reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64,
                         kPixels);
    } else {
      MergeUVRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_u),
                      reinterpret_cast<const uint16_t*>(src_pixels_v),
                      reinterpret_cast<uint16_t*>(dst_pixels_uv_opt), 64,
                      kPixels);
    }
  }
  for (int i = 0; i < kPixels * 2 * 2; ++i) {
    EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]);
  }
  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_uv_opt);
  free_aligned_buffer_page_end(dst_pixels_uv_c);
}
#endif  // HAS_MERGEUVROW_16_AVX2
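
// MultiplyRow_16, as the name suggests, scales each 16-bit sample by the
// given factor.  The test follows the same shape as MergeUVRow_16_Opt above:
// the AVX2 kernel (when available at runtime) is checked against
// MultiplyRow_16_C on identical input.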
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_MULTIPLYROW_16_AVX2
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
  // Round count up to multiple of 16
  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
  MemRandomize(src_pixels_y, kPixels * 2);
  memset(dst_pixels_y_opt, 0, kPixels * 2);
  memset(dst_pixels_y_c, 1, kPixels * 2);
  MultiplyRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_y),
                   reinterpret_cast<uint16_t*>(dst_pixels_y_c), 64, kPixels);
  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    if (has_avx2) {
      MultiplyRow_16_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_y),
                          reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 64,
                          kPixels);
    } else {
      MultiplyRow_16_C(reinterpret_cast<const uint16_t*>(src_pixels_y),
                       reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 64,
                       kPixels);
    }
  }
  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }
  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
#endif  // HAS_MULTIPLYROW_16_AVX2
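
// Convert16To8Plane narrows 16-bit samples to 8 bits with a fixed-point
// scale.  The 16384 used here is the value libyuv pairs with 10-bit sources
// (roughly a >> 2 after the fixed-point multiply); the masked C pass again
// provides the reference output.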
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
  // Round count up to multiple of 16
  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);
  MemRandomize(src_pixels_y, kPixels * 2);
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);
  MaskCpuFlags(disable_cpu_flags_);
  Convert16To8Plane(reinterpret_cast<const uint16_t*>(src_pixels_y),
                    benchmark_width_, dst_pixels_y_c, benchmark_width_, 16384,
                    benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    Convert16To8Plane(reinterpret_cast<const uint16_t*>(src_pixels_y),
                      benchmark_width_, dst_pixels_y_opt, benchmark_width_,
                      16384, benchmark_width_, benchmark_height_);
  }
  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }
  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
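
// Row-level variant of the test above, exercising the AVX2 and SSSE3 kernels
// directly.  The source is masked to 10 bits to match the 16384 scale, and
// the count is rounded up to a multiple of 32 because the AVX2 kernel
// processes 32 pixels per step (see the comment inside the test).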
#ifdef ENABLE_ROW_TESTS
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT16TO8ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
  // AVX2 does multiple of 32, so round count up
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);
  MemRandomize(src_pixels_y, kPixels * 2);
  // clamp source range to 10 bits.
  for (int i = 0; i < kPixels; ++i) {
    reinterpret_cast<uint16_t*>(src_pixels_y)[i] &= 1023;
  }
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);
  Convert16To8Row_C(reinterpret_cast<const uint16_t*>(src_pixels_y),
                    dst_pixels_y_c, 16384, kPixels);
  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    if (has_avx2) {
      Convert16To8Row_AVX2(reinterpret_cast<const uint16_t*>(src_pixels_y),
                           dst_pixels_y_opt, 16384, kPixels);
    } else if (has_ssse3) {
      Convert16To8Row_SSSE3(reinterpret_cast<const uint16_t*>(src_pixels_y),
                            dst_pixels_y_opt, 16384, kPixels);
    } else {
      Convert16To8Row_C(reinterpret_cast<const uint16_t*>(src_pixels_y),
                        dst_pixels_y_opt, 16384, kPixels);
    }
  }
  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }
  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
#endif  // HAS_CONVERT16TO8ROW_AVX2
#endif  // ENABLE_ROW_TESTS
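
// Convert8To16Plane widens 8-bit samples to 16 bits under a fixed-point
// scale; 1024 appears to be the value paired with 10-bit output in libyuv's
// conventions.  The structure matches the other plane tests: masked C
// reference first, optimized path in the benchmark loop, then an exact
// comparison.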
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
  // Round count up to multiple of 16
  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  align_buffer_page_end(src_pixels_y, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
  MemRandomize(src_pixels_y, kPixels);
  memset(dst_pixels_y_opt, 0, kPixels * 2);
  memset(dst_pixels_y_c, 1, kPixels * 2);
  MaskCpuFlags(disable_cpu_flags_);
  Convert8To16Plane(src_pixels_y, benchmark_width_,
                    reinterpret_cast<uint16_t*>(dst_pixels_y_c),
                    benchmark_width_, 1024, benchmark_width_,
                    benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    Convert8To16Plane(src_pixels_y, benchmark_width_,
                      reinterpret_cast<uint16_t*>(dst_pixels_y_opt),
                      benchmark_width_, 1024, benchmark_width_,
                      benchmark_height_);
  }
  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }
  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
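
// Row-level counterpart of Convert8To16Plane: dispatches between the AVX2,
// SSE2 and C kernels at runtime via TestCpuFlag(), with the C output as the
// reference.  As with the other AVX2 row tests, the count is rounded up to a
// multiple of 32.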
#ifdef ENABLE_ROW_TESTS
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT8TO16ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  align_buffer_page_end(src_pixels_y, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
  MemRandomize(src_pixels_y, kPixels);
  memset(dst_pixels_y_opt, 0, kPixels * 2);
  memset(dst_pixels_y_c, 1, kPixels * 2);
  Convert8To16Row_C(src_pixels_y, reinterpret_cast<uint16_t*>(dst_pixels_y_c),
                    1024, kPixels);
  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  int has_sse2 = TestCpuFlag(kCpuHasSSE2);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    if (has_avx2) {
      Convert8To16Row_AVX2(src_pixels_y,
                           reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024,
                           kPixels);
    } else if (has_sse2) {
      Convert8To16Row_SSE2(src_pixels_y,
                           reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024,
                           kPixels);
    } else {
      Convert8To16Row_C(src_pixels_y,
                        reinterpret_cast<uint16_t*>(dst_pixels_y_opt), 1024,
                        kPixels);
    }
  }
  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }
  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
#endif  // HAS_CONVERT8TO16ROW_AVX2
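
// The float-sample helpers below share one pattern: a single page-aligned
// allocation is carved into a source region and two destination regions (C
// and optimized), the source is filled with sinf(i * 0.1f) so values stay in
// [-1, 1] and avoid denormals, and the helper returns the largest absolute
// difference between the two outputs (plus, where applicable, between the two
// returned reductions).  The NEON kernels are only used when
// HAS_SCALESUMSAMPLES_NEON / HAS_COPYROW_NEON are defined; otherwise the
// "opt" path simply re-runs the C code.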
float TestScaleMaxSamples(int benchmark_width,
                          int benchmark_height,
                          int benchmark_iterations,
                          float scale,
                          bool opt) {
  int i, j;
  float max_c, max_opt = 0.f;
  // NEON does multiple of 8, so round count up
  const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
  align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48);
  uint8_t* dst_c = orig_y + kPixels * 4 + 16;
  uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32;
  // Randomize works but may contain some denormals affecting performance.
  // MemRandomize(orig_y, kPixels * 4);
  // Large values are problematic; audio is really -1 to 1.
  for (i = 0; i < kPixels; ++i) {
    (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
  }
  memset(dst_c, 0, kPixels * 4);
  memset(dst_opt, 1, kPixels * 4);
  max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
                            reinterpret_cast<float*>(dst_c), scale, kPixels);
  for (j = 0; j < benchmark_iterations; j++) {
    if (opt) {
#ifdef HAS_SCALESUMSAMPLES_NEON
      max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y),
                                     reinterpret_cast<float*>(dst_opt), scale,
                                     kPixels);
#else
      max_opt =
          ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
                            reinterpret_cast<float*>(dst_opt), scale, kPixels);
#endif
    } else {
      max_opt =
          ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
                            reinterpret_cast<float*>(dst_opt), scale, kPixels);
    }
  }
  float max_diff = FAbs(max_opt - max_c);
  for (i = 0; i < kPixels; ++i) {
    float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
                          (reinterpret_cast<float*>(dst_opt)[i]));
    if (abs_diff > max_diff) {
      max_diff = abs_diff;
    }
  }
  free_aligned_buffer_page_end(orig_y);
  return max_diff;
}

TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) {
  float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, 1.2f, false);
  EXPECT_EQ(0, diff);
}

TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) {
  float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, 1.2f, true);
  EXPECT_EQ(0, diff);
}
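
// TestScaleSumSamples also checks the reduction returned by the kernel, but
// only with a relative tolerance: vectorized summation reorders additions,
// and once the running sum is large the low-order bits of individual samples
// are lost, so exact equality with the scalar sum cannot be expected.  A
// 0.01% relative error is allowed, and sums of 4 million or more are not
// checked at all (see the comment inside the helper).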
float TestScaleSumSamples(int benchmark_width,
                          int benchmark_height,
                          int benchmark_iterations,
                          float scale,
                          bool opt) {
  int i, j;
  float sum_c, sum_opt = 0.f;
  // NEON does multiple of 8, so round count up
  const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
  align_buffer_page_end(orig_y, kPixels * 4 * 3);
  uint8_t* dst_c = orig_y + kPixels * 4;
  uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
  // Randomize works but may contain some denormals affecting performance.
  // MemRandomize(orig_y, kPixels * 4);
  // Large values are problematic; audio is really -1 to 1.
  for (i = 0; i < kPixels; ++i) {
    (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
  }
  memset(dst_c, 0, kPixels * 4);
  memset(dst_opt, 1, kPixels * 4);
  sum_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
                            reinterpret_cast<float*>(dst_c), scale, kPixels);
  for (j = 0; j < benchmark_iterations; j++) {
    if (opt) {
#ifdef HAS_SCALESUMSAMPLES_NEON
      sum_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y),
                                     reinterpret_cast<float*>(dst_opt), scale,
                                     kPixels);
#else
      sum_opt =
          ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
                            reinterpret_cast<float*>(dst_opt), scale, kPixels);
#endif
    } else {
      sum_opt =
          ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
                            reinterpret_cast<float*>(dst_opt), scale, kPixels);
    }
  }
  float mse_opt = sum_opt / kPixels * 4;
  float mse_c = sum_c / kPixels * 4;
  float mse_error = FAbs(mse_opt - mse_c) / mse_c;
  // If the float sum exceeds about 4 million, small additions get rounded off
  // and the vectorized sum can differ from the scalar sum.
  // Ignore the difference when the sum is that large.
  float max_diff = 0.f;
  if (mse_error > 0.0001 && sum_c < 4000000) {  // Allow .01% difference of mse
    max_diff = mse_error;
  }
  for (i = 0; i < kPixels; ++i) {
    float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
                          (reinterpret_cast<float*>(dst_opt)[i]));
    if (abs_diff > max_diff) {
      max_diff = abs_diff;
    }
  }
  free_aligned_buffer_page_end(orig_y);
  return max_diff;
}

TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) {
  float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, 1.2f, false);
  EXPECT_EQ(0, diff);
}

TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) {
  float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, 1.2f, true);
  EXPECT_EQ(0, diff);
}
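
// TestScaleSamples covers the plain scaling kernel, which returns no
// reduction, so only the output buffers are compared (exact equality).  Note
// the NEON path is guarded by the same HAS_SCALESUMSAMPLES_NEON macro as the
// sum/max variants above.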
float TestScaleSamples(int benchmark_width,
                       int benchmark_height,
                       int benchmark_iterations,
                       float scale,
                       bool opt) {
  int i, j;
  // NEON does multiple of 8, so round count up
  const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
  align_buffer_page_end(orig_y, kPixels * 4 * 3);
  uint8_t* dst_c = orig_y + kPixels * 4;
  uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
  // Randomize works but may contain some denormals affecting performance.
  // MemRandomize(orig_y, kPixels * 4);
  // Large values are problematic; audio is really -1 to 1.
  for (i = 0; i < kPixels; ++i) {
    (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
  }
  memset(dst_c, 0, kPixels * 4);
  memset(dst_opt, 1, kPixels * 4);
  ScaleSamples_C(reinterpret_cast<float*>(orig_y),
                 reinterpret_cast<float*>(dst_c), scale, kPixels);
  for (j = 0; j < benchmark_iterations; j++) {
    if (opt) {
#ifdef HAS_SCALESUMSAMPLES_NEON
      ScaleSamples_NEON(reinterpret_cast<float*>(orig_y),
                        reinterpret_cast<float*>(dst_opt), scale, kPixels);
#else
      ScaleSamples_C(reinterpret_cast<float*>(orig_y),
                     reinterpret_cast<float*>(dst_opt), scale, kPixels);
#endif
    } else {
      ScaleSamples_C(reinterpret_cast<float*>(orig_y),
                     reinterpret_cast<float*>(dst_opt), scale, kPixels);
    }
  }
  float max_diff = 0.f;
  for (i = 0; i < kPixels; ++i) {
    float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
                          (reinterpret_cast<float*>(dst_opt)[i]));
    if (abs_diff > max_diff) {
      max_diff = abs_diff;
    }
  }
  free_aligned_buffer_page_end(orig_y);
  return max_diff;
}

TEST_F(LibYUVPlanarTest, TestScaleSamples_C) {
  float diff = TestScaleSamples(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, 1.2f, false);
  EXPECT_EQ(0, diff);
}

TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) {
  float diff = TestScaleSamples(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, 1.2f, true);
  EXPECT_EQ(0, diff);
}
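
// TestCopySamples validates CopyRow on float data: the reference output is
// produced with memcpy, while the timed loop uses CopyRow_NEON when it is
// compiled in and CopyRow_C otherwise.  The byte count is kPixels * 4 because
// each sample is a 4-byte float, and the pixel count is rounded up to a
// multiple of 16 floats for the NEON kernel.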
float TestCopySamples(int benchmark_width,
                      int benchmark_height,
                      int benchmark_iterations,
                      bool opt) {
  int i, j;
  // NEON does multiple of 16 floats, so round count up
  const int kPixels = (benchmark_width * benchmark_height + 15) & ~15;
  align_buffer_page_end(orig_y, kPixels * 4 * 3);
  uint8_t* dst_c = orig_y + kPixels * 4;
  uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
  // Randomize works but may contain some denormals affecting performance.
  // MemRandomize(orig_y, kPixels * 4);
  // Large values are problematic; audio is really -1 to 1.
  for (i = 0; i < kPixels; ++i) {
    (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
  }
  memset(dst_c, 0, kPixels * 4);
  memset(dst_opt, 1, kPixels * 4);
  memcpy(reinterpret_cast<void*>(dst_c), reinterpret_cast<void*>(orig_y),
         kPixels * 4);
  for (j = 0; j < benchmark_iterations; j++) {
    if (opt) {
#ifdef HAS_COPYROW_NEON
      CopyRow_NEON(orig_y, dst_opt, kPixels * 4);
#else
      CopyRow_C(orig_y, dst_opt, kPixels * 4);
#endif
    } else {
      CopyRow_C(orig_y, dst_opt, kPixels * 4);
    }
  }
  float max_diff = 0.f;
  for (i = 0; i < kPixels; ++i) {
    float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
                          (reinterpret_cast<float*>(dst_opt)[i]));
    if (abs_diff > max_diff) {
      max_diff = abs_diff;
    }
  }
  free_aligned_buffer_page_end(orig_y);
  return max_diff;
}

TEST_F(LibYUVPlanarTest, TestCopySamples_C) {
  float diff = TestCopySamples(benchmark_width_, benchmark_height_,
                               benchmark_iterations_, false);
  EXPECT_EQ(0, diff);
}

TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) {
  float diff = TestCopySamples(benchmark_width_, benchmark_height_,
                               benchmark_iterations_, true);
  EXPECT_EQ(0, diff);
}
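
// The Gauss tests check the 1-4-6-4-1 filter kernels.  The row input is
// filled with i * 256 and the expected outputs come out as the raw filter
// weights, which implies GaussRow_C normalizes its sum by 256:
//   dst[0]   = 0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1 = 32
//   dst[639] = 639 + 640 * 4 + 641 * 6 + 642 * 4 + 643 = 10256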
extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width);
extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);

TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
  SIMD_ALIGNED(uint32_t orig_pixels[640 + 4]);
  SIMD_ALIGNED(uint16_t dst_pixels_c[640]);
  SIMD_ALIGNED(uint16_t dst_pixels_opt[640]);
  memset(orig_pixels, 0, sizeof(orig_pixels));
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
  for (int i = 0; i < 640 + 4; ++i) {
    orig_pixels[i] = i * 256;
  }
  GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640);
  for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    int has_neon = TestCpuFlag(kCpuHasNEON);
    if (has_neon) {
      GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640);
    } else {
      GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 640);
    }
#else
    GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 640);
#endif
  }
  for (int i = 0; i < 640; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
  EXPECT_EQ(dst_pixels_c[0],
            static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1));
  EXPECT_EQ(dst_pixels_c[639], static_cast<uint16_t>(10256));
}
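
// GaussCol applies the same 1-4-6-4-1 weights vertically across five source
// rows.  The expected values imply it keeps the full 32-bit sum (no >> 8):
// with inputs equal to their index,
//   dst[639] = 639 + 1279 * 4 + 1919 * 6 + 2559 * 4 + 3199 = 30704.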
extern "C" void GaussCol_NEON(const uint16_t* src0,
                              const uint16_t* src1,
                              const uint16_t* src2,
                              const uint16_t* src3,
                              const uint16_t* src4,
                              uint32_t* dst,
                              int width);
extern "C" void GaussCol_C(const uint16_t* src0,
                           const uint16_t* src1,
                           const uint16_t* src2,
                           const uint16_t* src3,
                           const uint16_t* src4,
                           uint32_t* dst,
                           int width);

TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
  SIMD_ALIGNED(uint16_t orig_pixels[640 * 5]);
  SIMD_ALIGNED(uint32_t dst_pixels_c[640]);
  SIMD_ALIGNED(uint32_t dst_pixels_opt[640]);
  memset(orig_pixels, 0, sizeof(orig_pixels));
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
  for (int i = 0; i < 640 * 5; ++i) {
    orig_pixels[i] = i;
  }
  GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
             &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0],
             640);
  for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    int has_neon = TestCpuFlag(kCpuHasNEON);
    if (has_neon) {
      GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
                    &orig_pixels[640 * 3], &orig_pixels[640 * 4],
                    &dst_pixels_opt[0], 640);
    } else {
      GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
                 &orig_pixels[640 * 3], &orig_pixels[640 * 4],
                 &dst_pixels_opt[0], 640);
    }
#else
    GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
               &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_opt[0],
               640);
#endif
  }
  for (int i = 0; i < 640; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
  EXPECT_EQ(dst_pixels_c[0],
            static_cast<uint32_t>(0 * 1 + 640 * 4 + 640 * 2 * 6 + 640 * 3 * 4 +
                                  640 * 4 * 1));
  EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
}
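
// SwapUVRow is checked against its definition directly rather than against a
// separate C run: every output byte pair must be the byte-swapped input pair.
// Dispatch follows the usual Any/aligned pattern: SwapUVRow_Any_NEON handles
// arbitrary widths and SwapUVRow_NEON is used when the pixel count is a
// multiple of 16.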
TEST_F(LibYUVPlanarTest, SwapUVRow) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
      SwapUVRow_C;
  align_buffer_page_end(src_pixels_vu, kPixels * 2);
  align_buffer_page_end(dst_pixels_uv, kPixels * 2);
  MemRandomize(src_pixels_vu, kPixels * 2);
  memset(dst_pixels_uv, 1, kPixels * 2);
#if defined(HAS_SWAPUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SwapUVRow = SwapUVRow_Any_NEON;
    if (IS_ALIGNED(kPixels, 16)) {
      SwapUVRow = SwapUVRow_NEON;
    }
  }
#endif
  for (int j = 0; j < benchmark_iterations_; j++) {
    SwapUVRow(src_pixels_vu, dst_pixels_uv, kPixels);
  }
  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
    EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
  }
  free_aligned_buffer_page_end(src_pixels_vu);
  free_aligned_buffer_page_end(dst_pixels_uv);
}

#endif  // ENABLE_ROW_TESTS

}  // namespace libyuv