12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042 |
- /*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
- #include "libyuv/row.h"
- #include <string.h> // For memcpy and memset.
- #include "libyuv/basic_types.h"
- #ifdef __cplusplus
- namespace libyuv {
- extern "C" {
- #endif
- // This module is for Mips MMI.
- #if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
- void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24,
- uint8_t* dst_argb,
- int width) {
- uint64_t src0, src1, dest;
- const uint64_t mask = 0xff000000ULL;
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
- "or %[src0], %[src0], %[mask] \n\t"
- "or %[src1], %[src1], %[mask] \n\t"
- "punpcklwd %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t"
- "or %[src0], %[src0], %[mask] \n\t"
- "or %[src1], %[src1], %[mask] \n\t"
- "punpcklwd %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
- "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_rgb24), [dst_ptr] "r"(dst_argb), [width] "r"(width),
- [mask] "f"(mask)
- : "memory");
- }
- void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
- uint64_t src0, src1, dest;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0xff000000ULL;
- const uint64_t mask2 = 0xc6;
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
- "or %[src0], %[src0], %[mask1] \n\t"
- "punpcklbh %[src0], %[src0], %[mask0] \n\t"
- "pshufh %[src0], %[src0], %[mask2] \n\t"
- "or %[src1], %[src1], %[mask1] \n\t"
- "punpcklbh %[src1], %[src1], %[mask0] \n\t"
- "pshufh %[src1], %[src1], %[mask2] \n\t"
- "packushb %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t"
- "or %[src0], %[src0], %[mask1] \n\t"
- "punpcklbh %[src0], %[src0], %[mask0] \n\t"
- "pshufh %[src0], %[src0], %[mask2] \n\t"
- "or %[src1], %[src1], %[mask1] \n\t"
- "punpcklbh %[src1], %[src1], %[mask0] \n\t"
- "pshufh %[src1], %[src1], %[mask2] \n\t"
- "packushb %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
- "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_raw), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [mask2] "f"(mask2), [width] "r"(width)
- : "memory");
- }
- void RAWToRGB24Row_MMI(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
- uint64_t src0, src1;
- uint64_t ftmp[4];
- uint64_t mask0 = 0xc6;
- uint64_t mask1 = 0x6c;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_raw]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_raw]) \n\t"
- "gslwrc1 %[src1], 0x08(%[src_raw]) \n\t"
- "gslwlc1 %[src1], 0x0b(%[src_raw]) \n\t"
- "punpcklbh %[ftmp0], %[src0], %[zero] \n\t"
- "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t"
- "punpckhbh %[ftmp1], %[src0], %[zero] \n\t"
- "punpcklbh %[src1], %[src1], %[zero] \n\t"
- "pextrh %[ftmp2], %[ftmp0], %[three] \n\t"
- "pextrh %[ftmp3], %[ftmp1], %[one] \n\t"
- "pinsrh_3 %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
- "pextrh %[ftmp3], %[ftmp1], %[two] \n\t"
- "pinsrh_1 %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
- "pshufh %[src1], %[src1], %[mask1] \n\t"
- "pextrh %[ftmp2], %[src1], %[zero] \n\t"
- "pinsrh_2 %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
- "pinsrh_0 %[src1], %[src1], %[ftmp3] \n\t"
- "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
- "packushb %[src1], %[src1], %[zero] \n\t"
- "gssdrc1 %[ftmp0], 0x00(%[dst_rgb24]) \n\t"
- "gssdlc1 %[ftmp0], 0x07(%[dst_rgb24]) \n\t"
- "gsswrc1 %[src1], 0x08(%[dst_rgb24]) \n\t"
- "gsswlc1 %[src1], 0x0b(%[dst_rgb24]) \n\t"
- "daddiu %[src_raw], %[src_raw], 0x0c \n\t"
- "daddiu %[dst_rgb24], %[dst_rgb24], 0x0c \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]),
- [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3])
- : [src_raw] "r"(src_raw), [dst_rgb24] "r"(dst_rgb24), [width] "r"(width),
- [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00),
- [one] "f"(0x01), [two] "f"(0x02), [three] "f"(0x03)
- : "memory");
- }
- void RGB565ToARGBRow_MMI(const uint8_t* src_rgb565,
- uint8_t* dst_argb,
- int width) {
- uint64_t ftmp[5];
- uint64_t c0 = 0x001f001f001f001f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t c2 = 0x0007000700070007;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psrlh %[r], %[src1], %[three] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[two] \n\t"
- "psrlh %[src1], %[g], %[four] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "packushb %[b], %[b], %[r] \n\t"
- "packushb %[g], %[g], %[c1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
- "punpckhhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
- "daddiu %[src_rgb565], %[src_rgb565], 0x08 \n\t"
- "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
- [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4])
- : [src_rgb565] "r"(src_rgb565), [dst_argb] "r"(dst_argb),
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
- [eight] "f"(0x08), [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02),
- [four] "f"(0x04)
- : "memory");
- }
- void ARGB1555ToARGBRow_MMI(const uint8_t* src_argb1555,  // Expands 16-bit ARGB1555 pixels to 32-bit ARGB, 4 pixels per loop pass.
- uint8_t* dst_argb,  // receives 16 bytes for every 8 source bytes
- int width) {  // pixel count; the loop runs while width > 0 after subtracting 4 per pass
- uint64_t ftmp[6];  // scratch slots bound to the asm operands src0, src1, b, g, r, a
- uint64_t c0 = 0x001f001f001f001f;  // per halfword: low 5 bits (blue field)
- uint64_t c1 = 0x00ff00ff00ff00ff;  // per halfword: low byte; also the xor constant for alpha
- uint64_t c2 = 0x0003000300030003;  // per halfword: low 2 bits of the high byte (top of green)
- uint64_t c3 = 0x007c007c007c007c;  // per halfword: bits 2..6 of the high byte (red field)
- uint64_t c4 = 0x0001000100010001;  // per halfword: 1, added to alpha after the xor
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"  // load 8 source bytes = 4 pixels
- "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"  // src1 = high byte of each pixel
- "and %[b], %[src0], %[c0] \n\t"  // b = 5-bit blue
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"  // low 3 bits of green
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"  // g = 5-bit green
- "and %[r], %[src1], %[c3] \n\t"
- "psrlh %[r], %[r], %[two] \n\t"  // r = 5-bit red
- "psrlh %[a], %[src1], %[seven] \n\t"  // a = top bit of each pixel (0 or 1)
- "psllh %[src0], %[b], %[three] \n\t"  // widen 5 -> 8 bits as (x << 3) | (x >> 2)
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[three] \n\t"
- "psrlh %[src1], %[g], %[two] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "xor %[a], %[a], %[c1] \n\t"  // a: 1 -> 0xfe, 0 -> 0xff
- "paddb %[a], %[a], %[c4] \n\t"  // presumably a wrapping byte add: 0xfe -> 0xff, 0xff -> 0x00 — TODO confirm
- "packushb %[b], %[b], %[r] \n\t"  // narrow halfwords to bytes: b values then r values
- "packushb %[g], %[g], %[a] \n\t"  // g values then a values
- "punpcklbh %[src0], %[b], %[g] \n\t"  // interleave into b,g pairs
- "punpckhbh %[src1], %[b], %[g] \n\t"  // interleave into r,a pairs
- "punpcklhw %[r], %[src0], %[src1] \n\t"  // b,g,r,a for the first two pixels
- "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
- "punpckhhw %[r], %[src0], %[src1] \n\t"  // b,g,r,a for the last two pixels
- "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
- "daddiu %[src_argb1555], %[src_argb1555], 0x08 \n\t"  // advance 4 source pixels
- "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"  // advance 4 destination pixels
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"  // a width that is not a multiple of 4 still gets a full final group
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
- [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5])
- : [src_argb1555] "r"(src_argb1555), [dst_argb] "r"(dst_argb),  // NOTE(review): these pointers and width are input-only operands, yet the asm updates them; GCC documents that input-only operands must not be modified — consider "+r"
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
- [c3] "f"(c3), [c4] "f"(c4), [eight] "f"(0x08), [five] "f"(0x05),  // small constants are the shift counts used above
- [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07)
- : "memory");
- }
- void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444,  // Expands 16-bit ARGB4444 pixels to 32-bit ARGB, 4 pixels per loop pass.
- uint8_t* dst_argb,  // receives 16 bytes for every 8 source bytes
- int width) {  // pixel count; the loop runs while width > 0 after subtracting 4 per pass
- uint64_t ftmp[6];  // scratch slots bound to the asm operands src0, src1, b, g, r, a
- uint64_t c0 = 0x000f000f000f000f;  // per halfword: low nibble
- uint64_t c1 = 0x00ff00ff00ff00ff;  // per halfword: low byte
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"  // load 8 source bytes = 4 pixels
- "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"  // src1 = high byte of each pixel
- "and %[b], %[src0], %[c0] \n\t"  // b = bits 0..3
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g], %[src0], %[four] \n\t"  // g = bits 4..7
- "and %[r], %[src1], %[c0] \n\t"  // r = bits 8..11
- "psrlh %[a], %[src1], %[four] \n\t"  // a = bits 12..15
- "psllh %[src0], %[b], %[four] \n\t"  // widen 4 -> 8 bits as (x << 4) | x
- "or %[b], %[src0], %[b] \n\t"
- "psllh %[src0], %[g], %[four] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psllh %[src0], %[r], %[four] \n\t"
- "or %[r], %[src0], %[r] \n\t"
- "psllh %[src0], %[a], %[four] \n\t"
- "or %[a], %[src0], %[a] \n\t"
- "packushb %[b], %[b], %[r] \n\t"  // narrow halfwords to bytes: b values then r values
- "packushb %[g], %[g], %[a] \n\t"  // g values then a values
- "punpcklbh %[src0], %[b], %[g] \n\t"  // interleave into b,g pairs
- "punpckhbh %[src1], %[b], %[g] \n\t"  // interleave into r,a pairs
- "punpcklhw %[r], %[src0], %[src1] \n\t"  // b,g,r,a for the first two pixels
- "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
- "punpckhhw %[r], %[src0], %[src1] \n\t"  // b,g,r,a for the last two pixels
- "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
- "daddiu %[src_argb4444], %[src_argb4444], 0x08 \n\t"  // advance 4 source pixels
- "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"  // advance 4 destination pixels
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"  // a width that is not a multiple of 4 still gets a full final group
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
- [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5])
- : [src_argb4444] "r"(src_argb4444), [dst_argb] "r"(dst_argb),  // NOTE(review): these pointers and width are input-only operands, yet the asm updates them; GCC documents that input-only operands must not be modified — consider "+r"
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [eight] "f"(0x08),  // small constants are the shift counts used above
- [four] "f"(0x04)
- : "memory");
- }
- void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {  // Packs 4-byte ARGB pixels into 3-byte pixels by copying 4 bytes per pixel at a 3-byte destination stride.
- uint64_t src;  // holds one 4-byte pixel in flight
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"  // pixel 0: source bytes 0..3
- "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "gsswlc1 %[src], 0x03(%[dst_ptr]) \n\t"  // -> destination bytes 0..3
- "gsswrc1 %[src], 0x00(%[dst_ptr]) \n\t"
- "gslwlc1 %[src], 0x07(%[src_ptr]) \n\t"  // pixel 1: source bytes 4..7
- "gslwrc1 %[src], 0x04(%[src_ptr]) \n\t"
- "gsswlc1 %[src], 0x06(%[dst_ptr]) \n\t"  // -> destination bytes 3..6, overwriting pixel 0's 4th byte
- "gsswrc1 %[src], 0x03(%[dst_ptr]) \n\t"
- "gslwlc1 %[src], 0x0b(%[src_ptr]) \n\t"  // pixel 2: source bytes 8..11
- "gslwrc1 %[src], 0x08(%[src_ptr]) \n\t"
- "gsswlc1 %[src], 0x09(%[dst_ptr]) \n\t"  // -> destination bytes 6..9
- "gsswrc1 %[src], 0x06(%[dst_ptr]) \n\t"
- "gslwlc1 %[src], 0x0f(%[src_ptr]) \n\t"  // pixel 3: source bytes 12..15
- "gslwrc1 %[src], 0x0c(%[src_ptr]) \n\t"
- "gsswlc1 %[src], 0x0c(%[dst_ptr]) \n\t"  // -> destination bytes 9..12. NOTE(review): byte 12 lies beyond the 12 bytes this pass advances; on the final pass it is written past the last output pixel — confirm the buffer tolerates this
- "gsswrc1 %[src], 0x09(%[dst_ptr]) \n\t"
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"  // advance 4 source pixels (16 bytes)
- "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t"  // advance 4 destination pixels (12 bytes)
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"  // NOTE(review): exits only when width reaches exactly 0, so width must be a positive multiple of 4; sibling loops use bgtz
- : [src] "=&f"(src)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_rgb), [width] "r"(width)  // NOTE(review): input-only operands are modified by the asm above; GCC documents that they must not be — consider "+r"
- : "memory");
- }
- void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {  // Converts 4 ARGB pixels (16 bytes) into 12 output bytes per loop pass, reordering colour channels.
- uint64_t src0, src1;  // src0 = pixels 0-1, src1 = pixels 2-3; both are reused as scratch later
- uint64_t ftmp[3];  // one unpacked (halfword-per-channel) pixel each
- uint64_t mask0 = 0xc6;  // pshufh selector; lane notes below assume 2 selector bits per destination halfword, lowest lane first — TODO confirm against the Loongson MMI manual
- uint64_t mask1 = 0x18;  // pshufh selector for pixel 3 (same assumption)
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"  // load source bytes 0..7
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"  // load source bytes 8..15
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
- "punpcklbh %[ftmp0], %[src0], %[zero] \n\t"  // pixel 0 widened to halfwords
- "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t"  // presumably swaps lanes 0 and 2 of pixel 0
- "punpckhbh %[ftmp1], %[src0], %[zero] \n\t"  // pixel 1 widened to halfwords
- "punpcklbh %[ftmp2], %[src1], %[zero] \n\t"  // pixel 2 widened to halfwords
- "punpckhbh %[src1], %[src1], %[zero] \n\t"  // pixel 3 widened to halfwords
- "pextrh %[src0], %[ftmp1], %[two] \n\t"  // take lane 2 of pixel 1 ...
- "pinsrh_3 %[ftmp0], %[ftmp0], %[src0] \n\t"  // ... and place it in lane 3 of the first output group
- "pshufh %[ftmp1], %[ftmp1], %[one] \n\t"  // presumably moves pixel 1 lanes 1,0 into lanes 0,1
- "pextrh %[src0], %[ftmp2], %[two] \n\t"
- "pinsrh_2 %[ftmp1], %[ftmp1], %[src0] \n\t"  // lane 2 of pixel 2 -> lane 2
- "pextrh %[src0], %[ftmp2], %[one] \n\t"
- "pinsrh_3 %[ftmp1], %[ftmp1], %[src0] \n\t"  // lane 1 of pixel 2 -> lane 3
- "pextrh %[src0], %[ftmp2], %[zero] \n\t"  // lane 0 of pixel 2, inserted below
- "pshufh %[src1], %[src1], %[mask1] \n\t"  // presumably reorders pixel 3 to lanes (0,2,1,0)
- "pinsrh_0 %[src1], %[src1], %[src0] \n\t"  // lane 0 <- lane 0 of pixel 2
- "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"  // first 8 output bytes
- "packushb %[src1], %[src1], %[zero] \n\t"  // last 4 output bytes in the low word
- "gssdrc1 %[ftmp0], 0x00(%[dst_rgb]) \n\t"  // store destination bytes 0..7
- "gssdlc1 %[ftmp0], 0x07(%[dst_rgb]) \n\t"
- "gsswrc1 %[src1], 0x08(%[dst_rgb]) \n\t"  // store destination bytes 8..11
- "gsswlc1 %[src1], 0x0b(%[dst_rgb]) \n\t"
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"  // advance 4 source pixels (16 bytes)
- "daddiu %[dst_rgb], %[dst_rgb], 0x0c \n\t"  // advance 4 destination pixels (12 bytes)
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"  // a width that is not a multiple of 4 still gets a full final group
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]),
- [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),  // NOTE(review): input-only operands are modified by the asm above; GCC documents that they must not be — consider "+r"
- [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00),  // zero doubles as the unpack filler and as lane selector 0
- [one] "f"(0x01), [two] "f"(0x02)  // lane selectors for pextrh; one is also used as a pshufh selector
- : "memory");
- }
- void ARGBToRGB565Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {  // Packs 4 ARGB pixels (16 bytes) into four 16-bit 5:6:5 values per loop pass.
- uint64_t src0, src1;  // src0 = pixels 0-1, src1 = pixels 2-3, then reused for the transposed bytes
- uint64_t ftmp[3];  // bound to the asm operands b, g, r
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"  // load source bytes 0..7
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"  // load source bytes 8..15
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
- "punpcklbh %[b], %[src0], %[src1] \n\t"  // two rounds of byte interleaving gather each channel of the 4 pixels together
- "punpckhbh %[g], %[src0], %[src1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"  // src0 = channel bytes 0 and 1 of all 4 pixels
- "punpckhbh %[src1], %[b], %[g] \n\t"  // src1 = channel bytes 2 and 3 of all 4 pixels
- "punpcklbh %[b], %[src0], %[zero] \n\t"  // widen channel byte 0 to halfwords
- "punpckhbh %[g], %[src0], %[zero] \n\t"  // widen channel byte 1
- "punpcklbh %[r], %[src1], %[zero] \n\t"  // widen channel byte 2; channel byte 3 is dropped
- "psrlh %[b], %[b], %[three] \n\t"  // keep top 5 bits
- "psrlh %[g], %[g], %[two] \n\t"  // keep top 6 bits
- "psrlh %[r], %[r], %[three] \n\t"  // keep top 5 bits
- "psllh %[g], %[g], %[five] \n\t"  // g -> bits 5..10
- "psllh %[r], %[r], %[eleven] \n\t"  // r -> bits 11..15
- "or %[b], %[b], %[g] \n\t"
- "or %[b], %[b], %[r] \n\t"  // b now holds the four packed 16-bit pixels
- "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"  // store 8 destination bytes
- "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"  // advance 4 source pixels
- "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"  // advance 4 destination pixels
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"  // a width that is not a multiple of 4 still gets a full final group
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
- [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),  // NOTE(review): input-only operands are modified by the asm above; GCC documents that they must not be — consider "+r"
- [zero] "f"(0x00), [two] "f"(0x02), [three] "f"(0x03), [five] "f"(0x05),  // unpack filler and shift counts
- [eleven] "f"(0x0b)
- : "memory");
- }
- // dither4 is a row of 4 values from a 4x4 dither matrix.
- // The 4x4 matrix contains values to increase RGB. When converting to
- // fewer bits (565) this provides an ordered dither.
- // The first byte of the 4x4 matrix is the upper-left value.
- // The 4 values are passed as an int, then referenced as an array, so
- // endianness will not affect the order of the original matrix. But dither4
- // will contain the first pixel in the lower byte for little endian
- // or the upper byte for big endian.
- void ARGBToRGB565DitherRow_MMI(const uint8_t* src_argb,  // Adds a per-pixel dither value to three channels, clamps to 0xff, then packs 4 ARGB pixels to 5:6:5 per loop pass.
- uint8_t* dst_rgb,  // receives 8 bytes for every 16 source bytes
- const uint32_t dither4,  // four dither bytes, one per pixel of each group of 4
- int width) {  // pixel count; the loop runs while width > 0 after subtracting 4 per pass
- uint64_t src0, src1;  // src0 = pixels 0-1, src1 = pixels 2-3, then reused as scratch
- uint64_t ftmp[3];  // bound to the asm operands b, g, r
- uint64_t c0 = 0x00ff00ff00ff00ff;  // per halfword: 0xff, the clamp limit and byte mask
- __asm__ volatile(
- "punpcklbh %[dither], %[dither], %[zero] \n\t"  // widen the 4 dither bytes to halfwords once, before the loop. NOTE(review): this overwrites an input-only operand
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"  // load source bytes 0..7
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"  // load source bytes 8..15
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
- "punpcklbh %[b], %[src0], %[src1] \n\t"  // two rounds of byte interleaving gather each channel of the 4 pixels together
- "punpckhbh %[g], %[src0], %[src1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklbh %[b], %[src0], %[zero] \n\t"  // widen channel byte 0 to halfwords
- "punpckhbh %[g], %[src0], %[zero] \n\t"  // widen channel byte 1
- "punpcklbh %[r], %[src1], %[zero] \n\t"  // widen channel byte 2; channel byte 3 is dropped
- "paddh %[b], %[b], %[dither] \n\t"  // add the same dither value to every channel of a pixel
- "paddh %[g], %[g], %[dither] \n\t"
- "paddh %[r], %[r], %[dither] \n\t"
- "pcmpgth %[src0], %[b], %[c0] \n\t"  // clamp: lanes above 0xff get an all-ones mask ...
- "or %[src0], %[src0], %[b] \n\t"
- "and %[b], %[src0], %[c0] \n\t"  // ... which reduces to 0xff after masking
- "pcmpgth %[src0], %[g], %[c0] \n\t"
- "or %[src0], %[src0], %[g] \n\t"
- "and %[g], %[src0], %[c0] \n\t"
- "pcmpgth %[src0], %[r], %[c0] \n\t"
- "or %[src0], %[src0], %[r] \n\t"
- "and %[r], %[src0], %[c0] \n\t"
- "psrlh %[b], %[b], %[three] \n\t"  // keep top 5 bits
- "psrlh %[g], %[g], %[two] \n\t"  // keep top 6 bits
- "psrlh %[r], %[r], %[three] \n\t"  // keep top 5 bits
- "psllh %[g], %[g], %[five] \n\t"  // g -> bits 5..10
- "psllh %[r], %[r], %[eleven] \n\t"  // r -> bits 11..15
- "or %[b], %[b], %[g] \n\t"
- "or %[b], %[b], %[r] \n\t"  // b now holds the four packed 16-bit pixels
- "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"  // store 8 destination bytes
- "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"  // advance 4 source pixels
- "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"  // advance 4 destination pixels
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"  // a width that is not a multiple of 4 still gets a full final group
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
- [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),  // NOTE(review): input-only operands are modified by the asm above; GCC documents that they must not be — consider "+r"
- [dither] "f"(dither4), [c0] "f"(c0), [zero] "f"(0x00), [two] "f"(0x02),
- [three] "f"(0x03), [five] "f"(0x05), [eleven] "f"(0x0b)  // shift counts
- : "memory");
- }
- void ARGBToARGB1555Row_MMI(const uint8_t* src_argb,  // Packs 4 ARGB pixels (16 bytes) into four 16-bit 1:5:5:5 values per loop pass.
- uint8_t* dst_rgb,  // receives 8 bytes for every 16 source bytes
- int width) {  // pixel count; the loop runs while width > 0 after subtracting 4 per pass
- uint64_t src0, src1;  // src0 = pixels 0-1, src1 = pixels 2-3, then reused for the transposed bytes
- uint64_t ftmp[4];  // bound to the asm operands b, g, r, a
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"  // load source bytes 0..7
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"  // load source bytes 8..15
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
- "punpcklbh %[b], %[src0], %[src1] \n\t"  // two rounds of byte interleaving gather each channel of the 4 pixels together
- "punpckhbh %[g], %[src0], %[src1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklbh %[b], %[src0], %[zero] \n\t"  // widen channel byte 0 to halfwords
- "punpckhbh %[g], %[src0], %[zero] \n\t"  // widen channel byte 1
- "punpcklbh %[r], %[src1], %[zero] \n\t"  // widen channel byte 2
- "punpckhbh %[a], %[src1], %[zero] \n\t"  // widen channel byte 3
- "psrlh %[b], %[b], %[three] \n\t"  // keep top 5 bits
- "psrlh %[g], %[g], %[three] \n\t"  // keep top 5 bits
- "psrlh %[r], %[r], %[three] \n\t"  // keep top 5 bits
- "psrlh %[a], %[a], %[seven] \n\t"  // keep the top bit only
- "psllh %[g], %[g], %[five] \n\t"  // g -> bits 5..9
- "psllh %[r], %[r], %[ten] \n\t"  // r -> bits 10..14
- "psllh %[a], %[a], %[fifteen] \n\t"  // a -> bit 15
- "or %[b], %[b], %[g] \n\t"
- "or %[b], %[b], %[r] \n\t"
- "or %[b], %[b], %[a] \n\t"  // b now holds the four packed 16-bit pixels
- "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"  // store 8 destination bytes
- "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"  // advance 4 source pixels
- "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"  // advance 4 destination pixels
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"  // a width that is not a multiple of 4 still gets a full final group
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
- [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),  // NOTE(review): input-only operands are modified by the asm above; GCC documents that they must not be — consider "+r"
- [zero] "f"(0x00), [three] "f"(0x03), [five] "f"(0x05),  // unpack filler and shift counts
- [seven] "f"(0x07), [ten] "f"(0x0a), [fifteen] "f"(0x0f)
- : "memory");
- }
- void ARGBToARGB4444Row_MMI(const uint8_t* src_argb,  // Packs 4 ARGB pixels (16 bytes) into four 16-bit 4:4:4:4 values per loop pass.
- uint8_t* dst_rgb,  // receives 8 bytes for every 16 source bytes
- int width) {  // pixel count; the loop runs while width > 0 after subtracting 4 per pass
- uint64_t src0, src1;  // src0 = pixels 0-1, src1 = pixels 2-3, then reused for the transposed bytes
- uint64_t ftmp[4];  // bound to the asm operands b, g, r, a
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"  // load source bytes 0..7
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"  // load source bytes 8..15
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
- "punpcklbh %[b], %[src0], %[src1] \n\t"  // two rounds of byte interleaving gather each channel of the 4 pixels together
- "punpckhbh %[g], %[src0], %[src1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklbh %[b], %[src0], %[zero] \n\t"  // widen channel byte 0 to halfwords
- "punpckhbh %[g], %[src0], %[zero] \n\t"  // widen channel byte 1
- "punpcklbh %[r], %[src1], %[zero] \n\t"  // widen channel byte 2
- "punpckhbh %[a], %[src1], %[zero] \n\t"  // widen channel byte 3
- "psrlh %[b], %[b], %[four] \n\t"  // keep the top nibble of every channel
- "psrlh %[g], %[g], %[four] \n\t"
- "psrlh %[r], %[r], %[four] \n\t"
- "psrlh %[a], %[a], %[four] \n\t"
- "psllh %[g], %[g], %[four] \n\t"  // g -> bits 4..7
- "psllh %[r], %[r], %[eight] \n\t"  // r -> bits 8..11
- "psllh %[a], %[a], %[twelve] \n\t"  // a -> bits 12..15
- "or %[b], %[b], %[g] \n\t"
- "or %[b], %[b], %[r] \n\t"
- "or %[b], %[b], %[a] \n\t"  // b now holds the four packed 16-bit pixels
- "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"  // store 8 destination bytes
- "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"  // advance 4 source pixels
- "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"  // advance 4 destination pixels
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"  // a width that is not a multiple of 4 still gets a full final group
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
- [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),  // NOTE(review): input-only operands are modified by the asm above; GCC documents that they must not be — consider "+r"
- [zero] "f"(0x00), [four] "f"(0x04), [eight] "f"(0x08),  // unpack filler and shift counts
- [twelve] "f"(0x0c)
- : "memory");
- }
- void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {  // Computes one luma byte per ARGB pixel, 8 pixels (32 source bytes) per loop pass.
- uint64_t src, src_hi, src_lo;  // current 2-pixel load and its two widened halves
- uint64_t dest0, dest1, dest2, dest3;  // two 32-bit results each
- const uint64_t value = 0x1080;  // constant written over lane 3 of each widened pixel; its weight in mask is 1
- const uint64_t mask = 0x0001004200810019;  // per-lane weights, low lane first: 0x19, 0x81, 0x42, 1
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"  // pixels 0-1
- "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"  // widen the first pixel to halfwords
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"  // replace lane 3 with the bias constant
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"  // presumably multiplies lanes by mask and adds adjacent pairs into two words — TODO confirm
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"  // same for the second pixel
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"  // low partial sums of both pixels
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"  // high partial sums of both pixels
- "paddw %[dest0], %[dest0], %[src] \n\t"  // full weighted sum per pixel
- "psrlw %[dest0], %[dest0], %[eight] \n\t"  // >> 8
- "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"  // pixels 2-3, same sequence
- "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"  // pixels 4-5, same sequence
- "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
- "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"  // pixels 6-7, same sequence
- "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"  // narrow the 8 word results to halfwords ...
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"  // ... then to 8 bytes
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"  // store 8 luma bytes
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"  // advance 8 source pixels
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"  // advance 8 output bytes
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"  // NOTE(review): exits only when width reaches exactly 0, so width must be a positive multiple of 8; the UV row loops use bgtz
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),  // NOTE(review): input-only operands are modified by the asm above; GCC documents that they must not be — consider "+r"
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
- }
- void ARGBToUVRow_MMI(const uint8_t* src_rgb0,  // Produces 8 U and 8 V bytes from 16 ARGB pixels of two rows per loop pass, averaging each 2x2 pixel square first.
- int src_stride_rgb,  // byte offset added to src_rgb0 to reach the second row
- uint8_t* dst_u,  // receives 8 bytes per loop pass
- uint8_t* dst_v,  // receives 8 bytes per loop pass
- int width) {  // pixel count; the loop runs while width > 0 after subtracting 16 per pass
- uint64_t src_rgb1;  // address of the second row, recomputed every pass
- uint64_t ftmp[12];  // scratch slots bound to src0, src1, src_lo, src_hi and the dest*_u / dest*_v operands
- const uint64_t value = 0x4040;  // bias constant inserted into a spare lane; its weight is 2 in both masks
- const uint64_t mask_u = 0x0026004a00700002;  // per-lane weights, low lane first: 2, 0x70, 0x4a, 0x26
- const uint64_t mask_v = 0x00020070005e0012;  // per-lane weights, low lane first: 0x12, 0x5e, 0x70, 2
- __asm__ volatile(
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"  // second row = first row + stride
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"  // group 0, first square: 2 pixels from each row
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"  // widen to halfwords and sum the 4 pixels channel by channel
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"  // sum / 4 = averaged pixel
- "dsll %[dest0_u], %[src0], %[sixteen] \n\t"  // shift lanes up by one, dropping lane 3 ...
- "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"  // ... and put the bias constant in lane 0
- "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"  // V path: bias constant replaces lane 3
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"  // presumably multiply by weights and add adjacent pairs into two words — TODO confirm
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"  // group 0, second square: same averaging
- "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"  // low partial sums of both squares
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"  // high partial sums of both squares
- "psubw %[dest0_u], %[src0], %[src1] \n\t"  // U = low sum - high sum
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"  // arithmetic >> 8
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"  // V = high sum - low sum
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"  // group 1 (source bytes 0x10..0x1f): same sequence into dest1_u / dest1_v
- "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"  // group 2 (source bytes 0x20..0x2f): same sequence into dest2_u / dest2_v
- "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"  // group 3 (source bytes 0x30..0x3f): same sequence into dest3_u / dest3_v
- "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"  // narrow the 8 U words to halfwords, then to bytes
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"  // store 8 U bytes
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"  // narrow the 8 V words to halfwords, then to bytes
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"  // store 8 V bytes
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"  // advance 16 source pixels
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"  // a width that is not a multiple of 16 still gets a full final group
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),  // NOTE(review): src_rgb0, dst_u, dst_v and width are input-only operands, yet the asm updates them; GCC documents that input-only operands must not be modified — consider "+r"
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),  // unpack filler and shift counts
- [sixteen] "f"(0x10)
- : "memory");
- }
- void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {  // Computes one luma byte per BGRA pixel, 8 pixels (32 source bytes) per loop pass.
- uint64_t src, src_hi, src_lo;  // current 2-pixel load and its two widened halves
- uint64_t dest0, dest1, dest2, dest3;  // two 32-bit results each
- const uint64_t value = 0x1080;  // constant written over lane 0 of each widened pixel; its weight in mask is 1
- const uint64_t mask = 0x0019008100420001;  // per-lane weights, low lane first: 1, 0x42, 0x81, 0x19
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"  // pixels 0-1
- "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"  // widen the first pixel to halfwords
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"  // replace lane 0 with the bias constant
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"  // presumably multiplies lanes by mask and adds adjacent pairs into two words — TODO confirm
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"  // same for the second pixel
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"  // low partial sums of both pixels
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"  // high partial sums of both pixels
- "paddw %[dest0], %[dest0], %[src] \n\t"  // full weighted sum per pixel
- "psrlw %[dest0], %[dest0], %[eight] \n\t"  // >> 8
- "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"  // pixels 2-3, same sequence
- "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"  // pixels 4-5, same sequence
- "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
- "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"  // pixels 6-7, same sequence
- "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"  // narrow the 8 word results to halfwords ...
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"  // ... then to 8 bytes
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"  // store 8 luma bytes
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"  // advance 8 source pixels
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"  // advance 8 output bytes
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"  // NOTE(review): exits only when width reaches exactly 0, so width must be a positive multiple of 8; the UV row loops use bgtz
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),  // NOTE(review): input-only operands are modified by the asm above; GCC documents that they must not be — consider "+r"
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
- }
- void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[12];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x00020070004a0026;
- const uint64_t mask_v = 0x0012005e00700002;
- __asm__ volatile(
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsrl %[dest0_u], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_0 %[dest0_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src1], %[src0] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src0], %[src1] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsrl %[dest1_u], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_0 %[dest1_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src1], %[src0] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src0], %[src1] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsrl %[dest2_u], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_0 %[dest2_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src1], %[src0] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src0], %[src1] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsrl %[dest3_u], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_0 %[dest3_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src1], %[src0] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src0], %[src1] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
- [sixteen] "f"(0x10)
- : "memory");
- }
- void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0001001900810042;
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
- "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
- "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
- }
- void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[12];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x00020070004a0026;
- const uint64_t mask_v = 0x0012005e00700002;
- __asm__ volatile(
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
- "dsll %[dest0_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src1], %[src0] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src0], %[src1] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
- "dsll %[dest1_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src1], %[src0] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src0], %[src1] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
- "dsll %[dest2_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src1], %[src0] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src0], %[src1] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
- "dsll %[dest3_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src1], %[src0] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src0], %[src1] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
- [sixteen] "f"(0x10)
- : "memory");
- }
- void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0042008100190001;
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
- "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
- "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
- }
- void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[12];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0026004a00700002;
- const uint64_t mask_v = 0x00020070005e0012;
- __asm__ volatile(
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_0 %[dest0_u], %[src0], %[value] \n\t"
- "dsrl %[dest0_v], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest0_v], %[dest0_v], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
- "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_0 %[dest1_u], %[src0], %[value] \n\t"
- "dsrl %[dest1_v], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest1_v], %[dest1_v], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
- "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_0 %[dest2_u], %[src0], %[value] \n\t"
- "dsrl %[dest2_v], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest2_v], %[dest2_v], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
- "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_0 %[dest3_u], %[src0], %[value] \n\t"
- "dsrl %[dest3_v], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest3_v], %[dest3_v], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
- "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
- [sixteen] "f"(0x10)
- : "memory");
- }
- // Converts one row of packed 3-byte pixels to 8-bit luma with Loongson MMI
- // inline assembly, 8 pixels (24 source bytes -> 8 output bytes) per pass.
- //
- // For the three bytes b0, b1, b2 of a pixel (in memory order) the stored
- // value is
- //   (25 * b0 + 129 * b1 + 66 * b2 + 0x1080) >> 8
- // where the weights come from `mask` and the 0x1080 bias from `value`.
- //
- // src_argb0: source row, advanced by 24 bytes per pass.
- // dst_y:     destination row, 8 bytes stored per pass.
- // width:     pixel count. The body runs once before the first test and the
- //            loop only exits when the counter reaches exactly zero, so
- //            width has to be a positive multiple of 8.
- //
- // Every 8-byte load supplies two pixels, so the fourth load of a pass covers
- // source offsets 0x12..0x19, i.e. two bytes beyond the 24 bytes consumed.
- //
- // src_argb0, dst_y and width are modified by the asm and are therefore
- // declared as read-write early-clobber ("+&r") operands; GCC does not permit
- // input-only operands to be changed.
- void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0001004200810019;
- __asm__ volatile(
- "1: \n\t"
- // Pixels 0-1: unaligned 64-bit load of source bytes 0x00..0x07.
- "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
- // Widen bytes 0..3 to halfwords, overwrite halfword 3 with the bias and
- // multiply-accumulate against the weights (first pixel of the pair).
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- // Shift up by one byte so the high half holds bytes 3..6, then repeat for
- // the second pixel of the pair.
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- // Add the two partial sums of each pixel and drop the 8 fraction bits.
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
- // Pixels 2-3: same sequence on source bytes 0x06..0x0d.
- "gsldlc1 %[src], 0x0d(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x06(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
- // Pixels 4-5: same sequence on source bytes 0x0c..0x13.
- "gsldlc1 %[src], 0x13(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x0c(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
- // Pixels 6-7: same sequence on source bytes 0x12..0x19.
- "gsldlc1 %[src], 0x19(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x12(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
- // Narrow the eight 32-bit results to bytes (saturating packs) and store
- // them with an unaligned 64-bit store.
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- // Advance both pointers, count down 8 pixels, loop until width == 0.
- "daddiu %[src_argb0], %[src_argb0], 0x18 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3),
- // Updated inside the loop, hence read-write rather than input-only.
- [src_argb0] "+&r"(src_argb0), [dst_y] "+&r"(dst_y), [width] "+&r"(width)
- : [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
- }
- // Produces 8 U bytes and 8 V bytes per pass from 16 packed 3-byte pixels
- // taken from two rows (src_rgb0 and src_rgb0 + src_stride_rgb), using
- // Loongson MMI inline assembly.
- //
- // Each output sample covers a 2x2 pixel block. Per byte position the four
- // values are summed and shifted right by 2, giving a0, a1, a2 (memory order
- // within a pixel). The stored samples are
- //   U = (112 * a0 - 74 * a1 - 38 * a2 + 0x8080) >> 8
- //   V = (112 * a2 - 94 * a1 - 18 * a0 + 0x8080) >> 8
- // with the weights taken from mask_u / mask_v and 0x8080 being `value`
- // (0x4040) multiplied by the weight 2 stored in each mask.
- //
- // src_rgb0:       first source row, advanced by 48 bytes per pass.
- // src_stride_rgb: byte offset added to src_rgb0 to reach the second row.
- // dst_u, dst_v:   destination rows, 8 bytes stored to each per pass.
- // width:          pixel count; 16 is subtracted per pass and the loop
- //                 continues while the remainder is greater than zero. The
- //                 body always runs at least once.
- //
- // Every 8-byte load supplies two pixels, so the last load of a pass covers
- // offsets 0x2a..0x31 of both rows, two bytes beyond the 48 bytes consumed.
- //
- // src_rgb0, dst_u, dst_v and width are modified by the asm and are therefore
- // declared as read-write early-clobber ("+&r") operands; GCC does not permit
- // input-only operands to be changed.
- void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[12];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0026004a00700002;
- const uint64_t mask_v = 0x00020070005e0012;
- __asm__ volatile(
- "1: \n\t"
- // Address of the second source row.
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- // Sample 0: 2x2 block at offset 0x00 of both rows.
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- // Widen bytes 0..3 and bytes 3..6 of each row to halfwords, add the four
- // pixels per byte position and divide by 4.
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- // U input is [value, a0, a1, a2], V input is [a0, a1, a2, value]; multiply
- // by the weights and add adjacent products.
- "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- // Sample 1: 2x2 block at offset 0x06, same averaging and weighting.
- "gsldrc1 %[src0], 0x06(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0d(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- // Combine samples 0 and 1: U = low partial sums - high partial sums,
- // V = high partial sums - low partial sums, then drop 8 fraction bits.
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- // Sample 2: 2x2 block at offset 0x0c.
- "gsldrc1 %[src0], 0x0c(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x13(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- // Sample 3: 2x2 block at offset 0x12.
- "gsldrc1 %[src0], 0x12(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x19(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- // Combine samples 2 and 3.
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- // Sample 4: 2x2 block at offset 0x18.
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- // Sample 5: 2x2 block at offset 0x1e.
- "gsldrc1 %[src0], 0x1e(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x25(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- // Combine samples 4 and 5.
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- // Sample 6: 2x2 block at offset 0x24.
- "gsldrc1 %[src0], 0x24(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2b(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- // Sample 7: 2x2 block at offset 0x2a.
- "gsldrc1 %[src0], 0x2a(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x31(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- // Combine samples 6 and 7.
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
- // Narrow the eight U results to bytes (saturating packs) and store them.
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- // Same for the eight V results.
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- // Advance the pointers, count down 16 pixels, loop while width > 0.
- "daddiu %[src_rgb0], %[src_rgb0], 0x30 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- // Updated inside the loop, hence read-write rather than input-only.
- [src_rgb0] "+&r"(src_rgb0), [dst_u] "+&r"(dst_u), [dst_v] "+&r"(dst_v),
- [width] "+&r"(width)
- : [src_stride_rgb] "r"(src_stride_rgb),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
- [sixteen] "f"(0x10)
- : "memory");
- }
- // Converts one row of packed 3-byte pixels to 8-bit luma with Loongson MMI
- // inline assembly, 8 pixels (24 source bytes -> 8 output bytes) per pass.
- //
- // For the three bytes b0, b1, b2 of a pixel (in memory order) the stored
- // value is
- //   (66 * b0 + 129 * b1 + 25 * b2 + 0x1080) >> 8
- // where the weights come from `mask` and the 0x1080 bias from `value`.
- //
- // src_argb0: source row, advanced by 24 bytes per pass.
- // dst_y:     destination row, 8 bytes stored per pass.
- // width:     pixel count. The body runs once before the first test and the
- //            loop only exits when the counter reaches exactly zero, so
- //            width has to be a positive multiple of 8.
- //
- // Every 8-byte load supplies two pixels, so the fourth load of a pass covers
- // source offsets 0x12..0x19, i.e. two bytes beyond the 24 bytes consumed.
- //
- // src_argb0, dst_y and width are modified by the asm and are therefore
- // declared as read-write early-clobber ("+&r") operands; GCC does not permit
- // input-only operands to be changed.
- void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0001001900810042;
- __asm__ volatile(
- "1: \n\t"
- // Pixels 0-1: unaligned 64-bit load of source bytes 0x00..0x07.
- "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
- // Widen bytes 0..3 to halfwords, overwrite halfword 3 with the bias and
- // multiply-accumulate against the weights (first pixel of the pair).
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- // Shift up by one byte so the high half holds bytes 3..6, then repeat for
- // the second pixel of the pair.
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- // Add the two partial sums of each pixel and drop the 8 fraction bits.
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
- // Pixels 2-3: same sequence on source bytes 0x06..0x0d.
- "gsldlc1 %[src], 0x0d(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x06(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
- // Pixels 4-5: same sequence on source bytes 0x0c..0x13.
- "gsldlc1 %[src], 0x13(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x0c(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
- // Pixels 6-7: same sequence on source bytes 0x12..0x19.
- "gsldlc1 %[src], 0x19(%[src_argb0]) \n\t"
- "gsldrc1 %[src], 0x12(%[src_argb0]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
- // Narrow the eight 32-bit results to bytes (saturating packs) and store
- // them with an unaligned 64-bit store.
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- // Advance both pointers, count down 8 pixels, loop until width == 0.
- "daddiu %[src_argb0], %[src_argb0], 0x18 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3),
- // Updated inside the loop, hence read-write rather than input-only.
- [src_argb0] "+&r"(src_argb0), [dst_y] "+&r"(dst_y), [width] "+&r"(width)
- : [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
- }
- // Produces 8 U bytes and 8 V bytes per pass from 16 packed 3-byte pixels
- // taken from two rows (src_rgb0 and src_rgb0 + src_stride_rgb), using
- // Loongson MMI inline assembly.
- //
- // Each output sample covers a 2x2 pixel block. Per byte position the four
- // values are summed and shifted right by 2, giving a0, a1, a2 (memory order
- // within a pixel). The stored samples are
- //   U = (112 * a2 - 74 * a1 - 38 * a0 + 0x8080) >> 8
- //   V = (112 * a0 - 94 * a1 - 18 * a2 + 0x8080) >> 8
- // with the weights taken from mask_u / mask_v and 0x8080 being `value`
- // (0x4040) multiplied by the weight 2 stored in each mask.
- //
- // src_rgb0:       first source row, advanced by 48 bytes per pass.
- // src_stride_rgb: byte offset added to src_rgb0 to reach the second row.
- // dst_u, dst_v:   destination rows, 8 bytes stored to each per pass.
- // width:          pixel count; 16 is subtracted per pass and the loop
- //                 continues while the remainder is greater than zero. The
- //                 body always runs at least once.
- //
- // Every 8-byte load supplies two pixels, so the last load of a pass covers
- // offsets 0x2a..0x31 of both rows, two bytes beyond the 48 bytes consumed.
- //
- // src_rgb0, dst_u, dst_v and width are modified by the asm and are therefore
- // declared as read-write early-clobber ("+&r") operands; GCC does not permit
- // input-only operands to be changed.
- void RAWToUVRow_MMI(const uint8_t* src_rgb0,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[12];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x00020070004a0026;
- const uint64_t mask_v = 0x0012005e00700002;
- __asm__ volatile(
- "1: \n\t"
- // Address of the second source row.
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- // Sample 0: 2x2 block at offset 0x00 of both rows.
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- // Widen bytes 0..3 and bytes 3..6 of each row to halfwords, add the four
- // pixels per byte position and divide by 4.
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- // U input is [a0, a1, a2, value], V input is [value, a0, a1, a2]; multiply
- // by the weights and add adjacent products.
- "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
- "dsll %[dest0_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- // Sample 1: 2x2 block at offset 0x06, same averaging and weighting.
- "gsldrc1 %[src0], 0x06(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0d(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- // Combine samples 0 and 1: U = high partial sums - low partial sums,
- // V = low partial sums - high partial sums, then drop 8 fraction bits.
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src1], %[src0] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src0], %[src1] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- // Sample 2: 2x2 block at offset 0x0c.
- "gsldrc1 %[src0], 0x0c(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x13(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
- "dsll %[dest1_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- // Sample 3: 2x2 block at offset 0x12.
- "gsldrc1 %[src0], 0x12(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x19(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- // Combine samples 2 and 3.
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src1], %[src0] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src0], %[src1] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- // Sample 4: 2x2 block at offset 0x18.
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
- "dsll %[dest2_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- // Sample 5: 2x2 block at offset 0x1e.
- "gsldrc1 %[src0], 0x1e(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x25(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- // Combine samples 4 and 5.
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src1], %[src0] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src0], %[src1] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- // Sample 6: 2x2 block at offset 0x24.
- "gsldrc1 %[src0], 0x24(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2b(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
- "dsll %[dest3_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- // Sample 7: 2x2 block at offset 0x2a.
- "gsldrc1 %[src0], 0x2a(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x31(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- // Combine samples 6 and 7.
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src1], %[src0] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src0], %[src1] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
- // Narrow the eight U results to bytes (saturating packs) and store them.
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- // Same for the eight V results.
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- // Advance the pointers, count down 16 pixels, loop while width > 0.
- "daddiu %[src_rgb0], %[src_rgb0], 0x30 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- // Updated inside the loop, hence read-write rather than input-only.
- [src_rgb0] "+&r"(src_rgb0), [dst_u] "+&r"(dst_u), [dst_v] "+&r"(dst_v),
- [width] "+&r"(width)
- : [src_stride_rgb] "r"(src_stride_rgb),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
- [sixteen] "f"(0x10)
- : "memory");
- }
- // Converts one row of packed 4-byte pixels to 8-bit luma with Loongson MMI
- // inline assembly, 8 pixels (32 source bytes -> 8 output bytes) per pass.
- //
- // For the first three bytes b0, b1, b2 of a pixel (in memory order) the
- // stored value is
- //   (15 * b0 + 75 * b1 + 38 * b2 + 64) >> 7
- // where the weights come from `mask1`, the bias 64 from `value` and the
- // shift count from `shift`. The fourth byte of each pixel is not used: its
- // halfword lane is overwritten with the bias before the multiply.
- //
- // src_argb0: source row (operand %[src_ptr]), advanced 32 bytes per pass.
- // dst_y:     destination row (operand %[dst_ptr]), 8 bytes stored per pass.
- // width:     pixel count. The body runs once before the first test and the
- //            loop only exits when the counter reaches exactly zero, so
- //            width has to be a positive multiple of 8.
- //
- // src_argb0, dst_y and width are modified by the asm and are therefore
- // declared as read-write early-clobber ("+&r") operands; GCC does not permit
- // input-only operands to be changed.
- void ARGBToYJRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest, dest0, dest1, dest2, dest3;
- uint64_t tmp0, tmp1;
- const uint64_t shift = 0x07;
- const uint64_t value = 0x0040;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x00010026004B000FULL;
- __asm__ volatile(
- "1: \n\t"
- // Pixels 0-1: unaligned 64-bit load of source bytes 0x00..0x07.
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- // Low four bytes -> halfwords, put the bias in lane 3, weighted sums.
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
- // Same for the high four bytes (second pixel of the pair).
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
- // Add the two partial sums of each pixel and shift out the fraction.
- "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest0], %[dest0], %[shift] \n\t"
- // Pixels 2-3: same sequence on source bytes 0x08..0x0f.
- "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
- "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest1], %[dest1], %[shift] \n\t"
- // Pixels 4-5: same sequence on source bytes 0x10..0x17.
- "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
- "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest2], %[dest2], %[shift] \n\t"
- // Pixels 6-7: same sequence on source bytes 0x18..0x1f.
- "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
- "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest3], %[dest3], %[shift] \n\t"
- // Narrow the eight 32-bit results to bytes (saturating packs) and store
- // them with an unaligned 64-bit store.
- "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
- "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
- "packushb %[dest], %[tmp0], %[tmp1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- // Advance both pointers, count down 8 pixels, loop until width == 0.
- "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
- [src_lo] "=&f"(src_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1),
- [dest2] "=&f"(dest2), [dest3] "=&f"(dest3), [tmp0] "=&f"(tmp0),
- [tmp1] "=&f"(tmp1),
- // Updated inside the loop, hence read-write rather than input-only.
- [src_ptr] "+&r"(src_argb0), [dst_ptr] "+&r"(dst_y), [width] "+&r"(width)
- : [mask0] "f"(mask0),
- [mask1] "f"(mask1), [shift] "f"(shift), [value] "f"(value)
- : "memory");
- }
- void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[12];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x002b0054007f0002;
- const uint64_t mask_v = 0x0002007f006b0014;
- __asm__ volatile(
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "pavgh %[src0], %[src_lo], %[src0] \n\t"
- "pavgh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
- [sixteen] "f"(0x10)
- : "memory");
- }
- void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {  // Converts a row of 16-bit RGB565 pixels to 8-bit Y; each loop pass reads 16 source bytes (8 pixels) and stores 8 Y bytes.
- uint64_t ftmp[11];  // Scratch slots bound to the asm's "=&f" output operands.
- const uint64_t value = 0x1080108010801080;  // Bias 0x1080 in every halfword; it is multiplied by the weight 1 in `mask`.
- const uint64_t mask = 0x0001004200810019;  // Halfword weights, low to high: 25 (b), 129 (g), 66 (r), 1 (bias).
- uint64_t c0 = 0x001f001f001f001f;  // Keeps the low 5 bits of each halfword.
- uint64_t c1 = 0x00ff00ff00ff00ff;  // Keeps the low byte of each halfword.
- uint64_t c2 = 0x0007000700070007;  // Keeps the low 3 bits of each halfword.
- __asm__ volatile(
- "1: \n\t"  // Loop head. The width test sits at the bottom, so the body always runs at least once.
- "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"  // gsldrc1/gsldlc1 pair: load source bytes 0..7 (4 pixels); presumably the unaligned-load idiom.
- "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"  // src1 = high byte of each pixel.
- "and %[b], %[src0], %[c0] \n\t"  // b = low 5 bits of each pixel.
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"  // src0 = top 3 bits of the low byte (low part of g).
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"  // g = ((high byte & 7) << 3) | (low byte >> 5): 6-bit field.
- "psrlh %[r], %[src1], %[three] \n\t"  // r = high byte >> 3: 5-bit field.
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"  // b = (b << 3) | (b >> 2): widen 5 bits to 8.
- "psllh %[src0], %[g], %[two] \n\t"
- "psrlh %[src1], %[g], %[four] \n\t"
- "or %[g], %[src0], %[src1] \n\t"  // g = (g << 2) | (g >> 4): widen 6 bits to 8.
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"  // r = (r << 3) | (r >> 2): widen 5 bits to 8.
- "punpcklhw %[src0], %[b], %[r] \n\t"  // Interleave low halves: b0, r0, b1, r1.
- "punpcklhw %[src1], %[g], %[value] \n\t"  // Interleave low halves: g0, bias, g1, bias.
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"  // Pixel 0 as b, g, r, bias.
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"  // Pixel 1 as b, g, r, bias.
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"  // Two 32-bit partial sums per pixel: 25*b + 129*g and 66*r + bias.
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"  // Gather the first partial sum of both pixels.
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"  // Gather the second partial sum of both pixels.
- "paddw %[dest0], %[src0], %[src1] \n\t"  // Full weighted sums for pixels 0 and 1.
- "psrlw %[dest0], %[dest0], %[eight] \n\t"  // Divide by 256.
- "punpckhhw %[src0], %[b], %[r] \n\t"  // Same steps on the high halves: pixels 2 and 3 -> dest1.
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[src0], %[src1] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t"  // Second half of the pass: source bytes 8..15 (pixels 4..7), same sequence as above.
- "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psrlh %[r], %[src1], %[three] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[two] \n\t"
- "psrlh %[src1], %[g], %[four] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"  // Pixels 4 and 5 -> dest2.
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[src0], %[src1] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
- "punpckhhw %[src0], %[b], %[r] \n\t"  // Pixels 6 and 7 -> dest3.
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[src0], %[src1] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"  // Narrow the eight 32-bit results to halfwords...
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"  // ...and then to eight unsigned bytes.
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"  // gssdlc1/gssdrc1 pair: store 8 Y bytes at dst_y.
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_rgb565], %[src_rgb565], 0x10 \n\t"  // Advance the source by 16 bytes.
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"  // Advance the destination by 8 bytes.
- "daddiu %[width], %[width], -0x08 \n\t"  // 8 pixels done.
- "bgtz %[width], 1b \n\t"  // Repeat while width > 0; a width that is not a multiple of 8 still gets a full 8-pixel pass.
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
- [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
- [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
- [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
- : [src_rgb565] "r"(src_rgb565), [dst_y] "r"(dst_y), [value] "f"(value),  // NOTE(review): src_rgb565, dst_y and width are input-only operands, yet the template modifies them with daddiu; confirm this is safe with the compilers in use.
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
- [mask] "f"(mask), [eight] "f"(0x08), [five] "f"(0x05),  // Shift counts are passed in "f" operands.
- [three] "f"(0x03), [two] "f"(0x02), [four] "f"(0x04)
- : "memory");
- }
- void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555,  // Converts a row of 16-bit ARGB1555 pixels to 8-bit Y; each loop pass reads 16 source bytes (8 pixels) and stores 8 Y bytes.
- uint8_t* dst_y,
- int width) {
- uint64_t ftmp[11];  // Scratch slots bound to the asm's "=&f" output operands.
- const uint64_t value = 0x1080108010801080;  // Bias 0x1080 in every halfword; it is multiplied by the weight 1 in `mask`.
- const uint64_t mask = 0x0001004200810019;  // Halfword weights, low to high: 25 (b), 129 (g), 66 (r), 1 (bias).
- uint64_t c0 = 0x001f001f001f001f;  // Keeps the low 5 bits of each halfword.
- uint64_t c1 = 0x00ff00ff00ff00ff;  // Keeps the low byte of each halfword.
- uint64_t c2 = 0x0003000300030003;  // Keeps the low 2 bits of each halfword.
- uint64_t c3 = 0x007c007c007c007c;  // Keeps bits 2..6 of each halfword.
- __asm__ volatile(
- "1: \n\t"  // Loop head. The width test sits at the bottom, so the body always runs at least once.
- "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"  // gsldrc1/gsldlc1 pair: load source bytes 0..7 (4 pixels); presumably the unaligned-load idiom.
- "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"  // src1 = high byte of each pixel.
- "and %[b], %[src0], %[c0] \n\t"  // b = low 5 bits of each pixel.
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"  // src0 = top 3 bits of the low byte (low part of g).
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"  // g = ((high byte & 3) << 3) | (low byte >> 5): 5-bit field.
- "and %[r], %[src1], %[c3] \n\t"
- "psrlh %[r], %[r], %[two] \n\t"  // r = (high byte & 0x7c) >> 2: 5-bit field; the top bit of the high byte is dropped.
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"  // b = (b << 3) | (b >> 2): widen 5 bits to 8.
- "psllh %[src0], %[g], %[three] \n\t"
- "psrlh %[src1], %[g], %[two] \n\t"
- "or %[g], %[src0], %[src1] \n\t"  // g = (g << 3) | (g >> 2): widen 5 bits to 8.
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"  // r = (r << 3) | (r >> 2): widen 5 bits to 8.
- "punpcklhw %[src0], %[b], %[r] \n\t"  // Interleave low halves: b0, r0, b1, r1.
- "punpcklhw %[src1], %[g], %[value] \n\t"  // Interleave low halves: g0, bias, g1, bias.
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"  // Pixel 0 as b, g, r, bias.
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"  // Pixel 1 as b, g, r, bias.
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"  // Two 32-bit partial sums per pixel: 25*b + 129*g and 66*r + bias.
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"  // Gather the first partial sum of both pixels.
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"  // Gather the second partial sum of both pixels.
- "paddw %[dest0], %[src0], %[src1] \n\t"  // Full weighted sums for pixels 0 and 1.
- "psrlw %[dest0], %[dest0], %[eight] \n\t"  // Divide by 256.
- "punpckhhw %[src0], %[b], %[r] \n\t"  // Same steps on the high halves: pixels 2 and 3 -> dest1.
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[src0], %[src1] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t"  // Second half of the pass: source bytes 8..15 (pixels 4..7), same sequence as above.
- "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "and %[r], %[src1], %[c3] \n\t"
- "psrlh %[r], %[r], %[two] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[three] \n\t"
- "psrlh %[src1], %[g], %[two] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"  // Pixels 4 and 5 -> dest2.
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[src0], %[src1] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
- "punpckhhw %[src0], %[b], %[r] \n\t"  // Pixels 6 and 7 -> dest3.
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[src0], %[src1] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"  // Narrow the eight 32-bit results to halfwords...
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"  // ...and then to eight unsigned bytes.
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"  // gssdlc1/gssdrc1 pair: store 8 Y bytes at dst_y.
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_argb1555], %[src_argb1555], 0x10 \n\t"  // Advance the source by 16 bytes.
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"  // Advance the destination by 8 bytes.
- "daddiu %[width], %[width], -0x08 \n\t"  // 8 pixels done.
- "bgtz %[width], 1b \n\t"  // Repeat while width > 0; a width that is not a multiple of 8 still gets a full 8-pixel pass.
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
- [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
- [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
- [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
- : [src_argb1555] "r"(src_argb1555), [dst_y] "r"(dst_y),  // NOTE(review): src_argb1555, dst_y and width are input-only operands, yet the template modifies them with daddiu; confirm this is safe with the compilers in use.
- [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0),
- [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [eight] "f"(0x08),
- [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07)  // NOTE(review): %[seven] is never referenced by the template above.
- : "memory");
- }
- void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444,  // Converts a row of 16-bit ARGB4444 pixels to 8-bit Y; each loop pass reads 16 source bytes (8 pixels) and stores 8 Y bytes.
- uint8_t* dst_y,
- int width) {
- uint64_t ftmp[11];  // Scratch slots bound to the asm's "=&f" output operands.
- uint64_t value = 0x1080108010801080;  // Bias 0x1080 in every halfword; it is multiplied by the weight 1 in `mask`.
- uint64_t mask = 0x0001004200810019;  // Halfword weights, low to high: 25 (b), 129 (g), 66 (r), 1 (bias).
- uint64_t c0 = 0x000f000f000f000f;  // Keeps the low 4 bits of each halfword.
- uint64_t c1 = 0x00ff00ff00ff00ff;  // Keeps the low byte of each halfword.
- __asm__ volatile(
- "1: \n\t"  // Loop head. The width test sits at the bottom, so the body always runs at least once.
- "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"  // gsldrc1/gsldlc1 pair: load source bytes 0..7 (4 pixels); presumably the unaligned-load idiom.
- "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"  // src1 = high byte of each pixel.
- "and %[b], %[src0], %[c0] \n\t"  // b = low nibble of the low byte.
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g], %[src0], %[four] \n\t"  // g = high nibble of the low byte.
- "and %[r], %[src1], %[c0] \n\t"  // r = low nibble of the high byte; the high nibble is not used.
- "psllh %[src0], %[b], %[four] \n\t"
- "or %[b], %[src0], %[b] \n\t"  // b = (b << 4) | b: widen 4 bits to 8.
- "psllh %[src0], %[g], %[four] \n\t"
- "or %[g], %[src0], %[g] \n\t"  // g = (g << 4) | g.
- "psllh %[src0], %[r], %[four] \n\t"
- "or %[r], %[src0], %[r] \n\t"  // r = (r << 4) | r.
- "punpcklhw %[src0], %[b], %[r] \n\t"  // Interleave low halves: b0, r0, b1, r1.
- "punpcklhw %[src1], %[g], %[value] \n\t"  // Interleave low halves: g0, bias, g1, bias.
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"  // Pixel 0 as b, g, r, bias.
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"  // Pixel 1 as b, g, r, bias.
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"  // Two 32-bit partial sums per pixel: 25*b + 129*g and 66*r + bias.
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"  // Gather the first partial sum of both pixels.
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"  // Gather the second partial sum of both pixels.
- "paddw %[dest0], %[src0], %[src1] \n\t"  // Full weighted sums for pixels 0 and 1.
- "psrlw %[dest0], %[dest0], %[eight] \n\t"  // Divide by 256.
- "punpckhhw %[src0], %[b], %[r] \n\t"  // Same steps on the high halves: pixels 2 and 3 -> dest1.
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[src0], %[src1] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t"  // Second half of the pass: source bytes 8..15 (pixels 4..7), same sequence as above.
- "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g], %[src0], %[four] \n\t"
- "and %[r], %[src1], %[c0] \n\t"
- "psllh %[src0], %[b], %[four] \n\t"
- "or %[b], %[src0], %[b] \n\t"
- "psllh %[src0], %[g], %[four] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psllh %[src0], %[r], %[four] \n\t"
- "or %[r], %[src0], %[r] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"  // Pixels 4 and 5 -> dest2.
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[src0], %[src1] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
- "punpckhhw %[src0], %[b], %[r] \n\t"  // Pixels 6 and 7 -> dest3.
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[src0], %[src1] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"  // Narrow the eight 32-bit results to halfwords...
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"  // ...and then to eight unsigned bytes.
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"  // gssdlc1/gssdrc1 pair: store 8 Y bytes at dst_y.
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
- "daddiu %[src_argb4444], %[src_argb4444], 0x10 \n\t"  // Advance the source by 16 bytes.
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"  // Advance the destination by 8 bytes.
- "daddiu %[width], %[width], -0x08 \n\t"  // 8 pixels done.
- "bgtz %[width], 1b \n\t"  // Repeat while width > 0; a width that is not a multiple of 8 still gets a full 8-pixel pass.
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
- [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
- [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
- [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
- : [src_argb4444] "r"(src_argb4444), [dst_y] "r"(dst_y),  // NOTE(review): src_argb4444, dst_y and width are input-only operands, yet the template modifies them with daddiu; confirm this is safe with the compilers in use.
- [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0),
- [c1] "f"(c1), [eight] "f"(0x08), [four] "f"(0x04)
- : "memory");
- }
- void RGB565ToUVRow_MMI(const uint8_t* src_rgb565,  // Produces U and V rows from two adjacent RGB565 rows; each loop pass reads 32 bytes (16 pixels) from each row and stores 8 U bytes and 8 V bytes, one per 2x2 pixel block.
- int src_stride_rgb565,  // Byte offset added to src_rgb565 to reach the second row.
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t ftmp[13];  // Scratch slots bound to the asm's "=&f" output operands.
- uint64_t value = 0x2020202020202020;  // Bias seed 0x2020 per halfword; one paddh doubles it to 0x4040 and the weight 2 in the masks makes it 0x8080.
- uint64_t mask_u = 0x0026004a00700002;  // U weights, low to high: 2 (bias), 112 (b), 74 (g), 38 (r).
- uint64_t mask_v = 0x00020070005e0012;  // V weights, low to high: 18 (b), 94 (g), 112 (r), 2 (bias).
- uint64_t mask = 0x93;  // pshufh selector; presumably rotates b, g, r, bias into bias, b, g, r to line up with mask_u.
- uint64_t c0 = 0x001f001f001f001f;  // Keeps the low 5 bits of each halfword.
- uint64_t c1 = 0x00ff00ff00ff00ff;  // Keeps the low byte of each halfword.
- uint64_t c2 = 0x0007000700070007;  // Keeps the low 3 bits of each halfword.
- __asm__ volatile(
- "daddu %[next_rgb565], %[src_rgb565], %[next_rgb565] \n\t"  // Turn the stride operand into a pointer to the second row.
- "1: \n\t"  // Loop head. The width test sits at the bottom, so the body always runs at least once.
- "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"  // Columns 0..3: load 4 pixels from the first row...
- "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
- "gsldrc1 %[src1], 0x00(%[next_rgb565]) \n\t"  // ...and the 4 pixels below them from the second row.
- "gsldlc1 %[src1], 0x07(%[next_rgb565]) \n\t"
- "psrlh %[dest0_u], %[src0], %[eight] \n\t"  // dest0_u is used as scratch here: high byte of each first-row pixel.
- "and %[b0], %[src0], %[c0] \n\t"  // b0 = 5-bit b field, first row.
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest0_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"  // g0 = ((high byte & 7) << 3) | (low byte >> 5): 6-bit g field, first row.
- "psrlh %[r0], %[dest0_u], %[three] \n\t"  // r0 = high byte >> 3: 5-bit r field, first row.
- "psrlh %[src0], %[src1], %[eight] \n\t"  // Same extraction for the second row, using src0/dest0_u/dest0_v as scratch.
- "and %[dest0_u], %[src1], %[c0] \n\t"  // Second-row b.
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest0_v], %[src0], %[c2] \n\t"
- "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
- "or %[dest0_v], %[src1], %[dest0_v] \n\t"  // Second-row g.
- "psrlh %[src0], %[src0], %[three] \n\t"  // Second-row r.
- "paddh %[b0], %[b0], %[dest0_u] \n\t"  // Vertical sums of the two rows, per column.
- "paddh %[g0], %[g0], %[dest0_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"  // Regroup so that adjacent columns can be added.
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"  // Even columns: b, r, b, r.
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"  // Odd columns: b, r, b, r.
- "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"  // 2x2 block sums of b and r (sum of four 5-bit fields).
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"  // (sum << 1) | (sum >> 6): stretch each block sum to 8 bits.
- "punpcklhw %[src0], %[g0], %[value] \n\t"  // Pair each g column sum with the bias seed.
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"  // 2x2 block sums of g (sum of four 6-bit fields) interleaved with 0x4040.
- "punpcklhw %[src0], %[b0], %[g0] \n\t"  // First block as b, g, r, 0x4040.
- "punpckhhw %[src1], %[b0], %[g0] \n\t"  // Second block as b, g, r, 0x4040.
- "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"  // V partial sums: 18*b + 94*g and 112*r + 0x8080.
- "pshufh %[dest0_u], %[src0], %[mask] \n\t"  // Reorder lanes for mask_u (see `mask`).
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"  // U partial sums: 0x8080 + 112*b and 74*g + 38*r.
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"  // Same for the second block (g0 and b0 now hold partial sums).
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
- "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"  // U = (112*b + 0x8080) - (74*g + 38*r) for both blocks.
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"  // Arithmetic shift right by 8.
- "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"  // V = (112*r + 0x8080) - (18*b + 94*g) for both blocks.
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t"  // Columns 4..7 -> dest1_u / dest1_v, same sequence as above.
- "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t"
- "gsldrc1 %[src1], 0x08(%[next_rgb565]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[next_rgb565]) \n\t"
- "psrlh %[dest1_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest1_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "psrlh %[r0], %[dest1_u], %[three] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest1_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest1_v], %[src0], %[c2] \n\t"
- "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
- "or %[dest1_v], %[src1], %[dest1_v] \n\t"
- "psrlh %[src0], %[src0], %[three] \n\t"
- "paddh %[b0], %[b0], %[dest1_u] \n\t"
- "paddh %[g0], %[g0], %[dest1_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
- "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest1_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
- "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_rgb565]) \n\t"  // Columns 8..11 -> dest2_u / dest2_v, same sequence as above.
- "gsldlc1 %[src0], 0x17(%[src_rgb565]) \n\t"
- "gsldrc1 %[src1], 0x10(%[next_rgb565]) \n\t"
- "gsldlc1 %[src1], 0x17(%[next_rgb565]) \n\t"
- "psrlh %[dest2_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest2_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "psrlh %[r0], %[dest2_u], %[three] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest2_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest2_v], %[src0], %[c2] \n\t"
- "psllh %[dest2_v], %[dest2_v], %[three] \n\t"
- "or %[dest2_v], %[src1], %[dest2_v] \n\t"
- "psrlh %[src0], %[src0], %[three] \n\t"
- "paddh %[b0], %[b0], %[dest2_u] \n\t"
- "paddh %[g0], %[g0], %[dest2_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest2_u], %[dest2_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest2_u], %[dest2_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
- "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest2_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
- "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[g0] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_rgb565]) \n\t"  // Columns 12..15 -> dest3_u / dest3_v, same sequence as above.
- "gsldlc1 %[src0], 0x1f(%[src_rgb565]) \n\t"
- "gsldrc1 %[src1], 0x18(%[next_rgb565]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[next_rgb565]) \n\t"
- "psrlh %[dest3_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest3_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "psrlh %[r0], %[dest3_u], %[three] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest3_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest3_v], %[src0], %[c2] \n\t"
- "psllh %[dest3_v], %[dest3_v], %[three] \n\t"
- "or %[dest3_v], %[src1], %[dest3_v] \n\t"
- "psrlh %[src0], %[src0], %[three] \n\t"
- "paddh %[b0], %[b0], %[dest3_u] \n\t"
- "paddh %[g0], %[g0], %[dest3_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest3_u], %[dest3_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest3_u], %[dest3_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
- "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest3_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
- "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[g0] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"  // Narrow the eight 32-bit U results to halfwords...
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"  // ...and then to eight unsigned bytes.
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"  // gssdlc1/gssdrc1 pair: store 8 U bytes at dst_u.
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"  // Same narrowing for the eight V results.
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"  // Store 8 V bytes at dst_v.
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_rgb565], %[src_rgb565], 0x20 \n\t"  // Advance both row pointers by 32 bytes.
- "daddiu %[next_rgb565], %[next_rgb565], 0x20 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"  // Advance both outputs by 8 bytes.
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddiu %[width], %[width], -0x10 \n\t"  // 16 source pixels done.
- "bgtz %[width], 1b \n\t"  // Repeat while width > 0; a width that is not a multiple of 16 still gets a full 16-pixel pass.
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
- [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
- [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
- [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]),
- [dest3_v] "=&f"(ftmp[12])
- : [src_rgb565] "r"(src_rgb565), [next_rgb565] "r"(src_stride_rgb565),  // NOTE(review): next_rgb565 is bound to the int stride and then overwritten with a row pointer; it and src_rgb565, dst_u, dst_v, width are input-only operands that the template modifies. Confirm this is safe with the compilers in use.
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [value] "f"(value), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
- [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
- [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03),  // Shift counts are passed in "f" operands.
- [one] "f"(0x01)
- : "memory");
- }
- void ARGB1555ToUVRow_MMI(const uint8_t* src_argb1555,
- int src_stride_argb1555,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t ftmp[11];
- uint64_t value = 0x2020202020202020;
- uint64_t mask_u = 0x0026004a00700002;
- uint64_t mask_v = 0x00020070005e0012;
- uint64_t mask = 0x93;
- uint64_t c0 = 0x001f001f001f001f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t c2 = 0x0003000300030003;
- uint64_t c3 = 0x007c007c007c007c;
- __asm__ volatile(
- "daddu %[next_argb1555], %[src_argb1555], %[next_argb1555] \n\t"
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
- "gsldrc1 %[src1], 0x00(%[next_argb1555]) \n\t"
- "gsldlc1 %[src1], 0x07(%[next_argb1555]) \n\t"
- "psrlh %[dest0_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest0_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "and %[r0], %[dest0_u], %[c3] \n\t"
- "psrlh %[r0], %[r0], %[two] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest0_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest0_v], %[src0], %[c2] \n\t"
- "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
- "or %[dest0_v], %[src1], %[dest0_v] \n\t"
- "and %[src0], %[src0], %[c3] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "paddh %[b0], %[b0], %[dest0_u] \n\t"
- "paddh %[g0], %[g0], %[dest0_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[six] \n\t"
- "psllh %[g0], %[g0], %[one] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
- "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest0_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
- "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t"
- "gsldrc1 %[src1], 0x08(%[next_argb1555]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[next_argb1555]) \n\t"
- "psrlh %[dest1_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest1_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "and %[r0], %[dest1_u], %[c3] \n\t"
- "psrlh %[r0], %[r0], %[two] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest1_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest1_v], %[src0], %[c2] \n\t"
- "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
- "or %[dest1_v], %[src1], %[dest1_v] \n\t"
- "and %[src0], %[src0], %[c3] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "paddh %[b0], %[b0], %[dest1_u] \n\t"
- "paddh %[g0], %[g0], %[dest1_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[six] \n\t"
- "psllh %[g0], %[g0], %[one] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
- "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest1_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
- "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "packsswh %[dest0_u], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[dest1_u], %[dest0_v], %[dest1_v] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_argb1555]) \n\t"
- "gsldrc1 %[src1], 0x10(%[next_argb1555]) \n\t"
- "gsldlc1 %[src1], 0x17(%[next_argb1555]) \n\t"
- "psrlh %[dest2_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest2_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "and %[r0], %[dest2_u], %[c3] \n\t"
- "psrlh %[r0], %[r0], %[two] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest2_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest0_v], %[src0], %[c2] \n\t"
- "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
- "or %[dest0_v], %[src1], %[dest0_v] \n\t"
- "and %[src0], %[src0], %[c3] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "paddh %[b0], %[b0], %[dest2_u] \n\t"
- "paddh %[g0], %[g0], %[dest0_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest2_u], %[dest0_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[six] \n\t"
- "psllh %[g0], %[g0], %[one] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest2_u], %[dest0_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
- "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest2_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
- "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_argb1555]) \n\t"
- "gsldrc1 %[src1], 0x18(%[next_argb1555]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[next_argb1555]) \n\t"
- "psrlh %[dest3_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest3_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "and %[r0], %[dest3_u], %[c3] \n\t"
- "psrlh %[r0], %[r0], %[two] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest3_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest1_v], %[src0], %[c2] \n\t"
- "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
- "or %[dest1_v], %[src1], %[dest1_v] \n\t"
- "and %[src0], %[src0], %[c3] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "paddh %[b0], %[b0], %[dest3_u] \n\t"
- "paddh %[g0], %[g0], %[dest1_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest3_u], %[dest1_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[six] \n\t"
- "psllh %[g0], %[g0], %[one] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest3_u], %[dest1_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
- "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest3_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
- "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[dest0_u], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src1], %[dest0_v], %[dest1_v] \n\t"
- "packushb %[dest0_v], %[dest1_u], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_argb1555], %[src_argb1555], 0x20 \n\t"
- "daddiu %[next_argb1555], %[next_argb1555], 0x20 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddiu %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
- [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
- [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
- [dest1_v] "=&f"(ftmp[10])
- : [src_argb1555] "r"(src_argb1555),
- [next_argb1555] "r"(src_stride_argb1555), [dst_u] "r"(dst_u),
- [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value),
- [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3),
- [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
- [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03),
- [two] "f"(0x02), [one] "f"(0x01)
- : "memory");
- }
- void ARGB4444ToUVRow_MMI(const uint8_t* src_argb4444,  // Produces one U and one V byte per 2x2 group of 16-bit pixels; this pointer is row 0.
- int src_stride_argb4444,  // Byte offset from row 0 to row 1 (added to src_argb4444 by the first daddu).
- uint8_t* dst_u,  // U output; 8 bytes are stored per loop iteration.
- uint8_t* dst_v,  // V output; 8 bytes are stored per loop iteration.
- int width) {  // Pixel count; each iteration consumes 16 pixels (32 bytes) from each row.
- uint64_t ftmp[13];  // Backing storage for the 13 "=&f" scratch/output registers.
- uint64_t value = 0x2020202020202020;  // 0x2020 per halfword; two of these are summed to form the 0x4040 bias lane.
- uint64_t mask_u = 0x0026004a00700002;  // U multipliers per halfword, high to low: 38, 74, 112, 2.
- uint64_t mask_v = 0x00020070005e0012;  // V multipliers per halfword, high to low: 2, 112, 94, 18.
- uint64_t mask = 0x93;  // pshufh selector used to reorder lanes before the mask_u multiply.
- uint64_t c0 = 0x000f000f000f000f;  // Keeps the low 4 bits of every halfword.
- uint64_t c1 = 0x00ff00ff00ff00ff;  // Keeps the low byte of every halfword.
- __asm__ volatile(
- "daddu %[next_argb4444], %[src_argb4444], %[next_argb4444] \n\t"  // Turn the stride operand into a pointer to row 1.
- "1: \n\t"  // Loop head; the body always runs once before width is tested.
- "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"  // Group 0: unaligned 8-byte load (4 pixels) from row 0.
- "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
- "gsldrc1 %[src1], 0x00(%[next_argb4444]) \n\t"  // Same 4 pixel columns from row 1.
- "gsldlc1 %[src1], 0x07(%[next_argb4444]) \n\t"
- "psrlh %[dest0_u], %[src0], %[eight] \n\t"  // High byte of each row-0 pixel.
- "and %[b0], %[src0], %[c0] \n\t"  // b0 = bits 0-3 of each pixel.
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g0], %[src0], %[four] \n\t"  // g0 = bits 4-7 of each pixel.
- "and %[r0], %[dest0_u], %[c0] \n\t"  // r0 = bits 8-11; bits 12-15 are never used.
- "psrlh %[src0], %[src1], %[eight] \n\t"  // Same split for row 1, using dest0_u/dest0_v/src0 as temporaries.
- "and %[dest0_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[dest0_v], %[src1], %[four] \n\t"
- "and %[src0], %[src0], %[c0] \n\t"
- "paddh %[b0], %[b0], %[dest0_u] \n\t"  // Vertical sums (row 0 + row 1) per channel.
- "paddh %[g0], %[g0], %[dest0_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"  // Interleave B and R, then add horizontally adjacent columns:
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"  // src0 = [B01, R01, B23, R23], each a sum of four 4-bit samples.
- "psrlh %[b0], %[src0], %[four] \n\t"  // (sum >> 4) | (sum << 2): rescales the sum toward an 8-bit range.
- "psllh %[r0], %[src0], %[two] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[four] \n\t"  // Same rescale applied to the per-column vertical G sums.
- "psllh %[g0], %[g0], %[two] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"  // Interleave G with 0x2020 and add adjacent columns:
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"  // g0 = [G01, 0x4040, G23, 0x4040].
- "punpcklhw %[src0], %[b0], %[g0] \n\t"  // src0 = [B, G, R, 0x4040] for output sample 0.
- "punpckhhw %[src1], %[b0], %[g0] \n\t"  // src1 = [B, G, R, 0x4040] for output sample 1.
- "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"  // V partial sums: 18*B + 94*G and 112*R + 2*0x4040.
- "pshufh %[dest0_u], %[src0], %[mask] \n\t"  // Reorder lanes to [0x4040, B, G, R] to line up with mask_u.
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"  // U partial sums: 2*0x4040 + 112*B and 74*G + 38*R.
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"  // Same V/U partial sums for output sample 1.
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
- "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"  // U = 0x8080 + 112*B - 74*G - 38*R ...
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"  // ... >> 8, for two output samples.
- "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"  // V = 0x8080 + 112*R - 94*G - 18*B ...
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"  // ... >> 8, for two output samples.
- "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t"  // Group 1 (pixels 4-7): same sequence, results in dest1_u/dest1_v.
- "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t"
- "gsldrc1 %[src1], 0x08(%[next_argb4444]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[next_argb4444]) \n\t"
- "psrlh %[dest1_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g0], %[src0], %[four] \n\t"
- "and %[r0], %[dest1_u], %[c0] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest1_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[dest1_v], %[src1], %[four] \n\t"
- "and %[src0], %[src0], %[c0] \n\t"
- "paddh %[b0], %[b0], %[dest1_u] \n\t"
- "paddh %[g0], %[g0], %[dest1_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
- "psrlh %[b0], %[src0], %[four] \n\t"
- "psllh %[r0], %[src0], %[two] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[four] \n\t"
- "psllh %[g0], %[g0], %[two] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
- "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest1_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
- "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_argb4444]) \n\t"  // Group 2 (pixels 8-11): same sequence, results in dest2_u/dest2_v.
- "gsldlc1 %[src0], 0x17(%[src_argb4444]) \n\t"
- "gsldrc1 %[src1], 0x10(%[next_argb4444]) \n\t"
- "gsldlc1 %[src1], 0x17(%[next_argb4444]) \n\t"
- "psrlh %[dest2_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g0], %[src0], %[four] \n\t"
- "and %[r0], %[dest2_u], %[c0] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest2_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[dest2_v], %[src1], %[four] \n\t"
- "and %[src0], %[src0], %[c0] \n\t"
- "paddh %[b0], %[b0], %[dest2_u] \n\t"
- "paddh %[g0], %[g0], %[dest2_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest2_u], %[dest2_v] \n\t"
- "psrlh %[b0], %[src0], %[four] \n\t"
- "psllh %[r0], %[src0], %[two] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[four] \n\t"
- "psllh %[g0], %[g0], %[two] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest2_u], %[dest2_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
- "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest2_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
- "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[g0] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_argb4444]) \n\t"  // Group 3 (pixels 12-15): same sequence, results in dest3_u/dest3_v.
- "gsldlc1 %[src0], 0x1f(%[src_argb4444]) \n\t"
- "gsldrc1 %[src1], 0x18(%[next_argb4444]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[next_argb4444]) \n\t"
- "psrlh %[dest3_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g0], %[src0], %[four] \n\t"
- "and %[r0], %[dest3_u], %[c0] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest3_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[dest3_v], %[src1], %[four] \n\t"
- "and %[src0], %[src0], %[c0] \n\t"
- "paddh %[b0], %[b0], %[dest3_u] \n\t"
- "paddh %[g0], %[g0], %[dest3_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest3_u], %[dest3_v] \n\t"
- "psrlh %[b0], %[src0], %[four] \n\t"
- "psllh %[r0], %[src0], %[two] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[four] \n\t"
- "psllh %[g0], %[g0], %[two] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest3_u], %[dest3_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
- "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest3_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
- "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[g0] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"  // Narrow the eight 32-bit U results to halfwords (signed saturation) ...
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"  // ... then to 8 bytes (unsigned saturation).
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"  // Unaligned 8-byte store of U.
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"  // Same narrowing and store for V.
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_argb4444], %[src_argb4444], 0x20 \n\t"  // Advance both source rows by 32 bytes (16 pixels).
- "daddiu %[next_argb4444], %[next_argb4444], 0x20 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"  // Advance both outputs by 8 bytes.
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddiu %[width], %[width], -0x10 \n\t"  // 16 pixels consumed.
- "bgtz %[width], 1b \n\t"  // Repeat while the remaining count is positive.
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
- [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
- [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
- [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]),
- [dest3_v] "=&f"(ftmp[12])
- : [src_argb4444] "r"(src_argb4444),  // NOTE(review): the pointer, stride and width operands are listed as inputs but are written by the asm; GCC's extended-asm rules expect "+r" for modified operands - confirm.
- [next_argb4444] "r"(src_stride_argb4444), [dst_u] "r"(dst_u),
- [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value),
- [c0] "f"(c0), [c1] "f"(c1), [mask] "f"(mask), [mask_u] "f"(mask_u),
- [mask_v] "f"(mask_v), [eight] "f"(0x08), [four] "f"(0x04),  // Shift counts are passed in FP registers.
- [two] "f"(0x02)
- : "memory");
- }
- void ARGBToUV444Row_MMI(const uint8_t* src_argb,  // Produces one U and one V byte per 4-byte input pixel (no subsampling); this is the pixel source.
- uint8_t* dst_u,  // U output; 8 bytes are stored per loop iteration.
- uint8_t* dst_v,  // V output; 8 bytes are stored per loop iteration.
- int width) {  // Pixel count; each iteration consumes 8 pixels (32 bytes).
- uint64_t ftmp[12];  // Backing storage for the 12 "=&f" scratch/output registers.
- const uint64_t value = 0x4040;  // Bias lane; multiplied by 2 in pmaddhw it contributes 0x8080.
- const uint64_t mask_u = 0x0026004a00700002;  // U multipliers per halfword, high to low: 38, 74, 112, 2.
- const uint64_t mask_v = 0x00020070005e0012;  // V multipliers per halfword, high to low: 2, 112, 94, 18.
- __asm__ volatile(
- "1: \n\t"  // Loop head; the body always runs once before width is tested.
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"  // Pixels 0-1: unaligned 8-byte load.
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"  // Pixel 0 bytes widened to halfwords [b0, b1, b2, b3].
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"  // Pixel 1 bytes widened to halfwords.
- "dsll %[dest0_u], %[src_lo], %[sixteen] \n\t"  // Shift lanes up by one ...
- "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"  // ... and put the bias in lane 0: [0x4040, b0, b1, b2].
- "pinsrh_3 %[dest0_v], %[src_lo], %[value] \n\t"  // Replace lane 3 (byte 3) with the bias: [b0, b1, b2, 0x4040].
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"  // U partial sums: 2*0x4040 + 112*b0 and 74*b1 + 38*b2.
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"  // V partial sums: 18*b0 + 94*b1 and 112*b2 + 2*0x4040.
- "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"  // Same preparation for pixel 1 (src_lo/src_hi reused as temporaries).
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"  // U = 0x8080 + 112*b0 - 74*b1 - 38*b2 ...
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"  // ... >> 8, for both pixels.
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"  // V = 0x8080 + 112*b2 - 94*b1 - 18*b0 ...
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"  // ... >> 8, for both pixels.
- "gsldrc1 %[src0], 0x08(%[src_argb]) \n\t"  // Pixels 2-3: same sequence, results in dest1_u/dest1_v.
- "gsldlc1 %[src0], 0x0f(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "dsll %[dest1_u], %[src_lo], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_3 %[dest1_v], %[src_lo], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x10(%[src_argb]) \n\t"  // Pixels 4-5: same sequence, results in dest2_u/dest2_v.
- "gsldlc1 %[src0], 0x17(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "dsll %[dest2_u], %[src_lo], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_3 %[dest2_v], %[src_lo], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
- "gsldrc1 %[src0], 0x18(%[src_argb]) \n\t"  // Pixels 6-7: same sequence, results in dest3_u/dest3_v.
- "gsldlc1 %[src0], 0x1f(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "dsll %[dest3_u], %[src_lo], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_3 %[dest3_v], %[src_lo], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"  // Narrow the eight 32-bit U results to halfwords (signed saturation) ...
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"  // ... then to 8 bytes (unsigned saturation).
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"  // Unaligned 8-byte store of U.
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"  // Same narrowing and store for V.
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
- "daddiu %[src_argb], %[src_argb], 0x20 \n\t"  // Advance the source by 32 bytes (8 pixels).
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"  // Advance both outputs by 8 bytes.
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"  // 8 pixels consumed.
- "bgtz %[width], 1b \n\t"  // Repeat while the remaining count is positive.
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
- [src_hi] "=&f"(ftmp[3]), [dest0_u] "=&f"(ftmp[4]),
- [dest0_v] "=&f"(ftmp[5]), [dest1_u] "=&f"(ftmp[6]),
- [dest1_v] "=&f"(ftmp[7]), [dest2_u] "=&f"(ftmp[8]),
- [dest2_v] "=&f"(ftmp[9]), [dest3_u] "=&f"(ftmp[10]),
- [dest3_v] "=&f"(ftmp[11])
- : [src_argb] "r"(src_argb), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),  // NOTE(review): these pointers and width are listed as inputs but are written by the asm; GCC's extended-asm rules expect "+r" for modified operands - confirm.
- [width] "r"(width), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
- [value] "f"(value), [zero] "f"(0x00), [sixteen] "f"(0x10),  // Shift counts and the zero vector are passed in FP registers.
- [eight] "f"(0x08)
- : "memory");
- }
- void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width) {  // Writes a gray version of each 4-byte pixel of src_argb to dst_argb, two pixels per iteration; byte 3 of each pixel is copied unchanged.
- uint64_t src, src_lo, src_hi, src37, dest, dest_lo, dest_hi;  // Register-backed temporaries for the asm below.
- uint64_t tmp0, tmp1;
- const uint64_t mask0 = 0x0;  // Zero vector used to widen bytes to halfwords.
- const uint64_t mask1 = 0x01;  // Constant 1 placed in lane 3 so the weight 64 below acts as a rounding term.
- const uint64_t mask2 = 0x00400026004B000FULL;  // Weights per halfword, high to low: 64, 38, 75, 15.
- const uint64_t mask3 = 0xFF000000FF000000ULL;  // Selects bytes 3 and 7 (byte 3 of each of the two pixels).
- const uint64_t mask4 = ~mask3;  // Selects the other six bytes.
- const uint64_t shift = 0x07;  // Final right shift (divide by 128).
- __asm__ volatile(
- "1: \n\t"  // Loop head; the body always runs once before width is tested.
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"  // Unaligned 8-byte load: two pixels.
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "and %[src37], %[src], %[mask3] \n\t"  // Save bytes 3 and 7 for re-insertion after the conversion.
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"  // Pixel 0 bytes widened to halfwords [b0, b1, b2, b3].
- "pinsrh_3 %[src_lo], %[src_lo], %[mask1] \n\t"  // Lane 3 becomes 1: [b0, b1, b2, 1].
- "pmaddhw %[dest_lo], %[src_lo], %[mask2] \n\t"  // Words: 15*b0 + 75*b1 and 38*b2 + 64.
- "punpcklwd %[tmp0], %[dest_lo], %[dest_lo] \n\t"
- "punpckhwd %[tmp1], %[dest_lo], %[dest_lo] \n\t"
- "paddw %[dest_lo], %[tmp0], %[tmp1] \n\t"  // Both words = 15*b0 + 75*b1 + 38*b2 + 64.
- "psrlw %[dest_lo], %[dest_lo], %[shift] \n\t"  // >> 7 gives the gray value.
- "packsswh %[dest_lo], %[dest_lo], %[dest_lo] \n\t"  // Gray value replicated into all four halfwords.
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"  // Pixel 1: same sequence.
- "pinsrh_3 %[src_hi], %[src_hi], %[mask1] \n\t"
- "pmaddhw %[dest_hi], %[src_hi], %[mask2] \n\t"
- "punpcklwd %[tmp0], %[dest_hi], %[dest_hi] \n\t"
- "punpckhwd %[tmp1], %[dest_hi], %[dest_hi] \n\t"
- "paddw %[dest_hi], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest_hi], %[dest_hi], %[shift] \n\t"
- "packsswh %[dest_hi], %[dest_hi], %[dest_hi] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"  // Eight bytes: pixel 0 gray x4, pixel 1 gray x4.
- "and %[dest], %[dest], %[mask4] \n\t"  // Clear bytes 3 and 7 ...
- "or %[dest], %[dest], %[src37] \n\t"  // ... and restore the saved source bytes there.
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"  // Unaligned 8-byte store.
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"  // Advance both pointers by two pixels.
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"  // Two pixels consumed.
- "bnez %[width], 1b \n\t"  // Exits only when the count is exactly 0, so width is presumably a positive multiple of 2 - confirm against callers.
- : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), [tmp0] "=&f"(tmp0),
- [tmp1] "=&f"(tmp1), [src] "=&f"(src), [dest] "=&f"(dest),
- [src37] "=&f"(src37)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width),  // NOTE(review): these operands are listed as inputs but are written by the asm; GCC's extended-asm rules expect "+r" for modified operands - confirm.
- [shift] "f"(shift), [mask0] "f"(mask0), [mask1] "f"(mask1),
- [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4)
- : "memory");
- }
- // Converts a row of 4-byte pixels to sepia tone in place, two pixels per loop iteration; byte 3 of each pixel is preserved.
- void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width) {  // dst_argb is both source and destination; width is the pixel count.
- uint64_t dest, dest_lo, dest_hi, dest37, dest0, dest1, dest2;  // Register-backed temporaries for the asm below.
- uint64_t tmp0, tmp1;
- const uint64_t mask0 = 0x0;  // Zero vector used to widen bytes and to pad the third result.
- const uint64_t mask1 = 0x002300440011ULL;  // Weights for output byte 0, lanes low to high: 17, 68, 35, 0.
- const uint64_t mask2 = 0x002D00580016ULL;  // Weights for output byte 1, lanes low to high: 22, 88, 45, 0.
- const uint64_t mask3 = 0x003200620018ULL;  // Weights for output byte 2, lanes low to high: 24, 98, 50, 0.
- const uint64_t mask4 = 0xFF000000FF000000ULL;  // Selects bytes 3 and 7 (byte 3 of each of the two pixels).
- const uint64_t shift = 0x07;  // Final right shift (divide by 128).
- __asm__ volatile(
- "1: \n\t"  // Loop head; the body always runs once before width is tested.
- "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"  // Unaligned 8-byte load: two pixels.
- "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "and %[dest37], %[dest], %[mask4] \n\t"  // Save bytes 3 and 7 for re-insertion after the conversion.
- "punpcklbh %[dest_lo], %[dest], %[mask0] \n\t"  // Pixel 0 bytes widened to halfwords [b0, b1, b2, b3].
- "pmaddhw %[dest0], %[dest_lo], %[mask1] \n\t"  // Words: 17*b0 + 68*b1 and 35*b2.
- "pmaddhw %[dest1], %[dest_lo], %[mask2] \n\t"  // Words: 22*b0 + 88*b1 and 45*b2.
- "pmaddhw %[dest2], %[dest_lo], %[mask3] \n\t"  // Words: 24*b0 + 98*b1 and 50*b2.
- "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
- "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
- "paddw %[dest0], %[tmp0], %[tmp1] \n\t"  // [17*b0 + 68*b1 + 35*b2, 22*b0 + 88*b1 + 45*b2].
- "psrlw %[dest0], %[dest0], %[shift] \n\t"  // >> 7: new bytes 0 and 1.
- "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
- "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
- "paddw %[dest1], %[tmp0], %[tmp1] \n\t"  // [24*b0 + 98*b1 + 50*b2, 0].
- "psrlw %[dest1], %[dest1], %[shift] \n\t"  // >> 7: new byte 2, with 0 in the byte-3 slot.
- "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"  // Pixel 0 as four halfwords.
- "punpckhbh %[dest_hi], %[dest], %[mask0] \n\t"  // Pixel 1: same sequence.
- "pmaddhw %[dest0], %[dest_hi], %[mask1] \n\t"
- "pmaddhw %[dest1], %[dest_hi], %[mask2] \n\t"
- "pmaddhw %[dest2], %[dest_hi], %[mask3] \n\t"
- "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
- "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
- "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest0], %[dest0], %[shift] \n\t"
- "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
- "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
- "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest1], %[dest1], %[shift] \n\t"
- "packsswh %[dest_hi], %[dest0], %[dest1] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"  // Narrow both pixels to bytes with unsigned saturation (values above 255 clamp).
- "or %[dest], %[dest], %[dest37] \n\t"  // Bytes 3 and 7 are 0 here, so OR restores the saved originals.
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"  // Unaligned 8-byte store back to the same location.
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"  // Advance by two pixels.
- "daddi %[width], %[width], -0x02 \n\t"  // Two pixels consumed.
- "bnez %[width], 1b \n\t"  // Exits only when the count is exactly 0, so width is presumably a positive multiple of 2 - confirm against callers.
- : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest37] "=&f"(dest37), [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1),
- [dest] "=&f"(dest)
- : [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask0] "f"(mask0),  // NOTE(review): dst_ptr and width are listed as inputs but are written by the asm; GCC's extended-asm rules expect "+r" for modified operands - confirm.
- [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3),
- [mask4] "f"(mask4), [shift] "f"(shift)
- : "memory");
- }
- // Apply color matrix to a row of image. Matrix is signed.
- // TODO(fbarchard): Consider adding rounding (+32).
- // Applies a 4x4 matrix of signed 8-bit coefficients to a row of 4-byte
- // pixels, two pixels per loop pass. Output byte k of a pixel is the dot
- // product of its four input bytes with matrix_argb[4*k .. 4*k+3], shifted
- // right arithmetically by 6 and saturated to 0..255.
- // The loop exits only when width, reduced by 2 per pass, reaches exactly
- // zero, so width must be a positive multiple of 2.
- void ARGBColorMatrixRow_MMI(const uint8_t* src_argb,
-                             uint8_t* dst_argb,
-                             const int8_t* matrix_argb,
-                             int width) {
-   uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi, dest0, dest1, dest2,
-       dest3;
-   uint64_t matrix, matrix_hi, matrix_lo;
-   uint64_t tmp0, tmp1;
-   const uint64_t shift0 = 0x06;  // final >> 6 applied to every dot product
-   const uint64_t shift1 = 0x08;  // << 8 then arithmetic >> 8 sign-extends
-   const uint64_t mask0 = 0x0;    // zero source for byte -> halfword unpack
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
-       "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-       // First pixel, matrix rows 0 and 1.
-       "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
-       "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
-       "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
-       "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
-       "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
-       "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
-       "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
-       "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
-       "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
-       "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
-       "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
-       "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
-       "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
-       "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
-       "psraw %[dest0], %[dest0], %[shift0] \n\t"
-       // First pixel, matrix rows 2 and 3.
-       "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
-       "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
-       "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
-       "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
-       "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
-       "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
-       "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
-       "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
-       "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
-       "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
-       "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
-       "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
-       "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
-       "psraw %[dest1], %[dest1], %[shift0] \n\t"
-       // Second pixel, matrix rows 0 and 1.
-       "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
-       "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
-       "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
-       "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
-       "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
-       "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
-       "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
-       "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
-       "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
-       "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
-       "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
-       "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
-       "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
-       "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
-       "psraw %[dest2], %[dest2], %[shift0] \n\t"
-       // Second pixel, matrix rows 2 and 3.
-       "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
-       "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
-       "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
-       "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
-       "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
-       "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
-       "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
-       "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
-       "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
-       "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
-       "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
-       "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
-       "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
-       "psraw %[dest3], %[dest3], %[shift0] \n\t"
-       // Narrow 32 -> 16 (signed saturate) -> 8 (unsigned saturate), store.
-       "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
-       "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
-       "packushb %[dest], %[tmp0], %[tmp1] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x02 \n\t"
-       "bnez %[width], 1b \n\t"
-       // The pointers and width are advanced by the asm, so they are declared
-       // read-write; input-only operands must not be modified.
-       : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
-         [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
-         [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
-         [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest),
-         [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [matrix_hi] "=&f"(matrix_hi),
-         [matrix_lo] "=&f"(matrix_lo), [matrix] "=&f"(matrix),
-         [src_ptr] "+r"(src_argb), [dst_ptr] "+r"(dst_argb),
-         [width] "+r"(width)
-       : [matrix_ptr] "r"(matrix_argb), [shift0] "f"(shift0),
-         [shift1] "f"(shift1), [mask0] "f"(mask0)
-       : "memory");
- }
- // Scales every byte of a row of 4-byte pixels by the matching byte of value,
- // two pixels per loop pass. Both the pixel byte p and the scale byte v are
- // widened to 16 bits by byte duplication (x * 0x0101); the result is the high
- // half of their 16x16 product shifted right by a further 8 bits.
- // The loop exits only when width, reduced by 2 per pass, reaches exactly
- // zero, so width must be a positive multiple of 2.
- void ARGBShadeRow_MMI(const uint8_t* src_argb,
-                       uint8_t* dst_argb,
-                       int width,
-                       uint32_t value) {
-   uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi, scale;
-   const uint64_t shift = 0x08;
-   __asm__ volatile(
-       // Widen the four scale bytes once, into a scratch register. Doing this
-       // inside the loop on %[value] itself would re-expand the already
-       // expanded value on every pass and clobber an input operand.
-       "punpcklbh %[scale], %[value], %[value] \n\t"
-       "1: \n\t"
-       "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
-       "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-       "punpcklbh %[src_lo], %[src], %[src] \n\t"
-       "punpckhbh %[src_hi], %[src], %[src] \n\t"
-       "pmulhuh %[dest_lo], %[src_lo], %[scale] \n\t"
-       "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
-       "pmulhuh %[dest_hi], %[src_hi], %[scale] \n\t"
-       "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
-       "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x02 \n\t"
-       "bnez %[width], 1b \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
-         [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src] "=&f"(src),
-         [dest] "=&f"(dest), [scale] "=&f"(scale), [src_ptr] "+r"(src_argb),
-         [dst_ptr] "+r"(dst_argb), [width] "+r"(width)
-       : [value] "f"(value), [shift] "f"(shift)
-       : "memory");
- }
- // Multiplies two rows of 4-byte pixels byte by byte, two pixels per loop
- // pass. Each src_argb0 byte is widened by duplication (a * 0x0101), each
- // src_argb1 byte is zero-extended, and the high 16 bits of their product is
- // the output byte.
- // The loop exits only when width, reduced by 2 per pass, reaches exactly
- // zero, so width must be a positive multiple of 2.
- void ARGBMultiplyRow_MMI(const uint8_t* src_argb0,
-                          const uint8_t* src_argb1,
-                          uint8_t* dst_argb,
-                          int width) {
-   uint64_t src0, src0_hi, src0_lo, src1, src1_hi, src1_lo;
-   uint64_t dest, dest_lo, dest_hi;
-   const uint64_t mask = 0x0;  // zero source for the zero-extending unpack
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
-       "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
-       "punpcklbh %[src0_lo], %[src0], %[src0] \n\t"
-       "punpckhbh %[src0_hi], %[src0], %[src0] \n\t"
-       "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
-       "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
-       "punpcklbh %[src1_lo], %[src1], %[mask] \n\t"
-       "punpckhbh %[src1_hi], %[src1], %[mask] \n\t"
-       "pmulhuh %[dest_lo], %[src0_lo], %[src1_lo] \n\t"
-       "pmulhuh %[dest_hi], %[src0_hi], %[src1_hi] \n\t"
-       "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
-       "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x02 \n\t"
-       "bnez %[width], 1b \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
-         [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
-         [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src0] "=&f"(src0),
-         [src1] "=&f"(src1), [dest] "=&f"(dest), [src0_ptr] "+r"(src_argb0),
-         [src1_ptr] "+r"(src_argb1), [dst_ptr] "+r"(dst_argb),
-         [width] "+r"(width)
-       : [mask] "f"(mask)
-       : "memory");
- }
- // Adds two rows of 4-byte pixels byte by byte with unsigned saturation
- // (paddusb), two pixels per loop pass.
- // The loop exits only when width, reduced by 2 per pass, reaches exactly
- // zero, so width must be a positive multiple of 2.
- void ARGBAddRow_MMI(const uint8_t* src_argb0,
-                     const uint8_t* src_argb1,
-                     uint8_t* dst_argb,
-                     int width) {
-   uint64_t src0, src1, dest;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
-       "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
-       "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
-       "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
-       "paddusb %[dest], %[src0], %[src1] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
-       "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x02 \n\t"
-       "bnez %[width], 1b \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest),
-         [src0_ptr] "+r"(src_argb0), [src1_ptr] "+r"(src_argb1),
-         [dst_ptr] "+r"(dst_argb), [width] "+r"(width)
-       :
-       : "memory");
- }
- // Subtracts src_argb1 from src_argb0 byte by byte with unsigned saturation
- // (psubusb, results below zero become zero), two 4-byte pixels per loop pass.
- // The loop exits only when width, reduced by 2 per pass, reaches exactly
- // zero, so width must be a positive multiple of 2.
- void ARGBSubtractRow_MMI(const uint8_t* src_argb0,
-                          const uint8_t* src_argb1,
-                          uint8_t* dst_argb,
-                          int width) {
-   uint64_t src0, src1, dest;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
-       "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
-       "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
-       "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
-       "psubusb %[dest], %[src0], %[src1] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
-       "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x02 \n\t"
-       "bnez %[width], 1b \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest),
-         [src0_ptr] "+r"(src_argb0), [src1_ptr] "+r"(src_argb1),
-         [dst_ptr] "+r"(dst_argb), [width] "+r"(width)
-       :
-       : "memory");
- }
- // Sobel functions which mimic SSSE3.
- // Horizontal Sobel over three rows, eight outputs per loop pass:
- //   dst_sobelx[i] = min(255, |(y0[i] + 2*y1[i] + y2[i]) -
- //                            (y0[i+2] + 2*y1[i+2] + y2[i+2])|)
- // Each pass is computed as two groups of four outputs. The loop repeats
- // while width, reduced by 8 per pass, is still greater than zero.
- // NOTE(review): every load fetches 8 bytes although only the low 4 are used,
- // so each pass reads source bytes up to offset 13 of each row.
- void SobelXRow_MMI(const uint8_t* src_y0,
-                    const uint8_t* src_y1,
-                    const uint8_t* src_y2,
-                    uint8_t* dst_sobelx,
-                    int width) {
-   uint64_t y00, y10, y20;
-   uint64_t y02, y12, y22;
-   uint64_t sobel;
-   const uint64_t zero = 0x0;
-   __asm__ volatile(
-       "1: \n\t"
-       // Outputs 0..3.
-       "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t"  // a=src_y0[i]
-       "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
-       "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t"  // a_sub=src_y0[i+2]
-       "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"
-       "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t"  // b=src_y1[i]
-       "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
-       "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t"  // b_sub=src_y1[i+2]
-       "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"
-       "gsldlc1 %[y20], 0x07(%[src_y2]) \n\t"  // c=src_y2[i]
-       "gsldrc1 %[y20], 0x00(%[src_y2]) \n\t"
-       "gsldlc1 %[y22], 0x09(%[src_y2]) \n\t"  // c_sub=src_y2[i+2]
-       "gsldrc1 %[y22], 0x02(%[src_y2]) \n\t"
-       "punpcklbh %[y00], %[y00], %[zero] \n\t"
-       "punpcklbh %[y10], %[y10], %[zero] \n\t"
-       "punpcklbh %[y20], %[y20], %[zero] \n\t"
-       "punpcklbh %[y02], %[y02], %[zero] \n\t"
-       "punpcklbh %[y12], %[y12], %[zero] \n\t"
-       "punpcklbh %[y22], %[y22], %[zero] \n\t"
-       "paddh %[y00], %[y00], %[y10] \n\t"  // a+b
-       "paddh %[y20], %[y20], %[y10] \n\t"  // c+b
-       "paddh %[y00], %[y00], %[y20] \n\t"  // a+2b+c
-       "paddh %[y02], %[y02], %[y12] \n\t"  // a_sub+b_sub
-       "paddh %[y22], %[y22], %[y12] \n\t"  // c_sub+b_sub
-       "paddh %[y02], %[y02], %[y22] \n\t"  // a_sub+2b_sub+c_sub
-       "pmaxsh %[y10], %[y00], %[y02] \n\t"
-       "pminsh %[y20], %[y00], %[y02] \n\t"
-       "psubh %[sobel], %[y10], %[y20] \n\t"  // max-min = absolute difference
-       // Outputs 4..7: same computation, all offsets moved on by 4.
-       "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
-       "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
-       "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
-       "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"
-       "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
-       "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
-       "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
-       "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"
-       "gsldlc1 %[y20], 0x0B(%[src_y2]) \n\t"
-       "gsldrc1 %[y20], 0x04(%[src_y2]) \n\t"
-       "gsldlc1 %[y22], 0x0D(%[src_y2]) \n\t"
-       "gsldrc1 %[y22], 0x06(%[src_y2]) \n\t"
-       "punpcklbh %[y00], %[y00], %[zero] \n\t"
-       "punpcklbh %[y10], %[y10], %[zero] \n\t"
-       "punpcklbh %[y20], %[y20], %[zero] \n\t"
-       "punpcklbh %[y02], %[y02], %[zero] \n\t"
-       "punpcklbh %[y12], %[y12], %[zero] \n\t"
-       "punpcklbh %[y22], %[y22], %[zero] \n\t"
-       "paddh %[y00], %[y00], %[y10] \n\t"
-       "paddh %[y20], %[y20], %[y10] \n\t"
-       "paddh %[y00], %[y00], %[y20] \n\t"
-       "paddh %[y02], %[y02], %[y12] \n\t"
-       "paddh %[y22], %[y22], %[y12] \n\t"
-       "paddh %[y02], %[y02], %[y22] \n\t"
-       "pmaxsh %[y10], %[y00], %[y02] \n\t"
-       "pminsh %[y20], %[y00], %[y02] \n\t"
-       "psubh %[y00], %[y10], %[y20] \n\t"
-       "packushb %[sobel], %[sobel], %[y00] \n\t"  // clamp255
-       "gssdrc1 %[sobel], 0(%[dst_sobelx]) \n\t"
-       "gssdlc1 %[sobel], 7(%[dst_sobelx]) \n\t"
-       "daddiu %[src_y0], %[src_y0], 8 \n\t"
-       "daddiu %[src_y1], %[src_y1], 8 \n\t"
-       "daddiu %[src_y2], %[src_y2], 8 \n\t"
-       "daddiu %[dst_sobelx], %[dst_sobelx], 8 \n\t"
-       "daddiu %[width], %[width], -8 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y10] "=&f"(y10),
-         [y20] "=&f"(y20), [y02] "=&f"(y02), [y12] "=&f"(y12), [y22] "=&f"(y22),
-         [src_y0] "+r"(src_y0), [src_y1] "+r"(src_y1), [src_y2] "+r"(src_y2),
-         [dst_sobelx] "+r"(dst_sobelx), [width] "+r"(width)
-       : [zero] "f"(zero)
-       : "memory");
- }
- // Vertical Sobel over two rows, eight outputs per loop pass:
- //   dst_sobely[i] = min(255, |(y0[i] + 2*y0[i+1] + y0[i+2]) -
- //                            (y1[i] + 2*y1[i+1] + y1[i+2])|)
- // Each pass is computed as two groups of four outputs. The loop repeats
- // while width, reduced by 8 per pass, is still greater than zero.
- // NOTE(review): every load fetches 8 bytes although only the low 4 are used,
- // so each pass reads source bytes up to offset 13 of each row.
- void SobelYRow_MMI(const uint8_t* src_y0,
-                    const uint8_t* src_y1,
-                    uint8_t* dst_sobely,
-                    int width) {
-   uint64_t y00, y01, y02;
-   uint64_t y10, y11, y12;
-   uint64_t sobel;
-   const uint64_t zero = 0x0;
-   __asm__ volatile(
-       "1: \n\t"
-       // Outputs 0..3.
-       "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t"  // a=src_y0[i]
-       "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
-       "gsldlc1 %[y01], 0x08(%[src_y0]) \n\t"  // b=src_y0[i+1]
-       "gsldrc1 %[y01], 0x01(%[src_y0]) \n\t"
-       "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t"  // c=src_y0[i+2]
-       "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"
-       "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t"  // a_sub=src_y1[i]
-       "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
-       "gsldlc1 %[y11], 0x08(%[src_y1]) \n\t"  // b_sub=src_y1[i+1]
-       "gsldrc1 %[y11], 0x01(%[src_y1]) \n\t"
-       "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t"  // c_sub=src_y1[i+2]
-       "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"
-       "punpcklbh %[y00], %[y00], %[zero] \n\t"
-       "punpcklbh %[y01], %[y01], %[zero] \n\t"
-       "punpcklbh %[y02], %[y02], %[zero] \n\t"
-       "punpcklbh %[y10], %[y10], %[zero] \n\t"
-       "punpcklbh %[y11], %[y11], %[zero] \n\t"
-       "punpcklbh %[y12], %[y12], %[zero] \n\t"
-       "paddh %[y00], %[y00], %[y01] \n\t"  // a+b
-       "paddh %[y02], %[y02], %[y01] \n\t"  // c+b
-       "paddh %[y00], %[y00], %[y02] \n\t"  // a+2b+c
-       "paddh %[y10], %[y10], %[y11] \n\t"  // a_sub+b_sub
-       "paddh %[y12], %[y12], %[y11] \n\t"  // c_sub+b_sub
-       "paddh %[y10], %[y10], %[y12] \n\t"  // a_sub+2b_sub+c_sub
-       "pmaxsh %[y02], %[y00], %[y10] \n\t"
-       "pminsh %[y12], %[y00], %[y10] \n\t"
-       "psubh %[sobel], %[y02], %[y12] \n\t"  // max-min = absolute difference
-       // Outputs 4..7: same computation, all offsets moved on by 4.
-       "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
-       "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
-       "gsldlc1 %[y01], 0x0C(%[src_y0]) \n\t"
-       "gsldrc1 %[y01], 0x05(%[src_y0]) \n\t"
-       "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
-       "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"
-       "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
-       "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
-       "gsldlc1 %[y11], 0x0C(%[src_y1]) \n\t"
-       "gsldrc1 %[y11], 0x05(%[src_y1]) \n\t"
-       "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
-       "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"
-       "punpcklbh %[y00], %[y00], %[zero] \n\t"
-       "punpcklbh %[y01], %[y01], %[zero] \n\t"
-       "punpcklbh %[y02], %[y02], %[zero] \n\t"
-       "punpcklbh %[y10], %[y10], %[zero] \n\t"
-       "punpcklbh %[y11], %[y11], %[zero] \n\t"
-       "punpcklbh %[y12], %[y12], %[zero] \n\t"
-       "paddh %[y00], %[y00], %[y01] \n\t"
-       "paddh %[y02], %[y02], %[y01] \n\t"
-       "paddh %[y00], %[y00], %[y02] \n\t"
-       "paddh %[y10], %[y10], %[y11] \n\t"
-       "paddh %[y12], %[y12], %[y11] \n\t"
-       "paddh %[y10], %[y10], %[y12] \n\t"
-       "pmaxsh %[y02], %[y00], %[y10] \n\t"
-       "pminsh %[y12], %[y00], %[y10] \n\t"
-       "psubh %[y00], %[y02], %[y12] \n\t"
-       "packushb %[sobel], %[sobel], %[y00] \n\t"  // clamp255
-       "gssdrc1 %[sobel], 0(%[dst_sobely]) \n\t"
-       "gssdlc1 %[sobel], 7(%[dst_sobely]) \n\t"
-       "daddiu %[src_y0], %[src_y0], 8 \n\t"
-       "daddiu %[src_y1], %[src_y1], 8 \n\t"
-       "daddiu %[dst_sobely], %[dst_sobely], 8 \n\t"
-       "daddiu %[width], %[width], -8 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y01] "=&f"(y01),
-         [y02] "=&f"(y02), [y10] "=&f"(y10), [y11] "=&f"(y11), [y12] "=&f"(y12),
-         [src_y0] "+r"(src_y0), [src_y1] "+r"(src_y1),
-         [dst_sobely] "+r"(dst_sobely), [width] "+r"(width)
-       : [zero] "f"(zero)
-       : "memory");
- }
- // Combines two Sobel planes into 4-byte pixels, eight pixels per loop pass.
- // For each i, s = saturating_add(src_sobelx[i], src_sobely[i]) is written to
- // the first three bytes of the output pixel and 255 to the fourth.
- // The loop repeats while width, reduced by 8 per pass, is greater than zero.
- void SobelRow_MMI(const uint8_t* src_sobelx,
-                   const uint8_t* src_sobely,
-                   uint8_t* dst_argb,
-                   int width) {
-   uint64_t t0, t1, t2;
-   const uint64_t c1 = 0xff000000ff000000ULL;  // 255 in byte 3 of each pixel
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[t0], 0x07(%[src_sobelx]) \n\t"  // a=src_sobelx[i]
-       "gsldrc1 %[t0], 0x00(%[src_sobelx]) \n\t"
-       "gsldlc1 %[t1], 0x07(%[src_sobely]) \n\t"  // b=src_sobely[i]
-       "gsldrc1 %[t1], 0x00(%[src_sobely]) \n\t"
-       // s7 s6 s5 s4 s3 s2 s1 s0 = a+b
-       "paddusb %[t2] , %[t0], %[t1] \n\t"
-       // s3 s2 s1 s0->s3 s3 s2 s2 s1 s1 s0 s0
-       "punpcklbh %[t0], %[t2], %[t2] \n\t"
-       // s1 s1 s0 s0->s1 s1 s1 s1 s0 s0 s0 s0
-       "punpcklbh %[t1], %[t0], %[t0] \n\t"
-       "or %[t1], %[t1], %[c1] \n\t"
-       // 255 s1 s1 s1 255 s0 s0 s0
-       "gssdrc1 %[t1], 0x00(%[dst_argb]) \n\t"
-       "gssdlc1 %[t1], 0x07(%[dst_argb]) \n\t"
-       // s3 s3 s2 s2->s3 s3 s3 s3 s2 s2 s2 s2
-       "punpckhbh %[t1], %[t0], %[t0] \n\t"
-       "or %[t1], %[t1], %[c1] \n\t"
-       // 255 s3 s3 s3 255 s2 s2 s2
-       "gssdrc1 %[t1], 0x08(%[dst_argb]) \n\t"
-       "gssdlc1 %[t1], 0x0f(%[dst_argb]) \n\t"
-       // s7 s6 s5 s4->s7 s7 s6 s6 s5 s5 s4 s4
-       "punpckhbh %[t0], %[t2], %[t2] \n\t"
-       // s5 s5 s4 s4->s5 s5 s5 s5 s4 s4 s4 s4
-       "punpcklbh %[t1], %[t0], %[t0] \n\t"
-       "or %[t1], %[t1], %[c1] \n\t"
-       "gssdrc1 %[t1], 0x10(%[dst_argb]) \n\t"
-       "gssdlc1 %[t1], 0x17(%[dst_argb]) \n\t"
-       // s7 s7 s6 s6->s7 s7 s7 s7 s6 s6 s6 s6
-       "punpckhbh %[t1], %[t0], %[t0] \n\t"
-       "or %[t1], %[t1], %[c1] \n\t"
-       "gssdrc1 %[t1], 0x18(%[dst_argb]) \n\t"
-       "gssdlc1 %[t1], 0x1f(%[dst_argb]) \n\t"
-       "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
-       "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
-       "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
-       "daddiu %[width], %[width], -8 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [t0] "=&f"(t0), [t1] "=&f"(t1), [t2] "=&f"(t2),
-         [src_sobelx] "+r"(src_sobelx), [src_sobely] "+r"(src_sobely),
-         [dst_argb] "+r"(dst_argb), [width] "+r"(width)
-       : [c1] "f"(c1)
-       : "memory");
- }
- // Writes dst_y[i] = saturating_add(src_sobelx[i], src_sobely[i]), eight
- // bytes per loop pass. The loop repeats while width, reduced by 8 per pass,
- // is greater than zero.
- void SobelToPlaneRow_MMI(const uint8_t* src_sobelx,
-                          const uint8_t* src_sobely,
-                          uint8_t* dst_y,
-                          int width) {
-   uint64_t tr;
-   uint64_t tb;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldrc1 %[tr], 0x0(%[src_sobelx]) \n\t"
-       "gsldlc1 %[tr], 0x7(%[src_sobelx]) \n\t"  // r=src_sobelx[i]
-       "gsldrc1 %[tb], 0x0(%[src_sobely]) \n\t"
-       "gsldlc1 %[tb], 0x7(%[src_sobely]) \n\t"  // b=src_sobely[i]
-       "paddusb %[tr], %[tr], %[tb] \n\t"  // g
-       "gssdrc1 %[tr], 0x0(%[dst_y]) \n\t"
-       "gssdlc1 %[tr], 0x7(%[dst_y]) \n\t"
-       "daddiu %[dst_y], %[dst_y], 8 \n\t"
-       "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
-       "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
-       "daddiu %[width], %[width], -8 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [tr] "=&f"(tr), [tb] "=&f"(tb), [src_sobelx] "+r"(src_sobelx),
-         [src_sobely] "+r"(src_sobely), [dst_y] "+r"(dst_y),
-         [width] "+r"(width)
-       :
-       : "memory");
- }
- // Packs two Sobel planes into 4-byte pixels, eight pixels per loop pass.
- // For each i the output bytes are, in memory order: src_sobely[i],
- // saturating_add(src_sobelx[i], src_sobely[i]), src_sobelx[i], 255.
- // The loop repeats while width, reduced by 8 per pass, is greater than zero.
- void SobelXYRow_MMI(const uint8_t* src_sobelx,
-                     const uint8_t* src_sobely,
-                     uint8_t* dst_argb,
-                     int width) {
-   uint64_t tr, tb, tg;
-   uint64_t result;
-   uint64_t gb;
-   uint64_t cr;
-   const uint64_t c1 = 0xffffffffffffffffULL;  // source of the 255 bytes
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[tr], 0x07(%[src_sobelx]) \n\t"  // r=src_sobelx[i]
-       "gsldrc1 %[tr], 0x00(%[src_sobelx]) \n\t"
-       "gsldlc1 %[tb], 0x07(%[src_sobely]) \n\t"  // b=src_sobely[i]
-       "gsldrc1 %[tb], 0x00(%[src_sobely]) \n\t"
-       "paddusb %[tg] , %[tr], %[tb] \n\t"  // g
-       // g3 b3 g2 b2 g1 b1 g0 b0
-       "punpcklbh %[gb], %[tb], %[tg] \n\t"
-       // c3 r3 c2 r2 c1 r1 c0 r0
-       "punpcklbh %[cr], %[tr], %[c1] \n\t"
-       // c1 r1 g1 b1 c0 r0 g0 b0
-       "punpcklhw %[result], %[gb], %[cr] \n\t"
-       "gssdrc1 %[result], 0x00(%[dst_argb]) \n\t"
-       "gssdlc1 %[result], 0x07(%[dst_argb]) \n\t"
-       // c3 r3 g3 b3 c2 r2 g2 b2
-       "punpckhhw %[result], %[gb], %[cr] \n\t"
-       "gssdrc1 %[result], 0x08(%[dst_argb]) \n\t"
-       "gssdlc1 %[result], 0x0f(%[dst_argb]) \n\t"
-       // g7 b7 g6 b6 g5 b5 g4 b4
-       "punpckhbh %[gb], %[tb], %[tg] \n\t"
-       // c7 r7 c6 r6 c5 r5 c4 r4
-       "punpckhbh %[cr], %[tr], %[c1] \n\t"
-       // c5 r5 g5 b5 c4 r4 g4 b4
-       "punpcklhw %[result], %[gb], %[cr] \n\t"
-       "gssdrc1 %[result], 0x10(%[dst_argb]) \n\t"
-       "gssdlc1 %[result], 0x17(%[dst_argb]) \n\t"
-       // c7 r7 g7 b7 c6 r6 g6 b6
-       "punpckhhw %[result], %[gb], %[cr] \n\t"
-       "gssdrc1 %[result], 0x18(%[dst_argb]) \n\t"
-       "gssdlc1 %[result], 0x1f(%[dst_argb]) \n\t"
-       "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
-       "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
-       "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
-       "daddiu %[width], %[width], -8 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [tr] "=&f"(tr), [tb] "=&f"(tb), [tg] "=&f"(tg), [gb] "=&f"(gb),
-         [cr] "=&f"(cr), [result] "=&f"(result),
-         [src_sobelx] "+r"(src_sobelx), [src_sobely] "+r"(src_sobely),
-         [dst_argb] "+r"(dst_argb), [width] "+r"(width)
-       : [c1] "f"(c1)
-       : "memory");
- }
- // Expands each source byte y into a 4-byte pixel (y, y, y, 255), four
- // pixels per loop pass.
- // The loop exits only when width, reduced by 4 per pass, reaches exactly
- // zero, so width must be a positive multiple of 4.
- void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) {
-   uint64_t src, dest;
-   const uint64_t mask0 = 0x00ffffff00ffffffULL;  // keeps the three y bytes
-   const uint64_t mask1 = ~mask0;                 // sets byte 3 to 255
-   __asm__ volatile(
-       "1: \n\t"
-       "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"
-       "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
-       // Duplicate every byte, then every halfword: y -> y y y y.
-       "punpcklbh %[src], %[src], %[src] \n\t"
-       "punpcklhw %[dest], %[src], %[src] \n\t"
-       "and %[dest], %[dest], %[mask0] \n\t"
-       "or %[dest], %[dest], %[mask1] \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "punpckhhw %[dest], %[src], %[src] \n\t"
-       "and %[dest], %[dest], %[mask0] \n\t"
-       "or %[dest], %[dest], %[mask1] \n\t"
-       "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-       "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
-       "daddi %[width], %[width], -0x04 \n\t"
-       "bnez %[width], 1b \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [src] "=&f"(src), [dest] "=&f"(dest), [src_ptr] "+r"(src_y),
-         [dst_ptr] "+r"(dst_argb), [width] "+r"(width)
-       : [mask0] "f"(mask0), [mask1] "f"(mask1)
-       : "memory");
- }
- // Converts each source byte y into a 4-byte pixel (g, g, g, 255), eight
- // pixels per loop pass, where
- //   g = clamp_0_255((((y * 0x0101) * 0x4A35 >> 16) - 0x0488) >> 6)
- // with the final shift arithmetic. One pixel is built per stanza: pshufh
- // broadcasts the pixel's y to all four 16-bit lanes, the arithmetic runs on
- // all lanes, then pinsrh_3 overwrites lane 3 with 255.
- // The loop exits only when width, reduced by 8 per pass, reaches exactly
- // zero, so width must be a positive multiple of 8.
- void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf, int width) {
-   uint64_t src, src_lo, src_hi, dest, dest_lo, dest_hi;
-   const uint64_t mask0 = 0x0;   // zero for unpack; pshufh selector, lane 0
-   const uint64_t mask1 = 0x55;  // pshufh selector, lane 1
-   const uint64_t mask2 = 0xAA;  // pshufh selector, lane 2
-   const uint64_t mask3 = 0xFF;  // pshufh selector, lane 3; also the 255
-   const uint64_t mask4 = 0x4A354A354A354A35ULL;  // per-lane multiplier
-   const uint64_t mask5 = 0x0488048804880488ULL;  // per-lane subtrahend
-   const uint64_t shift0 = 0x08;
-   const uint64_t shift1 = 0x06;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
-       "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-       "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
-       "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
-       // Pixels 0 and 1.
-       "pshufh %[src], %[src_lo], %[mask0] \n\t"
-       "psllh %[dest_lo], %[src], %[shift0] \n\t"
-       "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
-       "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
-       "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
-       "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
-       "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
-       "pshufh %[src], %[src_lo], %[mask1] \n\t"
-       "psllh %[dest_hi], %[src], %[shift0] \n\t"
-       "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
-       "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
-       "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
-       "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
-       "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
-       "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       // Pixels 2 and 3.
-       "pshufh %[src], %[src_lo], %[mask2] \n\t"
-       "psllh %[dest_lo], %[src], %[shift0] \n\t"
-       "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
-       "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
-       "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
-       "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
-       "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
-       "pshufh %[src], %[src_lo], %[mask3] \n\t"
-       "psllh %[dest_hi], %[src], %[shift0] \n\t"
-       "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
-       "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
-       "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
-       "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
-       "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
-       "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-       "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-       // Pixels 4 and 5.
-       "pshufh %[src], %[src_hi], %[mask0] \n\t"
-       "psllh %[dest_lo], %[src], %[shift0] \n\t"
-       "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
-       "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
-       "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
-       "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
-       "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
-       "pshufh %[src], %[src_hi], %[mask1] \n\t"
-       "psllh %[dest_hi], %[src], %[shift0] \n\t"
-       "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
-       "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
-       "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
-       "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
-       "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
-       "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-       "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
-       // Pixels 6 and 7.
-       "pshufh %[src], %[src_hi], %[mask2] \n\t"
-       "psllh %[dest_lo], %[src], %[shift0] \n\t"
-       "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
-       "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
-       "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
-       "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
-       "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
-       "pshufh %[src], %[src_hi], %[mask3] \n\t"
-       "psllh %[dest_hi], %[src], %[shift0] \n\t"
-       "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
-       "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
-       "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
-       "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
-       "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
-       "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-       "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t"
-       "daddi %[width], %[width], -0x08 \n\t"
-       "bnez %[width], 1b \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
-         [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
-         [dest_lo] "=&f"(dest_lo), [src_ptr] "+r"(src_y),
-         [dst_ptr] "+r"(rgb_buf), [width] "+r"(width)
-       : [mask0] "f"(mask0), [mask1] "f"(mask1), [mask2] "f"(mask2),
-         [mask3] "f"(mask3), [mask4] "f"(mask4), [mask5] "f"(mask5),
-         [shift0] "f"(shift0), [shift1] "f"(shift1)
-       : "memory");
- }
- // Copies width bytes from src to dst in reverse order, eight bytes per loop
- // pass. Reading starts at the last eight bytes of src and moves backwards
- // while dst is written forwards.
- // The loop exits only when width, reduced by 8 per pass, reaches exactly
- // zero, so width must be a positive multiple of 8.
- void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
-   uint64_t source, src0, src1, dest;
-   const uint64_t mask0 = 0x0;   // zero source for byte -> halfword unpack
-   const uint64_t mask1 = 0x1b;  // pshufh selector that reverses four lanes
-   src += width - 1;  // address of the last source byte
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[source], 0(%[src_ptr]) \n\t"
-       "gsldrc1 %[source], -7(%[src_ptr]) \n\t"
-       "punpcklbh %[src0], %[source], %[mask0] \n\t"
-       "pshufh %[src0], %[src0], %[mask1] \n\t"
-       "punpckhbh %[src1], %[source], %[mask0] \n\t"
-       "pshufh %[src1], %[src1], %[mask1] \n\t"
-       // High half first, so the whole 8-byte group comes out reversed.
-       "packushb %[dest], %[src1], %[src0] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], -0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x08 \n\t"
-       "bnez %[width], 1b \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [source] "=&f"(source), [dest] "=&f"(dest), [src0] "=&f"(src0),
-         [src1] "=&f"(src1), [src_ptr] "+r"(src), [dst_ptr] "+r"(dst),
-         [width] "+r"(width)
-       : [mask0] "f"(mask0), [mask1] "f"(mask1)
-       : "memory");
- }
- // De-interleaves a row of 2-byte pairs and reverses it: the first byte of
- // pair (width-1-i) goes to dst_u[i] and the second byte to dst_v[i]. Eight
- // pairs (16 source bytes) are handled per loop pass, reading src_uv
- // backwards from its last pair.
- // The loop exits only when width, reduced by 8 per pass, reaches exactly
- // zero, so width must be a positive multiple of 8.
- void MirrorUVRow_MMI(const uint8_t* src_uv,
-                      uint8_t* dst_u,
-                      uint8_t* dst_v,
-                      int width) {
-   uint64_t src0, src1, dest0, dest1;
-   const uint64_t mask0 = 0x00ff00ff00ff00ffULL;  // keeps byte 0 of each pair
-   const uint64_t mask1 = 0x1b;  // pshufh selector that reverses four lanes
-   const uint64_t shift = 0x08;  // moves byte 1 of each pair to the low byte
-   src_uv += (width - 1) << 1;   // address of the last pair
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src0], 1(%[src_ptr]) \n\t"
-       "gsldrc1 %[src0], -6(%[src_ptr]) \n\t"
-       "gsldlc1 %[src1], -7(%[src_ptr]) \n\t"
-       "gsldrc1 %[src1], -14(%[src_ptr]) \n\t"
-       "and %[dest0], %[src0], %[mask0] \n\t"
-       "pshufh %[dest0], %[dest0], %[mask1] \n\t"
-       "and %[dest1], %[src1], %[mask0] \n\t"
-       "pshufh %[dest1], %[dest1], %[mask1] \n\t"
-       "packushb %[dest0], %[dest0], %[dest1] \n\t"
-       "gssdlc1 %[dest0], 0x07(%[dstu_ptr]) \n\t"
-       "gssdrc1 %[dest0], 0x00(%[dstu_ptr]) \n\t"
-       "psrlh %[dest0], %[src0], %[shift] \n\t"
-       "pshufh %[dest0], %[dest0], %[mask1] \n\t"
-       "psrlh %[dest1], %[src1], %[shift] \n\t"
-       "pshufh %[dest1], %[dest1], %[mask1] \n\t"
-       "packushb %[dest0], %[dest0], %[dest1] \n\t"
-       "gssdlc1 %[dest0], 0x07(%[dstv_ptr]) \n\t"
-       "gssdrc1 %[dest0], 0x00(%[dstv_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], -0x10 \n\t"
-       "daddiu %[dstu_ptr], %[dstu_ptr], 0x08 \n\t"
-       "daddiu %[dstv_ptr], %[dstv_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x08 \n\t"
-       "bnez %[width], 1b \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0),
-         [src1] "=&f"(src1), [src_ptr] "+r"(src_uv), [dstu_ptr] "+r"(dst_u),
-         [dstv_ptr] "+r"(dst_v), [width] "+r"(width)
-       : [mask0] "f"(mask0), [mask1] "f"(mask1), [shift] "f"(shift)
-       : "memory");
- }
- // Copies a row of 4-byte pixels in reverse pixel order (the bytes inside
- // each pixel keep their order), two pixels per loop pass. src is read
- // backwards from its last two pixels while dst is written forwards.
- // The loop exits only when width, reduced by 2 per pass, reaches exactly
- // zero, so width must be a positive multiple of 2.
- void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
-   src += (width - 1) * 4;  // address of the last pixel
-   uint64_t temp;
-   const uint64_t shuff = 0x4e;  // 01 00 11 10: swaps the two 32-bit halves
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[temp], 3(%[src]) \n\t"
-       "gsldrc1 %[temp], -4(%[src]) \n\t"
-       "pshufh %[temp], %[temp], %[shuff] \n\t"
-       "gssdrc1 %[temp], 0x0(%[dst]) \n\t"
-       "gssdlc1 %[temp], 0x7(%[dst]) \n\t"
-       "daddiu %[src], %[src], -0x08 \n\t"
-       "daddiu %[dst], %[dst], 0x08 \n\t"
-       "daddiu %[width], %[width], -0x02 \n\t"
-       "bnez %[width], 1b \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [temp] "=&f"(temp), [src] "+r"(src), [dst] "+r"(dst),
-         [width] "+r"(width)
-       : [shuff] "f"(shuff)
-       : "memory");
- }
- // De-interleaves a row of 2-byte pairs: byte 0 of pair i goes to dst_u[i]
- // and byte 1 to dst_v[i]. Eight pairs (16 source bytes) per loop pass; the
- // loop repeats while width, reduced by 8 per pass, is greater than zero.
- void SplitUVRow_MMI(const uint8_t* src_uv,
-                     uint8_t* dst_u,
-                     uint8_t* dst_v,
-                     int width) {
-   const uint64_t c0 = 0x00ff00ff00ff00ffULL;  // keeps byte 0 of each pair
-   const uint64_t shift = 0x08;  // moves byte 1 of each pair to the low byte
-   uint64_t t0, t1, t2, t3;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldrc1 %[t0], 0x00(%[src_uv]) \n\t"
-       "gsldlc1 %[t0], 0x07(%[src_uv]) \n\t"
-       "gsldrc1 %[t1], 0x08(%[src_uv]) \n\t"
-       "gsldlc1 %[t1], 0x0f(%[src_uv]) \n\t"
-       "and %[t2], %[t0], %[c0] \n\t"
-       "and %[t3], %[t1], %[c0] \n\t"
-       "packushb %[t2], %[t2], %[t3] \n\t"
-       "gssdrc1 %[t2], 0x0(%[dst_u]) \n\t"
-       "gssdlc1 %[t2], 0x7(%[dst_u]) \n\t"
-       "psrlh %[t2], %[t0], %[shift] \n\t"
-       "psrlh %[t3], %[t1], %[shift] \n\t"
-       "packushb %[t2], %[t2], %[t3] \n\t"
-       "gssdrc1 %[t2], 0x0(%[dst_v]) \n\t"
-       "gssdlc1 %[t2], 0x7(%[dst_v]) \n\t"
-       "daddiu %[src_uv], %[src_uv], 16 \n\t"
-       "daddiu %[dst_u], %[dst_u], 8 \n\t"
-       "daddiu %[dst_v], %[dst_v], 8 \n\t"
-       "daddiu %[width], %[width], -8 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [t0] "=&f"(t0), [t1] "=&f"(t1), [t2] "=&f"(t2), [t3] "=&f"(t3),
-         [src_uv] "+r"(src_uv), [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v),
-         [width] "+r"(width)
-       : [c0] "f"(c0), [shift] "f"(shift)
-       : "memory");
- }
- // Interleaves two byte rows: dst_uv[2*i] = src_u[i], dst_uv[2*i+1] =
- // src_v[i]. Eight pairs (16 output bytes) per loop pass; the loop repeats
- // while width, reduced by 8 per pass, is greater than zero.
- void MergeUVRow_MMI(const uint8_t* src_u,
-                     const uint8_t* src_v,
-                     uint8_t* dst_uv,
-                     int width) {
-   uint64_t t0, t1, t2;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldrc1 %[t0], 0x0(%[src_u]) \n\t"
-       "gsldlc1 %[t0], 0x7(%[src_u]) \n\t"
-       "gsldrc1 %[t1], 0x0(%[src_v]) \n\t"
-       "gsldlc1 %[t1], 0x7(%[src_v]) \n\t"
-       "punpcklbh %[t2], %[t0], %[t1] \n\t"
-       "gssdrc1 %[t2], 0x0(%[dst_uv]) \n\t"
-       "gssdlc1 %[t2], 0x7(%[dst_uv]) \n\t"
-       "punpckhbh %[t2], %[t0], %[t1] \n\t"
-       "gssdrc1 %[t2], 0x8(%[dst_uv]) \n\t"
-       "gssdlc1 %[t2], 0xf(%[dst_uv]) \n\t"
-       "daddiu %[src_u], %[src_u], 8 \n\t"
-       "daddiu %[src_v], %[src_v], 8 \n\t"
-       "daddiu %[dst_uv], %[dst_uv], 16 \n\t"
-       "daddiu %[width], %[width], -8 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [t0] "=&f"(t0), [t1] "=&f"(t1), [t2] "=&f"(t2),
-         [dst_uv] "+r"(dst_uv), [src_u] "+r"(src_u), [src_v] "+r"(src_v),
-         [width] "+r"(width)
-       :
-       : "memory");
- }
- // De-interleaves a row of 3-byte pixels into three planes: byte 0 of pixel
- // i goes to dst_r[i], byte 1 to dst_g[i], byte 2 to dst_b[i]. Four pixels
- // (12 source bytes) per loop pass.
- // The loop exits only when width, reduced by 4 per pass, reaches exactly
- // zero, so width must be a positive multiple of 4.
- // NOTE(review): the fourth 4-byte load covers source offsets 0x09..0x0c, so
- // each pass reads one byte beyond the 12 bytes it consumes; the caller needs
- // at least one readable byte after the row.
- void SplitRGBRow_MMI(const uint8_t* src_rgb,
-                      uint8_t* dst_r,
-                      uint8_t* dst_g,
-                      uint8_t* dst_b,
-                      int width) {
-   uint64_t src0, src1, src2, src3;
-   uint64_t dest_hi, dest_lo, dest;
-   __asm__ volatile(
-       "1: \n\t"
-       // One 4-byte load per pixel, each starting on that pixel's first byte.
-       "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
-       "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
-       "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
-       "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
-       "punpcklbh %[dest_lo], %[src0], %[src1] \n\t"
-       "gslwlc1 %[src2], 0x09(%[src_ptr]) \n\t"
-       "gslwrc1 %[src2], 0x06(%[src_ptr]) \n\t"
-       "gslwlc1 %[src3], 0x0c(%[src_ptr]) \n\t"
-       "gslwrc1 %[src3], 0x09(%[src_ptr]) \n\t"
-       "punpcklbh %[dest_hi], %[src2], %[src3] \n\t"
-       // Low word: the four first bytes; high word: the four second bytes.
-       "punpcklhw %[dest], %[dest_lo], %[dest_hi] \n\t"
-       "gsswlc1 %[dest], 0x03(%[dstr_ptr]) \n\t"
-       "gsswrc1 %[dest], 0x00(%[dstr_ptr]) \n\t"
-       "punpckhwd %[dest], %[dest], %[dest] \n\t"
-       "gsswlc1 %[dest], 0x03(%[dstg_ptr]) \n\t"
-       "gsswrc1 %[dest], 0x00(%[dstg_ptr]) \n\t"
-       // Low word: the four third bytes.
-       "punpckhhw %[dest], %[dest_lo], %[dest_hi] \n\t"
-       "gsswlc1 %[dest], 0x03(%[dstb_ptr]) \n\t"
-       "gsswrc1 %[dest], 0x00(%[dstb_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
-       "daddiu %[dstr_ptr], %[dstr_ptr], 0x04 \n\t"
-       "daddiu %[dstg_ptr], %[dstg_ptr], 0x04 \n\t"
-       "daddiu %[dstb_ptr], %[dstb_ptr], 0x04 \n\t"
-       "daddi %[width], %[width], -0x04 \n\t"
-       "bnez %[width], 1b \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [src0] "=&f"(src0), [src1] "=&f"(src1), [src2] "=&f"(src2),
-         [src3] "=&f"(src3), [dest_hi] "=&f"(dest_hi),
-         [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest),
-         [src_ptr] "+r"(src_rgb), [dstr_ptr] "+r"(dst_r),
-         [dstg_ptr] "+r"(dst_g), [dstb_ptr] "+r"(dst_b), [width] "+r"(width)
-       :
-       : "memory");
- }
- // Interleaves three byte planes into 3-byte pixels: pixel i is src_r[i],
- // src_g[i], src_b[i]. Eight pixels (24 output bytes) per loop pass.
- // The loop exits only when width, reduced by 8 per pass, reaches exactly
- // zero, so width must be a positive multiple of 8.
- // NOTE(review): every pixel is stored as a 4-byte word whose fourth byte is
- // zero and is overwritten by the next pixel's store. The last store of a
- // pass covers offsets 0x15..0x18, so each pass writes one zero byte beyond
- // the 24 bytes it produces; after the final pass that byte lies past the
- // end of the row. The caller needs at least one writable byte there.
- void MergeRGBRow_MMI(const uint8_t* src_r,
-                      const uint8_t* src_g,
-                      const uint8_t* src_b,
-                      uint8_t* dst_rgb,
-                      int width) {
-   uint64_t srcr, srcg, srcb, dest;
-   uint64_t srcrg_hi, srcrg_lo, srcbz_hi, srcbz_lo;
-   const uint64_t temp = 0x0;  // zero byte paired with every third byte
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[srcr], 0x07(%[srcr_ptr]) \n\t"
-       "gsldrc1 %[srcr], 0x00(%[srcr_ptr]) \n\t"
-       "gsldlc1 %[srcg], 0x07(%[srcg_ptr]) \n\t"
-       "gsldrc1 %[srcg], 0x00(%[srcg_ptr]) \n\t"
-       "punpcklbh %[srcrg_lo], %[srcr], %[srcg] \n\t"
-       "punpckhbh %[srcrg_hi], %[srcr], %[srcg] \n\t"
-       "gsldlc1 %[srcb], 0x07(%[srcb_ptr]) \n\t"
-       "gsldrc1 %[srcb], 0x00(%[srcb_ptr]) \n\t"
-       "punpcklbh %[srcbz_lo], %[srcb], %[temp] \n\t"
-       "punpckhbh %[srcbz_hi], %[srcb], %[temp] \n\t"
-       // Pixels 0 and 1.
-       "punpcklhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
-       "gsswlc1 %[dest], 0x03(%[dst_ptr]) \n\t"
-       "gsswrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "punpckhwd %[dest], %[dest], %[dest] \n\t"
-       "gsswlc1 %[dest], 0x06(%[dst_ptr]) \n\t"
-       "gsswrc1 %[dest], 0x03(%[dst_ptr]) \n\t"
-       // Pixels 2 and 3.
-       "punpckhhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
-       "gsswlc1 %[dest], 0x09(%[dst_ptr]) \n\t"
-       "gsswrc1 %[dest], 0x06(%[dst_ptr]) \n\t"
-       "punpckhwd %[dest], %[dest], %[dest] \n\t"
-       "gsswlc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
-       "gsswrc1 %[dest], 0x09(%[dst_ptr]) \n\t"
-       // Pixels 4 and 5.
-       "punpcklhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
-       "gsswlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
-       "gsswrc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
-       "punpckhwd %[dest], %[dest], %[dest] \n\t"
-       "gsswlc1 %[dest], 0x12(%[dst_ptr]) \n\t"
-       "gsswrc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
-       // Pixels 6 and 7.
-       "punpckhhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
-       "gsswlc1 %[dest], 0x15(%[dst_ptr]) \n\t"
-       "gsswrc1 %[dest], 0x12(%[dst_ptr]) \n\t"
-       "punpckhwd %[dest], %[dest], %[dest] \n\t"
-       "gsswlc1 %[dest], 0x18(%[dst_ptr]) \n\t"
-       "gsswrc1 %[dest], 0x15(%[dst_ptr]) \n\t"
-       "daddiu %[srcr_ptr], %[srcr_ptr], 0x08 \n\t"
-       "daddiu %[srcg_ptr], %[srcg_ptr], 0x08 \n\t"
-       "daddiu %[srcb_ptr], %[srcb_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x18 \n\t"
-       "daddi %[width], %[width], -0x08 \n\t"
-       "bnez %[width], 1b \n\t"
-       // Operands advanced by the asm are read-write, not input-only.
-       : [srcr] "=&f"(srcr), [srcg] "=&f"(srcg), [srcb] "=&f"(srcb),
-         [dest] "=&f"(dest), [srcrg_hi] "=&f"(srcrg_hi),
-         [srcrg_lo] "=&f"(srcrg_lo), [srcbz_hi] "=&f"(srcbz_hi),
-         [srcbz_lo] "=&f"(srcbz_lo), [srcr_ptr] "+r"(src_r),
-         [srcg_ptr] "+r"(src_g), [srcb_ptr] "+r"(src_b),
-         [dst_ptr] "+r"(dst_rgb), [width] "+r"(width)
-       : [temp] "f"(temp)
-       : "memory");
- }
- // Filter 2 rows of YUY2 UV's (422) into U and V (420).
- //
- // Each loop iteration reads 32 bytes from src_yuy2 and 32 bytes from the row
- // src_stride_yuy2 bytes after it, averages the two rows with pavgb, and
- // writes 8 bytes to dst_u and 8 bytes to dst_v. width drops by 16 per
- // iteration; the body runs at least once and repeats while width > 0.
- void YUY2ToUVRow_MMI(const uint8_t* src_yuy2,
-                      int src_stride_yuy2,
-                      uint8_t* dst_u,
-                      uint8_t* dst_v,
-                      int width) {
-   uint64_t c0 = 0xff00ff00ff00ff00;  // keeps the odd byte of each 16-bit lane
-   uint64_t c1 = 0x00ff00ff00ff00ff;  // keeps the even byte of each 16-bit lane
-   uint64_t temp[3];
-   uint64_t data[4];
-   uint64_t shift = 0x08;
-   uint64_t src_stride = 0x0;  // receives the address of the second row
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
-       "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
-       "daddu %[src_stride], %[src_yuy2], %[src_stride_yuy2] \n\t"
-       "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t"
-       "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t"
-       "pavgb %[t0], %[t0], %[t1] \n\t"
-       "gsldrc1 %[t2], 0x08(%[src_yuy2]) \n\t"
-       "gsldlc1 %[t2], 0x0f(%[src_yuy2]) \n\t"
-       "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t"
-       "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t"
-       "pavgb %[t1], %[t2], %[t1] \n\t"
-       // Drop the Y bytes, then pack the chroma bytes as U V U V ...
-       "and %[t0], %[t0], %[c0] \n\t"
-       "and %[t1], %[t1], %[c0] \n\t"
-       "psrlh %[t0], %[t0], %[shift] \n\t"
-       "psrlh %[t1], %[t1], %[shift] \n\t"
-       "packushb %[t0], %[t0], %[t1] \n\t"
-       // Split U (even bytes) from V (odd bytes). Both read t0 directly; the
-       // former mov.s copy only guaranteed the low 32 bits of the register.
-       "and %[d0], %[t0], %[c1] \n\t"
-       "psrlh %[d1], %[t0], %[shift] \n\t"
-       "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t"
-       "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t"
-       "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t"
-       "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t"
-       "pavgb %[t0], %[t0], %[t1] \n\t"
-       "gsldrc1 %[t2], 0x18(%[src_yuy2]) \n\t"
-       "gsldlc1 %[t2], 0x1f(%[src_yuy2]) \n\t"
-       "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t"
-       "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t"
-       "pavgb %[t1], %[t2], %[t1] \n\t"
-       "and %[t0], %[t0], %[c0] \n\t"
-       "and %[t1], %[t1], %[c0] \n\t"
-       "psrlh %[t0], %[t0], %[shift] \n\t"
-       "psrlh %[t1], %[t1], %[shift] \n\t"
-       "packushb %[t0], %[t0], %[t1] \n\t"
-       "and %[d2], %[t0], %[c1] \n\t"
-       "psrlh %[d3], %[t0], %[shift] \n\t"
-       "packushb %[d0], %[d0], %[d2] \n\t"
-       "packushb %[d1], %[d1], %[d3] \n\t"
-       "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
-       "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
-       "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
-       "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
-       "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t"
-       "daddiu %[dst_u], %[dst_u], 8 \n\t"
-       "daddiu %[dst_v], %[dst_v], 8 \n\t"
-       "daddiu %[width], %[width], -16 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
-         [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]),
-         [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride),
-         [src_yuy2] "+r"(src_yuy2), [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v),
-         [width] "+r"(width)
-       : [src_stride_yuy2] "r"(src_stride_yuy2), [c0] "f"(c0), [c1] "f"(c1),
-         [shift] "f"(shift)
-       : "memory");
- }
- // Copy row of YUY2 UV's (422) into U and V (422).
- //
- // Each loop iteration reads 32 bytes from src_yuy2 and writes 8 bytes to
- // dst_u and 8 bytes to dst_v. width drops by 16 per iteration; the body runs
- // at least once and repeats while width > 0.
- void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2,
-                         uint8_t* dst_u,
-                         uint8_t* dst_v,
-                         int width) {
-   uint64_t c0 = 0xff00ff00ff00ff00;  // keeps the odd byte of each 16-bit lane
-   uint64_t c1 = 0x00ff00ff00ff00ff;  // keeps the even byte of each 16-bit lane
-   uint64_t temp[2];
-   uint64_t data[4];
-   uint64_t shift = 0x08;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
-       "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
-       "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t"
-       "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t"
-       // Drop the Y bytes, then pack the chroma bytes as U V U V ...
-       "and %[t0], %[t0], %[c0] \n\t"
-       "and %[t1], %[t1], %[c0] \n\t"
-       "psrlh %[t0], %[t0], %[shift] \n\t"
-       "psrlh %[t1], %[t1], %[shift] \n\t"
-       "packushb %[t0], %[t0], %[t1] \n\t"
-       // Split U (even bytes) from V (odd bytes). Both read t0 directly; the
-       // former mov.s copy only guaranteed the low 32 bits of the register.
-       "and %[d0], %[t0], %[c1] \n\t"
-       "psrlh %[d1], %[t0], %[shift] \n\t"
-       "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t"
-       "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t"
-       "gsldrc1 %[t1], 0x18(%[src_yuy2]) \n\t"
-       "gsldlc1 %[t1], 0x1f(%[src_yuy2]) \n\t"
-       "and %[t0], %[t0], %[c0] \n\t"
-       "and %[t1], %[t1], %[c0] \n\t"
-       "psrlh %[t0], %[t0], %[shift] \n\t"
-       "psrlh %[t1], %[t1], %[shift] \n\t"
-       "packushb %[t0], %[t0], %[t1] \n\t"
-       "and %[d2], %[t0], %[c1] \n\t"
-       "psrlh %[d3], %[t0], %[shift] \n\t"
-       "packushb %[d0], %[d0], %[d2] \n\t"
-       "packushb %[d1], %[d1], %[d3] \n\t"
-       "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
-       "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
-       "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
-       "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
-       "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t"
-       "daddiu %[dst_u], %[dst_u], 8 \n\t"
-       "daddiu %[dst_v], %[dst_v], 8 \n\t"
-       "daddiu %[width], %[width], -16 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]),
-         [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3]),
-         [src_yuy2] "+r"(src_yuy2), [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v),
-         [width] "+r"(width)
-       : [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift)
-       : "memory");
- }
- // Copy row of YUY2 Y's (422) into Y (420/422).
- //
- // Each loop iteration reads 16 bytes from src_yuy2, keeps the even-indexed
- // bytes and writes the resulting 8 bytes to dst_y. width drops by 8 per
- // iteration; the body runs at least once and repeats while width > 0.
- void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
-   uint64_t c0 = 0x00ff00ff00ff00ff;  // keeps the even byte of each 16-bit lane
-   uint64_t temp[2];
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
-       "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
-       "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t"
-       "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t"
-       "and %[t0], %[t0], %[c0] \n\t"
-       "and %[t1], %[t1], %[c0] \n\t"
-       "packushb %[t0], %[t0], %[t1] \n\t"
-       "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t"
-       "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t"
-       "daddiu %[src_yuy2], %[src_yuy2], 16 \n\t"
-       "daddiu %[dst_y], %[dst_y], 8 \n\t"
-       "daddiu %[width], %[width], -8 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [src_yuy2] "+r"(src_yuy2),
-         [dst_y] "+r"(dst_y), [width] "+r"(width)
-       : [c0] "f"(c0)
-       : "memory");
- }
- // Filter 2 rows of UYVY UV's (422) into U and V (420).
- //
- // Each loop iteration reads 32 bytes from src_uyvy and 32 bytes from the row
- // src_stride_uyvy bytes after it, averages the two rows with pavgb, and
- // writes 8 bytes to dst_u and 8 bytes to dst_v. width drops by 16 per
- // iteration; the body runs at least once and repeats while width > 0.
- void UYVYToUVRow_MMI(const uint8_t* src_uyvy,
-                      int src_stride_uyvy,
-                      uint8_t* dst_u,
-                      uint8_t* dst_v,
-                      int width) {
-   uint64_t c0 = 0x00ff00ff00ff00ff;  // keeps the even byte of each 16-bit lane
-   uint64_t temp[3];
-   uint64_t data[4];
-   uint64_t shift = 0x08;
-   uint64_t src_stride = 0x0;  // receives the address of the second row
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
-       "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
-       "daddu %[src_stride], %[src_uyvy], %[src_stride_uyvy] \n\t"
-       "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t"
-       "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t"
-       "pavgb %[t0], %[t0], %[t1] \n\t"
-       "gsldrc1 %[t2], 0x08(%[src_uyvy]) \n\t"
-       "gsldlc1 %[t2], 0x0f(%[src_uyvy]) \n\t"
-       "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t"
-       "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t"
-       "pavgb %[t1], %[t2], %[t1] \n\t"
-       // Drop the Y bytes, then pack the chroma bytes as U V U V ...
-       "and %[t0], %[t0], %[c0] \n\t"
-       "and %[t1], %[t1], %[c0] \n\t"
-       "packushb %[t0], %[t0], %[t1] \n\t"
-       // Split U (even bytes) from V (odd bytes). Both read t0 directly; the
-       // former mov.s copy only guaranteed the low 32 bits of the register.
-       "and %[d0], %[t0], %[c0] \n\t"
-       "psrlh %[d1], %[t0], %[shift] \n\t"
-       "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t"
-       "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t"
-       "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t"
-       "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t"
-       "pavgb %[t0], %[t0], %[t1] \n\t"
-       "gsldrc1 %[t2], 0x18(%[src_uyvy]) \n\t"
-       "gsldlc1 %[t2], 0x1f(%[src_uyvy]) \n\t"
-       "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t"
-       "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t"
-       "pavgb %[t1], %[t2], %[t1] \n\t"
-       "and %[t0], %[t0], %[c0] \n\t"
-       "and %[t1], %[t1], %[c0] \n\t"
-       "packushb %[t0], %[t0], %[t1] \n\t"
-       "and %[d2], %[t0], %[c0] \n\t"
-       "psrlh %[d3], %[t0], %[shift] \n\t"
-       "packushb %[d0], %[d0], %[d2] \n\t"
-       "packushb %[d1], %[d1], %[d3] \n\t"
-       "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
-       "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
-       "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
-       "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
-       "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t"
-       "daddiu %[dst_u], %[dst_u], 8 \n\t"
-       "daddiu %[dst_v], %[dst_v], 8 \n\t"
-       "daddiu %[width], %[width], -16 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
-         [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]),
-         [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride),
-         [src_uyvy] "+r"(src_uyvy), [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v),
-         [width] "+r"(width)
-       : [src_stride_uyvy] "r"(src_stride_uyvy), [c0] "f"(c0),
-         [shift] "f"(shift)
-       : "memory");
- }
- // Copy row of UYVY UV's (422) into U and V (422).
- //
- // Each loop iteration reads 32 bytes from src_uyvy and writes 8 bytes to
- // dst_u and 8 bytes to dst_v. width drops by 16 per iteration; the body runs
- // at least once and repeats while width > 0.
- void UYVYToUV422Row_MMI(const uint8_t* src_uyvy,
-                         uint8_t* dst_u,
-                         uint8_t* dst_v,
-                         int width) {
-   uint64_t c0 = 0x00ff00ff00ff00ff;  // keeps the even byte of each 16-bit lane
-   uint64_t temp[2];
-   uint64_t data[4];
-   uint64_t shift = 0x08;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
-       "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
-       "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t"
-       "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t"
-       // Drop the Y bytes, then pack the chroma bytes as U V U V ...
-       "and %[t0], %[t0], %[c0] \n\t"
-       "and %[t1], %[t1], %[c0] \n\t"
-       "packushb %[t0], %[t0], %[t1] \n\t"
-       // Split U (even bytes) from V (odd bytes). Both read t0 directly; the
-       // former mov.s copy only guaranteed the low 32 bits of the register.
-       "and %[d0], %[t0], %[c0] \n\t"
-       "psrlh %[d1], %[t0], %[shift] \n\t"
-       "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t"
-       "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t"
-       "gsldrc1 %[t1], 0x18(%[src_uyvy]) \n\t"
-       "gsldlc1 %[t1], 0x1f(%[src_uyvy]) \n\t"
-       "and %[t0], %[t0], %[c0] \n\t"
-       "and %[t1], %[t1], %[c0] \n\t"
-       "packushb %[t0], %[t0], %[t1] \n\t"
-       "and %[d2], %[t0], %[c0] \n\t"
-       "psrlh %[d3], %[t0], %[shift] \n\t"
-       "packushb %[d0], %[d0], %[d2] \n\t"
-       "packushb %[d1], %[d1], %[d3] \n\t"
-       "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
-       "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
-       "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
-       "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
-       "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t"
-       "daddiu %[dst_u], %[dst_u], 8 \n\t"
-       "daddiu %[dst_v], %[dst_v], 8 \n\t"
-       "daddiu %[width], %[width], -16 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]),
-         [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3]),
-         [src_uyvy] "+r"(src_uyvy), [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v),
-         [width] "+r"(width)
-       : [c0] "f"(c0), [shift] "f"(shift)
-       : "memory");
- }
- // Copy row of UYVY Y's (422) into Y (420/422).
- //
- // Each loop iteration reads 16 bytes from src_uyvy, keeps the odd-indexed
- // bytes and writes the resulting 8 bytes to dst_y. width drops by 8 per
- // iteration; the body runs at least once and repeats while width > 0.
- void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
-   uint64_t c0 = 0x00ff00ff00ff00ff;  // keeps the even byte of each 16-bit lane
-   uint64_t shift = 0x08;
-   uint64_t temp[2];
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
-       "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
-       "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t"
-       "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t"
-       // Shift the whole 64-bit value right one byte so the Y bytes land in
-       // the even positions, then mask away everything else.
-       "dsrl %[t0], %[t0], %[shift] \n\t"
-       "dsrl %[t1], %[t1], %[shift] \n\t"
-       "and %[t0], %[t0], %[c0] \n\t"
-       "and %[t1], %[t1], %[c0] \n\t"
-       "packushb %[t0], %[t0], %[t1] \n\t"
-       "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t"
-       "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t"
-       "daddiu %[src_uyvy], %[src_uyvy], 16 \n\t"
-       "daddiu %[dst_y], %[dst_y], 8 \n\t"
-       "daddiu %[width], %[width], -8 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [src_uyvy] "+r"(src_uyvy),
-         [dst_y] "+r"(dst_y), [width] "+r"(width)
-       : [c0] "f"(c0), [shift] "f"(shift)
-       : "memory");
- }
- // Blend src_argb0 over src_argb1 and store to dst_argb.
- // dst_argb may be src_argb0 or src_argb1.
- // This code mimics the SSSE3 version for better testability.
- //
- // Per colour channel the loop computes
- //   dst = src0 + ((src1 * (255 - src0_alpha)) >> 8)
- // with unsigned saturation, then forces the alpha byte of every output pixel
- // to 0xFF. Each iteration handles 8 bytes (2 pixels) and subtracts 2 from
- // width; the body runs at least once and repeats while width > 0.
- void ARGBBlendRow_MMI(const uint8_t* src_argb0,
-                       const uint8_t* src_argb1,
-                       uint8_t* dst_argb,
-                       int width) {
-   uint64_t src0, src1, dest, alpha, src0_hi, src0_lo, src1_hi, src1_lo, dest_hi,
-       dest_lo;
-   const uint64_t mask0 = 0x0;                    // zero, for byte widening
-   const uint64_t mask1 = 0x00FFFFFF00FFFFFFULL;  // colour bytes of 2 pixels
-   const uint64_t mask2 = 0x00FF00FF00FF00FFULL;  // 255 in every 16-bit lane
-   const uint64_t mask3 = 0xFF;   // pshufh selector: lane 3 into all lanes
-   const uint64_t mask4 = ~mask1;                 // alpha bytes of 2 pixels
-   const uint64_t shift = 0x08;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
-       "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
-       "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t"
-       "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
-       "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
-       "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t"
-       // First pixel: broadcast (255 - alpha) and blend.
-       "psubush %[alpha], %[mask2], %[src0_lo] \n\t"
-       "pshufh %[alpha], %[alpha], %[mask3] \n\t"
-       "pmullh %[dest_lo], %[src1_lo], %[alpha] \n\t"
-       "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
-       "paddush %[dest_lo], %[dest_lo], %[src0_lo] \n\t"
-       // Second pixel: same computation on the high halves.
-       "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t"
-       "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t"
-       "psubush %[alpha], %[mask2], %[src0_hi] \n\t"
-       "pshufh %[alpha], %[alpha], %[mask3] \n\t"
-       "pmullh %[dest_hi], %[src1_hi], %[alpha] \n\t"
-       "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
-       "paddush %[dest_hi], %[dest_hi], %[src0_hi] \n\t"
-       "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-       // Replace the computed alpha with opaque 0xFF.
-       "and %[dest], %[dest], %[mask1] \n\t"
-       "or %[dest], %[dest], %[mask4] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
-       "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x02 \n\t"
-       // bgtz (not bnez) so an odd width still terminates.
-       "bgtz %[width], 1b \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [src0] "=&f"(src0), [src1] "=&f"(src1), [alpha] "=&f"(alpha),
-         [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
-         [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
-         [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
-         [src0_ptr] "+r"(src_argb0), [src1_ptr] "+r"(src_argb1),
-         [dst_ptr] "+r"(dst_argb), [width] "+r"(width)
-       : [mask0] "f"(mask0), [mask1] "f"(mask1), [mask2] "f"(mask2),
-         [mask3] "f"(mask3), [mask4] "f"(mask4), [shift] "f"(shift)
-       : "memory");
- }
- // Blends two 8-bit planes using a per-byte alpha plane:
- //   dst = (src0 * alpha + src1 * (255 - alpha) + 255) >> 8
- // Each loop iteration processes 8 bytes from every input and subtracts 8
- // from width; the body runs at least once and repeats while width > 0.
- void BlendPlaneRow_MMI(const uint8_t* src0,
-                        const uint8_t* src1,
-                        const uint8_t* alpha,
-                        uint8_t* dst,
-                        int width) {
-   uint64_t source0, source1, dest, alph;
-   uint64_t src0_hi, src0_lo, src1_hi, src1_lo, alpha_hi, alpha_lo, dest_hi,
-       dest_lo;
-   uint64_t alpha_rev, alpha_rev_lo, alpha_rev_hi;
-   const uint64_t mask0 = 0x0;                    // zero, for byte widening
-   const uint64_t mask1 = 0xFFFFFFFFFFFFFFFFULL;  // 255 in every byte
-   const uint64_t mask2 = 0x00FF00FF00FF00FFULL;  // +255 bias per 16-bit lane
-   const uint64_t shift = 0x08;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
-       "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
-       "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t"
-       "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t"
-       "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
-       "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
-       "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t"
-       "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t"
-       "gsldlc1 %[alpha], 0x07(%[alpha_ptr]) \n\t"
-       "gsldrc1 %[alpha], 0x00(%[alpha_ptr]) \n\t"
-       "psubusb %[alpha_r], %[mask1], %[alpha] \n\t"  // 255 - alpha
-       "punpcklbh %[alpha_lo], %[alpha], %[mask0] \n\t"
-       "punpckhbh %[alpha_hi], %[alpha], %[mask0] \n\t"
-       "punpcklbh %[alpha_rlo], %[alpha_r], %[mask0] \n\t"
-       "punpckhbh %[alpha_rhi], %[alpha_r], %[mask0] \n\t"
-       // Low four bytes.
-       "pmullh %[dest_lo], %[src0_lo], %[alpha_lo] \n\t"
-       "pmullh %[dest], %[src1_lo], %[alpha_rlo] \n\t"
-       "paddush %[dest_lo], %[dest_lo], %[dest] \n\t"
-       "paddush %[dest_lo], %[dest_lo], %[mask2] \n\t"
-       "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
-       // High four bytes.
-       "pmullh %[dest_hi], %[src0_hi], %[alpha_hi] \n\t"
-       "pmullh %[dest], %[src1_hi], %[alpha_rhi] \n\t"
-       "paddush %[dest_hi], %[dest_hi], %[dest] \n\t"
-       "paddush %[dest_hi], %[dest_hi], %[mask2] \n\t"
-       "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
-       "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
-       "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
-       "daddiu %[alpha_ptr], %[alpha_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x08 \n\t"
-       // bgtz (not bnez) so a width that is not a multiple of 8 still stops.
-       "bgtz %[width], 1b \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [src0] "=&f"(source0), [src1] "=&f"(source1), [alpha] "=&f"(alph),
-         [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
-         [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
-         [alpha_hi] "=&f"(alpha_hi), [alpha_lo] "=&f"(alpha_lo),
-         [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
-         [alpha_rlo] "=&f"(alpha_rev_lo), [alpha_rhi] "=&f"(alpha_rev_hi),
-         [alpha_r] "=&f"(alpha_rev), [src0_ptr] "+r"(src0),
-         [src1_ptr] "+r"(src1), [alpha_ptr] "+r"(alpha), [dst_ptr] "+r"(dst),
-         [width] "+r"(width)
-       : [mask0] "f"(mask0), [mask1] "f"(mask1), [mask2] "f"(mask2),
-         [shift] "f"(shift)
-       : "memory");
- }
- // Multiply source RGB by alpha and store to destination.
- // This code mimics the SSSE3 version for better testability.
- //
- // Every byte is widened to a 16-bit lane holding the byte twice (c * 257),
- // multiplied by the pixel's widened alpha with pmulhuh (high 16 bits of the
- // product) and shifted right by 8. The alpha byte itself is copied from the
- // source unchanged. Each iteration handles 8 bytes (2 pixels) and subtracts
- // 2 from width; the body runs at least once and repeats while width > 0.
- void ARGBAttenuateRow_MMI(const uint8_t* src_argb,
-                           uint8_t* dst_argb,
-                           int width) {
-   uint64_t src, src_hi, src_lo, dest, dest_hi, dest_lo, alpha;
-   const uint64_t mask0 = 0xFF;   // pshufh selector: lane 3 into all lanes
-   const uint64_t mask1 = 0xFF000000FF000000ULL;  // alpha bytes of 2 pixels
-   const uint64_t mask2 = ~mask1;                 // colour bytes of 2 pixels
-   const uint64_t shift = 0x08;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
-       "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-       "punpcklbh %[src_lo], %[src], %[src] \n\t"
-       "punpckhbh %[src_hi], %[src], %[src] \n\t"
-       "pshufh %[alpha], %[src_lo], %[mask0] \n\t"
-       "pmulhuh %[dest_lo], %[alpha], %[src_lo] \n\t"
-       "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
-       "pshufh %[alpha], %[src_hi], %[mask0] \n\t"
-       "pmulhuh %[dest_hi], %[alpha], %[src_hi] \n\t"
-       "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
-       "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-       // Keep the attenuated colour bytes, restore the original alpha bytes.
-       "and %[dest], %[dest], %[mask2] \n\t"
-       "and %[src], %[src], %[mask1] \n\t"
-       "or %[dest], %[dest], %[src] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x02 \n\t"
-       // bgtz (not bnez) so an odd width still terminates.
-       "bgtz %[width], 1b \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
-         [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
-         [dest_lo] "=&f"(dest_lo), [alpha] "=&f"(alpha),
-         [src_ptr] "+r"(src_argb), [dst_ptr] "+r"(dst_argb),
-         [width] "+r"(width)
-       : [mask0] "f"(mask0), [mask1] "f"(mask1), [mask2] "f"(mask2),
-         [shift] "f"(shift)
-       : "memory");
- }
- // Builds one row of a 4-channel cumulative-sum table.
- //
- // For each of `width` 4-byte pixels in `row`, the four bytes are widened to
- // 32 bits and added to a running per-channel sum for this row; the running
- // sum plus the matching four int32 values from previous_cumsum is written to
- // cumsum. Each iteration reads 4 bytes from row and 16 bytes from
- // previous_cumsum, and writes 16 bytes to cumsum. The body runs at least
- // once and repeats while width > 0.
- void ComputeCumulativeSumRow_MMI(const uint8_t* row,
-                                  int32_t* cumsum,
-                                  const int32_t* previous_cumsum,
-                                  int width) {
-   int64_t row_sum[2] = {0, 0};  // two registers, each holding 2 x int32 sums
-   uint64_t src, dest0, dest1, presrc0, presrc1;
-   const uint64_t mask = 0x0;  // zero, for widening
-   __asm__ volatile(
-       "xor %[row_sum0], %[row_sum0], %[row_sum0] \n\t"
-       "xor %[row_sum1], %[row_sum1], %[row_sum1] \n\t"
-       "1: \n\t"
-       "gslwlc1 %[src], 0x03(%[row_ptr]) \n\t"
-       "gslwrc1 %[src], 0x00(%[row_ptr]) \n\t"
-       "punpcklbh %[src], %[src], %[mask] \n\t"     // 4 bytes -> 4 x 16 bit
-       "punpcklhw %[dest0], %[src], %[mask] \n\t"   // channels 0,1 as 32 bit
-       "punpckhhw %[dest1], %[src], %[mask] \n\t"   // channels 2,3 as 32 bit
-       "paddw %[row_sum0], %[row_sum0], %[dest0] \n\t"
-       "paddw %[row_sum1], %[row_sum1], %[dest1] \n\t"
-       "gsldlc1 %[presrc0], 0x07(%[pre_ptr]) \n\t"
-       "gsldrc1 %[presrc0], 0x00(%[pre_ptr]) \n\t"
-       "gsldlc1 %[presrc1], 0x0f(%[pre_ptr]) \n\t"
-       "gsldrc1 %[presrc1], 0x08(%[pre_ptr]) \n\t"
-       "paddw %[dest0], %[row_sum0], %[presrc0] \n\t"
-       "paddw %[dest1], %[row_sum1], %[presrc1] \n\t"
-       "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
-       "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
-       "daddiu %[row_ptr], %[row_ptr], 0x04 \n\t"
-       "daddiu %[pre_ptr], %[pre_ptr], 0x10 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
-       "daddi %[width], %[width], -0x01 \n\t"
-       // bgtz (not bnez) so a negative width cannot loop indefinitely.
-       "bgtz %[width], 1b \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [src] "=&f"(src), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1),
-         [row_sum0] "+&f"(row_sum[0]), [row_sum1] "+&f"(row_sum[1]),
-         [presrc0] "=&f"(presrc0), [presrc1] "=&f"(presrc1),
-         [row_ptr] "+r"(row), [pre_ptr] "+r"(previous_cumsum),
-         [dst_ptr] "+r"(cumsum), [width] "+r"(width)
-       : [mask] "f"(mask)
-       : "memory");
- }
- // Blends the row at src_ptr with the row src_stride bytes after it and
- // writes the result to dst_ptr, 8 bytes per loop iteration.
- //
- //   source_y_fraction == 0   : plain copy of the first row.
- //   source_y_fraction == 128 : rounded average of the two rows (pavgb).
- //   otherwise                : (row0 * (256 - f) + row1 * f + 128) >> 8.
- //
- // In every path width drops by 8 per iteration; the body runs at least once
- // and repeats while width > 0.
- void InterpolateRow_MMI(uint8_t* dst_ptr,
-                         const uint8_t* src_ptr,
-                         ptrdiff_t src_stride,
-                         int width,
-                         int source_y_fraction) {
-   if (source_y_fraction == 0) {
-     // Copy path. Uses the unaligned MMI load/store pairs, like the other
-     // paths, instead of ld/sd which need 8-byte aligned addresses.
-     uint64_t pix;
-     __asm__ volatile(
-         "1: \n\t"
-         "gsldrc1 %[pix], 0x0(%[src_ptr]) \n\t"
-         "gsldlc1 %[pix], 0x7(%[src_ptr]) \n\t"
-         "gssdrc1 %[pix], 0x0(%[dst_ptr]) \n\t"
-         "gssdlc1 %[pix], 0x7(%[dst_ptr]) \n\t"
-         "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
-         "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
-         "daddiu %[width], %[width], -8 \n\t"
-         "bgtz %[width], 1b \n\t"
-         "nop \n\t"
-         : [pix] "=&f"(pix), [dst_ptr] "+r"(dst_ptr), [src_ptr] "+r"(src_ptr),
-           [width] "+r"(width)
-         :
-         : "memory");
-     return;
-   }
-   if (source_y_fraction == 128) {
-     uint64_t uv = 0x0;
-     uint64_t uv_stride = 0x0;
-     uint64_t row1 = 0x0;  // address of the second row; replaces raw $t0
-     __asm__ volatile(
-         "1: \n\t"
-         "gsldrc1 %[uv], 0x0(%[src_ptr]) \n\t"
-         "gsldlc1 %[uv], 0x7(%[src_ptr]) \n\t"
-         "daddu %[row1], %[src_ptr], %[stride] \n\t"
-         "gsldrc1 %[uv_stride], 0x0(%[row1]) \n\t"
-         "gsldlc1 %[uv_stride], 0x7(%[row1]) \n\t"
-         "pavgb %[uv], %[uv], %[uv_stride] \n\t"
-         "gssdrc1 %[uv], 0x0(%[dst_ptr]) \n\t"
-         "gssdlc1 %[uv], 0x7(%[dst_ptr]) \n\t"
-         "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
-         "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
-         "daddiu %[width], %[width], -8 \n\t"
-         "bgtz %[width], 1b \n\t"
-         "nop \n\t"
-         : [uv] "=&f"(uv), [uv_stride] "=&f"(uv_stride), [row1] "=&r"(row1),
-           [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr),
-           [width] "+r"(width)
-         : [stride] "r"((int64_t)src_stride)
-         : "memory");
-     return;
-   }
-   const uint8_t* src_ptr1 = src_ptr + src_stride;
-   uint64_t temp;
-   uint64_t data[4];
-   uint64_t zero = 0x0;
-   uint64_t c0 = 0x0080008000800080;   // +128 rounding bias per 16-bit lane
-   uint64_t fy0 = 0x0100010001000100;  // 256 per lane; becomes 256 - f
-   // Only the low 16 bits are used: pshufh broadcasts lane 0 to all lanes.
-   uint64_t fy1 = (uint64_t)source_y_fraction;
-   uint64_t shift = 0x8;
-   __asm__ volatile(
-       "pshufh %[fy1], %[fy1], %[zero] \n\t"
-       "psubh %[fy0], %[fy0], %[fy1] \n\t"
-       "1: \n\t"
-       "gsldrc1 %[t0], 0x0(%[src_ptr]) \n\t"
-       "gsldlc1 %[t0], 0x7(%[src_ptr]) \n\t"
-       "punpcklbh %[d0], %[t0], %[zero] \n\t"
-       "punpckhbh %[d1], %[t0], %[zero] \n\t"
-       "gsldrc1 %[t0], 0x0(%[src_ptr1]) \n\t"
-       "gsldlc1 %[t0], 0x7(%[src_ptr1]) \n\t"
-       "punpcklbh %[d2], %[t0], %[zero] \n\t"
-       "punpckhbh %[d3], %[t0], %[zero] \n\t"
-       "pmullh %[d0], %[d0], %[fy0] \n\t"
-       "pmullh %[d2], %[d2], %[fy1] \n\t"
-       "paddh %[d0], %[d0], %[d2] \n\t"
-       "paddh %[d0], %[d0], %[c0] \n\t"
-       "psrlh %[d0], %[d0], %[shift] \n\t"
-       "pmullh %[d1], %[d1], %[fy0] \n\t"
-       "pmullh %[d3], %[d3], %[fy1] \n\t"
-       "paddh %[d1], %[d1], %[d3] \n\t"
-       "paddh %[d1], %[d1], %[c0] \n\t"
-       "psrlh %[d1], %[d1], %[shift] \n\t"
-       "packushb %[d0], %[d0], %[d1] \n\t"
-       "gssdrc1 %[d0], 0x0(%[dst_ptr]) \n\t"
-       "gssdlc1 %[d0], 0x7(%[dst_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
-       "daddiu %[src_ptr1], %[src_ptr1], 8 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
-       "daddiu %[width], %[width], -8 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // fy0/fy1 are rewritten before the loop and the pointers/width inside
-       // it, so all of them are read-write operands rather than inputs.
-       : [t0] "=&f"(temp), [d0] "=&f"(data[0]), [d1] "=&f"(data[1]),
-         [d2] "=&f"(data[2]), [d3] "=&f"(data[3]), [fy0] "+f"(fy0),
-         [fy1] "+f"(fy1), [src_ptr] "+r"(src_ptr), [src_ptr1] "+r"(src_ptr1),
-         [dst_ptr] "+r"(dst_ptr), [width] "+r"(width)
-       : [c0] "f"(c0), [shift] "f"(shift), [zero] "f"(zero)
-       : "memory");
- }
- // Use first 4 shuffler values to reorder ARGB channels.
- //
- // Only the low two bits of shuffler[0..3] are used: output byte k of every
- // pixel is input byte (shuffler[k] & 3) of the same pixel. Each iteration
- // handles 8 bytes (2 pixels) and subtracts 2 from width; the body runs at
- // least once and repeats while width > 0.
- void ARGBShuffleRow_MMI(const uint8_t* src_argb,
-                         uint8_t* dst_argb,
-                         const uint8_t* shuffler,
-                         int width) {
-   uint64_t source, dest0, dest1, dest;
-   const uint64_t mask0 = 0x0;  // zero, for byte widening
-   // pshufh selector: two bits per output lane, lane 0 in the low bits.
-   const uint64_t mask1 = (shuffler[0] & 0x03) | ((shuffler[1] & 0x03) << 2) |
-                          ((shuffler[2] & 0x03) << 4) |
-                          ((shuffler[3] & 0x03) << 6);
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
-       "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-       "punpcklbh %[dest0], %[src], %[mask0] \n\t"
-       "pshufh %[dest0], %[dest0], %[mask1] \n\t"
-       "punpckhbh %[dest1], %[src], %[mask0] \n\t"
-       "pshufh %[dest1], %[dest1], %[mask1] \n\t"
-       "packushb %[dest], %[dest0], %[dest1] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x02 \n\t"
-       // bgtz (not bnez) so an odd width still terminates.
-       "bgtz %[width], 1b \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
-         [dest1] "=&f"(dest1), [src_ptr] "+r"(src_argb),
-         [dst_ptr] "+r"(dst_argb), [width] "+r"(width)
-       : [mask0] "f"(mask0), [mask1] "f"(mask1)
-       : "memory");
- }
- // Packs planar Y, U and V into YUY2 byte order: Y0 U0 Y1 V0 Y2 U1 Y3 V1 ...
- //
- // Each loop iteration reads 8 bytes from src_y and 4 bytes from each of
- // src_u and src_v, and writes 16 bytes to dst_frame. width drops by 8 per
- // iteration; the body runs at least once and repeats while width > 0.
- void I422ToYUY2Row_MMI(const uint8_t* src_y,
-                        const uint8_t* src_u,
-                        const uint8_t* src_v,
-                        uint8_t* dst_frame,
-                        int width) {
-   uint64_t ty, tu, tv;  // one scratch register per plane
-   uint64_t vu = 0x0;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[ty], 0x7(%[src_y]) \n\t"   // 8 Y bytes
-       "gsldrc1 %[ty], 0x0(%[src_y]) \n\t"
-       "gslwlc1 %[tu], 0x3(%[src_u]) \n\t"   // 4 U bytes
-       "gslwrc1 %[tu], 0x0(%[src_u]) \n\t"
-       "gslwlc1 %[tv], 0x3(%[src_v]) \n\t"   // 4 V bytes
-       "gslwrc1 %[tv], 0x0(%[src_v]) \n\t"
-       "punpcklbh %[vu], %[tu], %[tv] \n\t"  // U0 V0 U1 V1 U2 V2 U3 V3
-       "punpcklbh %[tu], %[ty], %[vu] \n\t"  // Y0 U0 Y1 V0 Y2 U1 Y3 V1
-       "gssdlc1 %[tu], 0x7(%[dst_frame]) \n\t"
-       "gssdrc1 %[tu], 0x0(%[dst_frame]) \n\t"
-       "punpckhbh %[tu], %[ty], %[vu] \n\t"  // Y4 U2 Y5 V2 Y6 U3 Y7 V3
-       "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t"
-       "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t"
-       "daddiu %[src_y], %[src_y], 8 \n\t"
-       "daddiu %[src_u], %[src_u], 4 \n\t"
-       "daddiu %[src_v], %[src_v], 4 \n\t"
-       "daddiu %[dst_frame], %[dst_frame], 16 \n\t"
-       "daddiu %[width], %[width], -8 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [ty] "=&f"(ty), [tu] "=&f"(tu), [tv] "=&f"(tv), [vu] "=&f"(vu),
-         [src_y] "+r"(src_y), [src_u] "+r"(src_u), [src_v] "+r"(src_v),
-         [dst_frame] "+r"(dst_frame), [width] "+r"(width)
-       :
-       : "memory");
- }
- // Packs planar Y, U and V into UYVY byte order: U0 Y0 V0 Y1 U1 Y2 V1 Y3 ...
- //
- // Each loop iteration reads 8 bytes from src_y and 4 bytes from each of
- // src_u and src_v, and writes 16 bytes to dst_frame. width drops by 8 per
- // iteration; the body runs at least once and repeats while width > 0.
- void I422ToUYVYRow_MMI(const uint8_t* src_y,
-                        const uint8_t* src_u,
-                        const uint8_t* src_v,
-                        uint8_t* dst_frame,
-                        int width) {
-   uint64_t ty, tu, tv;  // one scratch register per plane
-   uint64_t vu = 0x0;
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[ty], 0x7(%[src_y]) \n\t"   // 8 Y bytes
-       "gsldrc1 %[ty], 0x0(%[src_y]) \n\t"
-       "gslwlc1 %[tu], 0x3(%[src_u]) \n\t"   // 4 U bytes
-       "gslwrc1 %[tu], 0x0(%[src_u]) \n\t"
-       "gslwlc1 %[tv], 0x3(%[src_v]) \n\t"   // 4 V bytes
-       "gslwrc1 %[tv], 0x0(%[src_v]) \n\t"
-       "punpcklbh %[vu], %[tu], %[tv] \n\t"  // U0 V0 U1 V1 U2 V2 U3 V3
-       "punpcklbh %[tu], %[vu], %[ty] \n\t"  // U0 Y0 V0 Y1 U1 Y2 V1 Y3
-       "gssdlc1 %[tu], 0x7(%[dst_frame]) \n\t"
-       "gssdrc1 %[tu], 0x0(%[dst_frame]) \n\t"
-       "punpckhbh %[tu], %[vu], %[ty] \n\t"  // U2 Y4 V2 Y5 U3 Y6 V3 Y7
-       "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t"
-       "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t"
-       "daddiu %[src_y], %[src_y], 8 \n\t"
-       "daddiu %[src_u], %[src_u], 4 \n\t"
-       "daddiu %[src_v], %[src_v], 4 \n\t"
-       "daddiu %[dst_frame], %[dst_frame], 16 \n\t"
-       "daddiu %[width], %[width], -8 \n\t"
-       "bgtz %[width], 1b \n\t"
-       "nop \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [ty] "=&f"(ty), [tu] "=&f"(tu), [tv] "=&f"(tv), [vu] "=&f"(vu),
-         [src_y] "+r"(src_y), [src_u] "+r"(src_u), [src_v] "+r"(src_v),
-         [dst_frame] "+r"(dst_frame), [width] "+r"(width)
-       :
-       : "memory");
- }
- // Copies the alpha byte (byte 3 of every 4-byte pixel) from src into dst,
- // leaving the other three bytes of each dst pixel unchanged. Each iteration
- // handles 8 bytes (2 pixels) and subtracts 2 from width; the body runs at
- // least once and repeats while width > 0.
- void ARGBCopyAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
-   uint64_t source, dest;
-   const uint64_t mask0 = 0xff000000ff000000ULL;  // alpha bytes of 2 pixels
-   const uint64_t mask1 = ~mask0;                 // colour bytes of 2 pixels
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
-       "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-       "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "and %[src], %[src], %[mask0] \n\t"
-       "and %[dest], %[dest], %[mask1] \n\t"
-       "or %[dest], %[src], %[dest] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x02 \n\t"
-       // bgtz (not bnez) so an odd width still terminates.
-       "bgtz %[width], 1b \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [src] "=&f"(source), [dest] "=&f"(dest), [src_ptr] "+r"(src),
-         [dst_ptr] "+r"(dst), [width] "+r"(width)
-       : [mask0] "f"(mask0), [mask1] "f"(mask1)
-       : "memory");
- }
- // Extracts the alpha byte (byte 3 of every 4-byte pixel) of src_argb into
- // the packed byte array dst_a. Each iteration reads 32 bytes (8 pixels),
- // writes 8 bytes and subtracts 8 from width; the body runs at least once
- // and repeats while width > 0.
- void ARGBExtractAlphaRow_MMI(const uint8_t* src_argb,
-                              uint8_t* dst_a,
-                              int width) {
-   uint64_t src, dest0, dest1, dest_lo, dest_hi, dest;
-   const uint64_t mask = 0xff000000ff000000ULL;  // alpha bytes of 2 pixels
-   const uint64_t shift = 0x18;  // moves alpha to the low byte of each word
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
-       "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-       "and %[dest0], %[src], %[mask] \n\t"
-       "psrlw %[dest0], %[dest0], %[shift] \n\t"
-       "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t"
-       "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t"
-       "and %[dest1], %[src], %[mask] \n\t"
-       "psrlw %[dest1], %[dest1], %[shift] \n\t"
-       "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"  // alphas 0..3
-       "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t"
-       "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t"
-       "and %[dest0], %[src], %[mask] \n\t"
-       "psrlw %[dest0], %[dest0], %[shift] \n\t"
-       "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t"
-       "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t"
-       "and %[dest1], %[src], %[mask] \n\t"
-       "psrlw %[dest1], %[dest1], %[shift] \n\t"
-       "packsswh %[dest_hi], %[dest0], %[dest1] \n\t"  // alphas 4..7
-       "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
-       "daddi %[width], %[width], -0x08 \n\t"
-       // bgtz (not bnez) so a width that is not a multiple of 8 still stops.
-       "bgtz %[width], 1b \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
-         [dest1] "=&f"(dest1), [dest_lo] "=&f"(dest_lo),
-         [dest_hi] "=&f"(dest_hi), [src_ptr] "+r"(src_argb),
-         [dst_ptr] "+r"(dst_a), [width] "+r"(width)
-       : [mask] "f"(mask), [shift] "f"(shift)
-       : "memory");
- }
- // Writes each byte of src into the alpha byte (byte 3) of the matching
- // 4-byte pixel in dst, leaving the other three bytes of every dst pixel
- // unchanged. Each iteration reads 8 bytes from src, updates 32 bytes
- // (8 pixels) of dst and subtracts 8 from width; the body runs at least once
- // and repeats while width > 0.
- void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
-   uint64_t source, dest0, dest1, dest;
-   const uint64_t mask0 = 0x0;                    // zero, for widening
-   const uint64_t mask1 = 0x00ffffff00ffffffULL;  // colour bytes of 2 pixels
-   __asm__ volatile(
-       "1: \n\t"
-       "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
-       "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-       // Widening with zero as the FIRST operand places each source byte in
-       // the top byte of its 32-bit word, i.e. in the alpha position.
-       "punpcklbh %[dest0], %[mask0], %[src] \n\t"
-       "punpcklhw %[dest1], %[mask0], %[dest0] \n\t"  // pixels 0,1
-       "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "and %[dest], %[dest], %[mask1] \n\t"
-       "or %[dest], %[dest], %[dest1] \n\t"
-       "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-       "punpckhhw %[dest1], %[mask0], %[dest0] \n\t"  // pixels 2,3
-       "gsldlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
-       "gsldrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-       "and %[dest], %[dest], %[mask1] \n\t"
-       "or %[dest], %[dest], %[dest1] \n\t"
-       "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-       "punpckhbh %[dest0], %[mask0], %[src] \n\t"
-       "punpcklhw %[dest1], %[mask0], %[dest0] \n\t"  // pixels 4,5
-       "gsldlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
-       "gsldrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
-       "and %[dest], %[dest], %[mask1] \n\t"
-       "or %[dest], %[dest], %[dest1] \n\t"
-       "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
-       "punpckhhw %[dest1], %[mask0], %[dest0] \n\t"  // pixels 6,7
-       "gsldlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
-       "gsldrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
-       "and %[dest], %[dest], %[mask1] \n\t"
-       "or %[dest], %[dest], %[dest1] \n\t"
-       "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
-       "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
-       "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
-       "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t"
-       "daddi %[width], %[width], -0x08 \n\t"
-       // bgtz (not bnez) so a width that is not a multiple of 8 still stops.
-       "bgtz %[width], 1b \n\t"
-       // Pointers and width are advanced by the loop, so they are read-write
-       // operands rather than inputs.
-       : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
-         [dest1] "=&f"(dest1), [src_ptr] "+r"(src), [dst_ptr] "+r"(dst),
-         [width] "+r"(width)
-       : [mask0] "f"(mask0), [mask1] "f"(mask1)
-       : "memory");
- }
- #endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
- #ifdef __cplusplus
- } // extern "C"
- } // namespace libyuv
- #endif
|