// vp9_encodeframe.c

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
#include "vpx_ports/system_state.h"

#if CONFIG_MISMATCH_DEBUG
#include "vpx_util/vpx_debug_util.h"
#endif  // CONFIG_MISMATCH_DEBUG

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_tile_common.h"

#if !CONFIG_REALTIME_ONLY
#include "vp9/encoder/vp9_aq_360.h"
#include "vp9/encoder/vp9_aq_complexity.h"
#endif
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#if !CONFIG_REALTIME_ONLY
#include "vp9/encoder/vp9_aq_variance.h"
#endif
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_multi_thread.h"
#include "vp9/encoder/vp9_partition_models.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_tokenize.h"

static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                              int output_enabled, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);

// This is used as a reference when computing the source variance for the
// purpose of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
static const uint8_t VP9_VAR_OFFS[64] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};

#if CONFIG_VP9_HIGHBITDEPTH
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16
};
#endif  // CONFIG_VP9_HIGHBITDEPTH
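
// Note: measuring a block's variance against this constant-128 reference
// yields the source block's own variance, since subtracting a constant does
// not change variance. The high-bitdepth tables scale the 128 offset by
// 1 << (bd - 8), i.e. 128 * 4 for 10-bit and 128 * 16 for 12-bit input.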
unsigned int vp9_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref,
                                  BLOCK_SIZE bs) {
  unsigned int sse;
  const unsigned int var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse);
  return var;
}

#if CONFIG_VP9_HIGHBITDEPTH
unsigned int vp9_high_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref,
                                       BLOCK_SIZE bs, int bd) {
  unsigned int var, sse;
  switch (bd) {
    case 10:
      var =
          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                             CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), 0, &sse);
      break;
    case 12:
      var =
          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                             CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), 0, &sse);
      break;
    case 8:
    default:
      var =
          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                             CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), 0, &sse);
      break;
  }
  return var;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  return ROUND_POWER_OF_TWO(vp9_get_sby_variance(cpi, ref, bs),
                            num_pels_log2_lookup[bs]);
}
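
// For example, BLOCK_64X64 covers 1 << 12 pixels (num_pels_log2_lookup[bs]
// is 12), so the per-pixel figure is the whole-block variance rounded and
// shifted right by 12.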
#if CONFIG_VP9_HIGHBITDEPTH
unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  return (unsigned int)ROUND64_POWER_OF_TWO(
      (int64_t)vp9_high_get_sby_variance(cpi, ref, bs, bd),
      num_pels_log2_lookup[bs]);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if !CONFIG_REALTIME_ONLY
static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
                                                   BLOCK_SIZE bs) {
  unsigned int sse, var;
  uint8_t *last_y;
  const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
  assert(last != NULL);
  last_y =
      &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
                                                   int mi_row, int mi_col) {
  unsigned int var = get_sby_perpixel_diff_variance(
      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
  if (var < 8)
    return BLOCK_64X64;
  else if (var < 128)
    return BLOCK_32X32;
  else if (var < 2048)
    return BLOCK_16X16;
  else
    return BLOCK_8X8;
}
#endif  // !CONFIG_REALTIME_ONLY
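
// The thresholds above grow by a factor of 16 per step (8, 128, 2048), so
// each 16x increase in per-pixel difference variance against LAST_FRAME
// halves the fixed partition size chosen for the superblock.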
static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row,
                              int mi_col, BLOCK_SIZE bsize, int segment_index) {
  VP9_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  const uint8_t *const map =
      seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;

  // Initialize the segmentation index as 0.
  mi->segment_id = 0;

  // Skip the rest if AQ mode is disabled.
  if (!seg->enabled) return;

  switch (aq_mode) {
    case CYCLIC_REFRESH_AQ:
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      break;
#if !CONFIG_REALTIME_ONLY
    case VARIANCE_AQ:
      if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
          cpi->force_update_segmentation ||
          (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
        int min_energy;
        int max_energy;
        // Get sub block energy range
        if (bsize >= BLOCK_32X32) {
          vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy,
                                   &max_energy);
        } else {
          min_energy = bsize <= BLOCK_16X16 ? x->mb_energy
                                            : vp9_block_energy(cpi, x, bsize);
        }
        mi->segment_id = vp9_vaq_segment_id(min_energy);
      } else {
        mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      }
      break;
    case EQUATOR360_AQ:
      if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation)
        mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows);
      else
        mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      break;
#endif
    case LOOKAHEAD_AQ:
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      break;
    case PSNR_AQ: mi->segment_id = segment_index; break;
    case PERCEPTUAL_AQ: mi->segment_id = x->segment_id; break;
    default:
      // NO_AQ or COMPLEXITY_AQ: leave the segment index at 0.
      break;
  }

  // Set segment index from ROI map if it's enabled.
  if (cpi->roi.enabled)
    mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);

  vp9_init_plane_quantizers(cpi, x);
}
// Lighter version of set_offsets that only sets the mode info
// pointers.
static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
                                         MACROBLOCK *const x,
                                         MACROBLOCKD *const xd, int mi_row,
                                         int mi_col) {
  const int idx_str = xd->mi_stride * mi_row + mi_col;
  xd->mi = cm->mi_grid_visible + idx_str;
  xd->mi[0] = cm->mi + idx_str;
  x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
}

static void set_ssim_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, const int mi_row,
                            const int mi_col, int *const rdmult) {
  const VP9_COMMON *const cm = &cpi->common;

  const int bsize_base = BLOCK_16X16;
  const int num_8x8_w = num_8x8_blocks_wide_lookup[bsize_base];
  const int num_8x8_h = num_8x8_blocks_high_lookup[bsize_base];
  const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w;
  const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h;
  const int num_bcols =
      (num_8x8_blocks_wide_lookup[bsize] + num_8x8_w - 1) / num_8x8_w;
  const int num_brows =
      (num_8x8_blocks_high_lookup[bsize] + num_8x8_h - 1) / num_8x8_h;
  int row, col;
  double num_of_mi = 0.0;
  double geom_mean_of_scale = 0.0;

  assert(cpi->oxcf.tuning == VP8_TUNE_SSIM);
  for (row = mi_row / num_8x8_w;
       row < num_rows && row < mi_row / num_8x8_w + num_brows; ++row) {
    for (col = mi_col / num_8x8_h;
         col < num_cols && col < mi_col / num_8x8_h + num_bcols; ++col) {
      const int index = row * num_cols + col;
      geom_mean_of_scale += log(cpi->mi_ssim_rdmult_scaling_factors[index]);
      num_of_mi += 1.0;
    }
  }
  geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi);

  *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale);
  *rdmult = VPXMAX(*rdmult, 0);
  set_error_per_bit(x, *rdmult);
  vpx_clear_system_state();
}
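
// The scaling above multiplies rdmult by the geometric mean,
// exp(mean(log(s_i))), of the per-16x16 SSIM scaling factors the block
// overlaps, so a single extreme factor cannot dominate the rate-distortion
// trade-off for the whole block.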
static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
                        MACROBLOCK *const x, int mi_row, int mi_col,
                        BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  MvLimits *const mv_limits = &x->mv_limits;

  set_skip_context(xd, mi_row, mi_col);

  set_mode_info_offsets(cm, x, xd, mi_row, mi_col);

  // Set up destination pointers.
  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);

  // Set up limit values for MV components.
  // MVs beyond this range do not produce a new/different prediction block.
  mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
  mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;

  // Set up distance of MB to edge of frame in 1/8th pel units.
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
                 cm->mi_cols);

  // Set up source buffers.
  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  // R/D setup.
  x->rddiv = cpi->rd.RDDIV;
  x->rdmult = cpi->rd.RDMULT;
  if (oxcf->tuning == VP8_TUNE_SSIM) {
    set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
  }

  // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
  xd->tile = *tile;
}
static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  const int block_width =
      VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col);
  const int block_height =
      VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row);
  const int mi_stride = xd->mi_stride;
  MODE_INFO *const src_mi = xd->mi[0];
  int i, j;

  for (j = 0; j < block_height; ++j)
    for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi;
}

static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x,
                           MACROBLOCKD *const xd, int mi_row, int mi_col,
                           BLOCK_SIZE bsize) {
  if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
    set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col);
    xd->mi[0]->sb_type = bsize;
  }
}
typedef struct {
  // This struct is used for computing variance in choose_partitioning(), where
  // the max number of samples within a superblock is 16x16 (with 4x4 avg). Even
  // in high bitdepth, uint32_t is enough for sum_square_error (2^12 * 2^12 * 16
  // * 16 = 2^32).
  uint32_t sum_square_error;
  int32_t sum_error;
  int log2_count;
  int variance;
} var;

typedef struct {
  var none;
  var horz[2];
  var vert[2];
} partition_variance;

typedef struct {
  partition_variance part_variances;
  var split[4];
} v4x4;

typedef struct {
  partition_variance part_variances;
  v4x4 split[4];
} v8x8;

typedef struct {
  partition_variance part_variances;
  v8x8 split[4];
} v16x16;

typedef struct {
  partition_variance part_variances;
  v16x16 split[4];
} v32x32;

typedef struct {
  partition_variance part_variances;
  v32x32 split[4];
} v64x64;

typedef struct {
  partition_variance *part_variances;
  var *split[4];
} variance_node;

typedef enum {
  V16X16,
  V32X32,
  V64X64,
} TREE_LEVEL;
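
// The vNxN structs form a fixed quadtree over a 64x64 superblock: each node
// carries the variances of its none/horz/vert partitions plus four children,
// down to v4x4, whose split[] entries are plain var leaves. tree_to_node()
// below presents any level through the uniform variance_node view (a pointer
// to the node's partition variances and to the four children's "none"
// accumulators).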
static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
  int i;
  node->part_variances = NULL;
  switch (bsize) {
    case BLOCK_64X64: {
      v64x64 *vt = (v64x64 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_32X32: {
      v32x32 *vt = (v32x32 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_16X16: {
      v16x16 *vt = (v16x16 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_8X8: {
      v8x8 *vt = (v8x8 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    default: {
      v4x4 *vt = (v4x4 *)data;
      assert(bsize == BLOCK_4X4);
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
      break;
    }
  }
}
// Set variance values given sum square error, sum error, count.
static void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
  v->sum_square_error = s2;
  v->sum_error = s;
  v->log2_count = c;
}

static void get_variance(var *v) {
  v->variance =
      (int)(256 * (v->sum_square_error -
                   (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
                              v->log2_count)) >>
            v->log2_count);
}
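
// With n = 1 << log2_count samples this computes
//   variance = 256 * (sum_square_error - sum_error^2 / n) / n,
// i.e. the population variance of the accumulated samples, scaled by 256 to
// preserve precision when comparing against the partition thresholds.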
static void sum_2_variances(const var *a, const var *b, var *r) {
  assert(a->log2_count == b->log2_count);
  fill_variance(a->sum_square_error + b->sum_square_error,
                a->sum_error + b->sum_error, a->log2_count + 1, r);
}

static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  variance_node node;
  memset(&node, 0, sizeof(node));
  tree_to_node(data, bsize, &node);
  sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
  sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
  sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
  sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
  sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
                  &node.part_variances->none);
}
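
// Children are ordered { top-left, top-right, bottom-left, bottom-right },
// so horz[] accumulates the top and bottom halves, vert[] the left and right
// halves, and none the whole block; merging two equal-sized halves is why
// sum_2_variances() bumps log2_count by one.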
static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
                               MACROBLOCKD *const xd, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               int force_split) {
  VP9_COMMON *const cm = &cpi->common;
  variance_node vt;
  const int block_width = num_8x8_blocks_wide_lookup[bsize];
  const int block_height = num_8x8_blocks_high_lookup[bsize];

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);

  if (force_split == 1) return 0;

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
  // variance is below threshold, otherwise split will be selected.
  // No check for vert/horiz split as too few samples for variance.
  if (bsize == bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  } else if (bsize > bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    // For key frame: take split for bsize above 32X32 or very high variance.
    if (frame_is_intra_only(cm) &&
        (bsize > BLOCK_32X32 ||
         vt.part_variances->none.variance > (threshold << 4))) {
      return 0;
    }
    // If variance is low, take the bsize (no split).
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    // Check vertical split.
    if (mi_row + block_height / 2 < cm->mi_rows) {
      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
      get_variance(&vt.part_variances->vert[0]);
      get_variance(&vt.part_variances->vert[1]);
      if (vt.part_variances->vert[0].variance < threshold &&
          vt.part_variances->vert[1].variance < threshold &&
          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
        return 1;
      }
    }
    // Check horizontal split.
    if (mi_col + block_width / 2 < cm->mi_cols) {
      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
      get_variance(&vt.part_variances->horz[0]);
      get_variance(&vt.part_variances->horz[1]);
      if (vt.part_variances->horz[0].variance < threshold &&
          vt.part_variances->horz[1].variance < threshold &&
          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
        return 1;
      }
    }
    return 0;
  }
  return 0;
}
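
// Decision order above: force_split wins, then the whole block (no split),
// then a vertical and finally a horizontal two-way partition; returning 0
// signals the caller to fall back to the four-way split at this level.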
static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
                                         int width, int height,
                                         int content_state) {
  if (speed >= 8) {
    if (width <= 640 && height <= 480)
      return (5 * threshold_base) >> 2;
    else if ((content_state == kLowSadLowSumdiff) ||
             (content_state == kHighSadLowSumdiff) ||
             (content_state == kLowVarHighSumdiff))
      return (5 * threshold_base) >> 2;
  } else if (speed == 7) {
    if ((content_state == kLowSadLowSumdiff) ||
        (content_state == kHighSadLowSumdiff) ||
        (content_state == kLowVarHighSumdiff)) {
      return (5 * threshold_base) >> 2;
    }
  }
  return threshold_base;
}
// Set the variance split thresholds for the following block sizes:
// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
// currently only used on key frames.
static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
                               int content_state) {
  VP9_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  const int threshold_multiplier =
      is_key_frame ? 20 : cpi->sf.variance_part_thresh_mult;
  int64_t threshold_base =
      (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);

  if (is_key_frame) {
    thresholds[0] = threshold_base;
    thresholds[1] = threshold_base >> 2;
    thresholds[2] = threshold_base >> 2;
    thresholds[3] = threshold_base << 2;
  } else {
    // Increase base variance threshold based on estimated noise level.
    if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) {
      NOISE_LEVEL noise_level =
          vp9_noise_estimate_extract_level(&cpi->noise_estimate);
      if (noise_level == kHigh)
        threshold_base = 3 * threshold_base;
      else if (noise_level == kMedium)
        threshold_base = threshold_base << 1;
      else if (noise_level < kLow)
        threshold_base = (7 * threshold_base) >> 3;
    }
#if CONFIG_VP9_TEMPORAL_DENOISING
    if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
        cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
      threshold_base =
          vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level,
                                content_state, cpi->svc.temporal_layer_id);
    else
      threshold_base =
          scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width,
                                    cm->height, content_state);
#else
    // Increase base variance threshold based on content_state/sum_diff level.
    threshold_base = scale_part_thresh_sumdiff(
        threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);
#endif
    thresholds[0] = threshold_base;
    thresholds[2] = threshold_base << cpi->oxcf.speed;
    if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7)
      thresholds[2] = thresholds[2] << 1;
    if (cm->width <= 352 && cm->height <= 288) {
      thresholds[0] = threshold_base >> 3;
      thresholds[1] = threshold_base >> 1;
      thresholds[2] = threshold_base << 3;
    } else if (cm->width < 1280 && cm->height < 720) {
      thresholds[1] = (5 * threshold_base) >> 2;
    } else if (cm->width < 1920 && cm->height < 1080) {
      thresholds[1] = threshold_base << 1;
    } else {
      thresholds[1] = (5 * threshold_base) >> 1;
    }
    if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX;
  }
}
void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
                                           int content_state) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int is_key_frame = frame_is_intra_only(cm);
  if (sf->partition_search_type != VAR_BASED_PARTITION &&
      sf->partition_search_type != REFERENCE_PARTITION) {
    return;
  } else {
    set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);
    // The thresholds below are not changed locally.
    if (is_key_frame) {
      cpi->vbp_threshold_sad = 0;
      cpi->vbp_threshold_copy = 0;
      cpi->vbp_bsize_min = BLOCK_8X8;
    } else {
      if (cm->width <= 352 && cm->height <= 288)
        cpi->vbp_threshold_sad = 10;
      else
        cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000
                                     ? (cpi->y_dequant[q][1] << 1)
                                     : 1000;
      cpi->vbp_bsize_min = BLOCK_16X16;
      if (cm->width <= 352 && cm->height <= 288)
        cpi->vbp_threshold_copy = 4000;
      else if (cm->width <= 640 && cm->height <= 360)
        cpi->vbp_threshold_copy = 8000;
      else
        cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000
                                      ? (cpi->y_dequant[q][1] << 3)
                                      : 8000;
      if (cpi->rc.high_source_sad ||
          (cpi->use_svc && cpi->svc.high_source_sad_superframe)) {
        cpi->vbp_threshold_sad = 0;
        cpi->vbp_threshold_copy = 0;
      }
    }
    cpi->vbp_threshold_minmax = 15 + (q >> 3);
  }
}
// Compute the minmax over the 8x8 subblocks.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int k;
  int minmax_max = 0;
  int minmax_min = 255;
  // Loop over the 4 8x8 subblocks.
  for (k = 0; k < 4; k++) {
    int x8_idx = x16_idx + ((k & 1) << 3);
    int y8_idx = y16_idx + ((k >> 1) << 3);
    int min = 0;
    int max = 0;
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
                              d + y8_idx * dp + x8_idx, dp, &min, &max);
      } else {
        vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx,
                       dp, &min, &max);
      }
#else
      vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp,
                     &min, &max);
#endif
      if ((max - min) > minmax_max) minmax_max = (max - min);
      if ((max - min) < minmax_min) minmax_min = (max - min);
    }
  }
  return (minmax_max - minmax_min);
}
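
// Each 8x8 subblock contributes its (max - min) pixel-difference range; the
// returned spread between the largest and smallest range is compared against
// cpi->vbp_threshold_minmax (set in vp9_set_variance_partition_thresholds
// above) when deciding whether to force a 16x16 block to split further.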
static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x8_idx, int y8_idx, v8x8 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int k;
  for (k = 0; k < 4; k++) {
    int x4_idx = x8_idx + ((k & 1) << 2);
    int y4_idx = y8_idx + ((k >> 1) << 2);
    unsigned int sse = 0;
    int sum = 0;
    if (x4_idx < pixels_wide && y4_idx < pixels_high) {
      int s_avg;
      int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
        if (!is_key_frame)
          d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      } else {
        s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
        if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      }
#else
      s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
      if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
#endif
      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}

static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x16_idx, int y16_idx, v16x16 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int k;
  for (k = 0; k < 4; k++) {
    int x8_idx = x16_idx + ((k & 1) << 3);
    int y8_idx = y16_idx + ((k >> 1) << 3);
    unsigned int sse = 0;
    int sum = 0;
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
      int s_avg;
      int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
        if (!is_key_frame)
          d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
      } else {
        s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
        if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
      }
#else
      s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
      if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
#endif
      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}
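
// Both helpers reduce each subblock to its pixel average, so every leaf
// holds a single sample (log2_count 0): sum is the source/destination
// average difference and sse its square. On key frames d_avg stays at the
// flat 128 reference, mirroring VP9_VAR_OFFS above.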
// Check if most of the superblock is skin content, and if so, force split to
// 32x32, and set x->sb_is_skin for use in mode selection.
static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
                         int mi_row, int mi_col, int *force_split) {
  VP9_COMMON *const cm = &cpi->common;
#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth) return 0;
#endif
  // Avoid checking superblocks on/near boundary and avoid low resolutions.
  // Note superblock may still pick 64X64 if y_sad is very small
  // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
  if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 &&
                   mi_row + 8 < cm->mi_rows)) {
    int num_16x16_skin = 0;
    int num_16x16_nonskin = 0;
    uint8_t *ysignal = x->plane[0].src.buf;
    uint8_t *usignal = x->plane[1].src.buf;
    uint8_t *vsignal = x->plane[2].src.buf;
    int sp = x->plane[0].src.stride;
    int spuv = x->plane[1].src.stride;
    const int block_index = mi_row * cm->mi_cols + mi_col;
    const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
    const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
    const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
    const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
    // Loop through the 16x16 sub-blocks.
    int i, j;
    for (i = 0; i < ymis; i += 2) {
      for (j = 0; j < xmis; j += 2) {
        int bl_index = block_index + i * cm->mi_cols + j;
        int is_skin = cpi->skin_map[bl_index];
        num_16x16_skin += is_skin;
        num_16x16_nonskin += (1 - is_skin);
        if (num_16x16_nonskin > 3) {
          // Exit loop if at least 4 of the 16x16 blocks are not skin.
          i = ymis;
          break;
        }
        ysignal += 16;
        usignal += 8;
        vsignal += 8;
      }
      ysignal += (sp << 4) - 64;
      usignal += (spuv << 3) - 32;
      vsignal += (spuv << 3) - 32;
    }
    if (num_16x16_skin > 12) {
      *force_split = 1;
      return 1;
    }
  }
  return 0;
}
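/* Worked numbers for the skin test above (illustration, not libvpx code): a
 * 64x64 superblock covers 16 16x16 sub-blocks (the i/j loops step by 2 in
 * 8x8 mi units), so num_16x16_skin > 12 means more than 75% of the
 * sub-blocks are skin. The early exit at num_16x16_nonskin > 3 is safe:
 * once 4 blocks are non-skin, at most 12 can be skin, so the test can no
 * longer pass. */
#if 0
#include <assert.h>

int main(void) {
  const int total_16x16 = 16; /* 16x16 sub-blocks per 64x64 superblock. */
  const int nonskin_exit = 4; /* Early-exit count used above. */
  /* With 4 non-skin blocks, at most 12 skin blocks remain, so the
   * num_16x16_skin > 12 (i.e. > 75%) test can never pass. */
  assert(total_16x16 - nonskin_exit == 12);
  return 0;
}
#endif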
static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                  v64x64 *vt, int64_t thresholds[],
                                  MV_REFERENCE_FRAME ref_frame_partition,
                                  int mi_col, int mi_row) {
  int i, j;
  VP9_COMMON *const cm = &cpi->common;
  const int mv_thr = cm->width > 640 ? 8 : 4;
  // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected
  // and the int_pro motion vector is small. If the temporal variance is
  // small, set the variance_low flag for the block. The variance threshold
  // can be adjusted; the higher it is, the more aggressive.
  if (ref_frame_partition == LAST_FRAME &&
      (cpi->sf.short_circuit_low_temp_var == 1 ||
       (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
        xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
        xd->mi[0]->mv[0].as_mv.row < mv_thr &&
        xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
    if (xd->mi[0]->sb_type == BLOCK_64X64) {
      if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
        x->variance_low[0] = 1;
    } else if (xd->mi[0]->sb_type == BLOCK_64X32) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 1] = 1;
      }
    } else if (xd->mi[0]->sb_type == BLOCK_32X64) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 3] = 1;
      }
    } else {
      for (i = 0; i < 4; i++) {
        const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } };
        const int idx_str =
            cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
        MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;
        if (cm->mi_cols <= mi_col + idx[i][1] ||
            cm->mi_rows <= mi_row + idx[i][0])
          continue;
        if ((*this_mi)->sb_type == BLOCK_32X32) {
          int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 ||
                                     cpi->sf.short_circuit_low_temp_var == 3)
                                        ? ((5 * thresholds[1]) >> 3)
                                        : (thresholds[1] >> 1);
          if (vt->split[i].part_variances.none.variance < threshold_32x32)
            x->variance_low[i + 5] = 1;
        } else if (cpi->sf.short_circuit_low_temp_var >= 2) {
          // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
          // inside.
          if ((*this_mi)->sb_type == BLOCK_16X16 ||
              (*this_mi)->sb_type == BLOCK_32X16 ||
              (*this_mi)->sb_type == BLOCK_16X32) {
            for (j = 0; j < 4; j++) {
              if (vt->split[i].split[j].part_variances.none.variance <
                  (thresholds[2] >> 8))
                x->variance_low[(i << 2) + j + 9] = 1;
            }
          }
        }
      }
    }
  }
}
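/* Layout of x->variance_low[25], as implied by the indices used in
 * set_low_temp_var_flag() above (comment added for illustration):
 *   [0]      the 64x64 superblock
 *   [1..2]   64x32 halves (i + 1)
 *   [3..4]   32x64 halves (i + 3)
 *   [5..8]   32x32 quadrants (i + 5)
 *   [9..24]  16x16 blocks ((i << 2) + j + 9)
 * This is also why copy_partitioning() / update_prev_partition() below copy
 * exactly 25 entries per superblock. */
#if 0
/* Hypothetical helper mirroring the 16x16 indexing above: quadrant i,
 * sub-block j. */
static int variance_low_index_16x16(int i, int j) { return (i << 2) + j + 9; }
#endif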
static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x,
                                     MACROBLOCKD *xd, BLOCK_SIZE bsize,
                                     int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  BLOCK_SIZE *prev_part = cpi->prev_partition;
  int start_pos = mi_row * cm->mi_stride + mi_col;
  const int bsl = b_width_log2_lookup[bsize];
  const int bs = (1 << bsl) >> 2;
  BLOCK_SIZE subsize;
  PARTITION_TYPE partition;
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
  partition = partition_lookup[bsl][prev_part[start_pos]];
  subsize = get_subsize(bsize, partition);
  if (subsize < BLOCK_8X8) {
    set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
  } else {
    switch (partition) {
      case PARTITION_NONE:
        set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
        break;
      case PARTITION_HORZ:
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + bs, mi_col, subsize);
        break;
      case PARTITION_VERT:
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize);
        break;
      default:
        assert(partition == PARTITION_SPLIT);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs);
        break;
    }
  }
}
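/* In the recursive partition walkers here, bs = (1 << bsl) >> 2 is half the
 * block's width in 8x8 mi units: b_width_log2_lookup[] is log2 of the width
 * in 4x4 units, so BLOCK_64X64 -> bsl = 4 -> bs = 4, and a 64x64 block
 * spans 8 mi columns. That is why mi_row + bs / mi_col + bs address the
 * second halves of HORZ/VERT partitions. (Illustrative sketch, compiled
 * out.) */
#if 0
#include <assert.h>

int main(void) {
  const int bsl_64x64 = 4;              /* b_width_log2_lookup[BLOCK_64X64] */
  assert(((1 << bsl_64x64) >> 2) == 4); /* Half of the 8 mi columns. */
  return 0;
}
#endif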
static int copy_partitioning(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                             int mi_row, int mi_col, int segment_id,
                             int sb_offset) {
  int svc_copy_allowed = 1;
  int frames_since_key_thresh = 1;
  if (cpi->use_svc) {
    // For SVC, don't allow copy if base spatial layer is key frame, or if
    // frame is not a temporal enhancement layer frame.
    int layer = LAYER_IDS_TO_IDX(0, cpi->svc.temporal_layer_id,
                                 cpi->svc.number_temporal_layers);
    const LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
    if (lc->is_key_frame || !cpi->svc.non_reference_frame) svc_copy_allowed = 0;
    frames_since_key_thresh = cpi->svc.number_spatial_layers << 1;
  }
  if (cpi->rc.frames_since_key > frames_since_key_thresh && svc_copy_allowed &&
      !cpi->resize_pending && segment_id == CR_SEGMENT_ID_BASE &&
      cpi->prev_segment_id[sb_offset] == CR_SEGMENT_ID_BASE &&
      cpi->copied_frame_cnt[sb_offset] < cpi->max_copied_frame) {
    if (cpi->prev_partition != NULL) {
      copy_partitioning_helper(cpi, x, xd, BLOCK_64X64, mi_row, mi_col);
      cpi->copied_frame_cnt[sb_offset] += 1;
      memcpy(x->variance_low, &(cpi->prev_variance_low[sb_offset * 25]),
             sizeof(x->variance_low));
      return 1;
    }
  }
  return 0;
}
static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int mi_row_high, int mi_col_high) {
  VP9_COMMON *const cm = &cpi->common;
  SVC *const svc = &cpi->svc;
  BLOCK_SIZE *prev_part = svc->prev_partition_svc;
  // Variables with _high are for the higher resolution.
  int bsize_high = 0;
  int subsize_high = 0;
  const int bsl_high = b_width_log2_lookup[bsize];
  const int bs_high = (1 << bsl_high) >> 2;
  const int has_rows = (mi_row_high + bs_high) < cm->mi_rows;
  const int has_cols = (mi_col_high + bs_high) < cm->mi_cols;
  const int row_boundary_block_scale_factor[BLOCK_SIZES] = { 13, 13, 13, 1, 0,
                                                             1,  1,  0,  1, 1,
                                                             0,  1,  0 };
  const int col_boundary_block_scale_factor[BLOCK_SIZES] = { 13, 13, 13, 2, 2,
                                                             0,  2,  2,  0, 2,
                                                             2,  0,  0 };
  int start_pos;
  BLOCK_SIZE bsize_low;
  PARTITION_TYPE partition_high;
  if (mi_row_high >= cm->mi_rows || mi_col_high >= cm->mi_cols) return 0;
  if (mi_row >= svc->mi_rows[svc->spatial_layer_id - 1] ||
      mi_col >= svc->mi_cols[svc->spatial_layer_id - 1])
    return 0;
  // Find the corresponding (mi_col/mi_row) block down-scaled by 2x2.
  start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col;
  bsize_low = prev_part[start_pos];
  // The block size is too big for boundaries. Do variance-based partitioning.
  if ((!has_rows || !has_cols) && bsize_low > BLOCK_16X16) return 1;
  // For reference frames: return 1 (do variance-based partitioning) if the
  // superblock is not low source sad and the lower-resolution bsize is below
  // 32x32.
  if (!cpi->svc.non_reference_frame && !x->skip_low_source_sad &&
      bsize_low < BLOCK_32X32)
    return 1;
  // Scale up block size by 2x2. Force 64x64 for sizes of 32x32 and larger.
  if (bsize_low < BLOCK_32X32) {
    bsize_high = bsize_low + 3;
  } else if (bsize_low >= BLOCK_32X32) {
    bsize_high = BLOCK_64X64;
  }
  // Scale up blocks on the boundary.
  if (!has_cols && has_rows) {
    bsize_high = bsize_low + row_boundary_block_scale_factor[bsize_low];
  } else if (has_cols && !has_rows) {
    bsize_high = bsize_low + col_boundary_block_scale_factor[bsize_low];
  } else if (!has_cols && !has_rows) {
    bsize_high = bsize_low;
  }
  partition_high = partition_lookup[bsl_high][bsize_high];
  subsize_high = get_subsize(bsize, partition_high);
  if (subsize_high < BLOCK_8X8) {
    set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high);
  } else {
    const int bsl = b_width_log2_lookup[bsize];
    const int bs = (1 << bsl) >> 2;
    switch (partition_high) {
      case PARTITION_NONE:
        set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high);
        break;
      case PARTITION_HORZ:
        set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high);
        if (subsize_high < BLOCK_64X64)
          set_block_size(cpi, x, xd, mi_row_high + bs_high, mi_col_high,
                         subsize_high);
        break;
      case PARTITION_VERT:
        set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high);
        if (subsize_high < BLOCK_64X64)
          set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs_high,
                         subsize_high);
        break;
      default:
        assert(partition_high == PARTITION_SPLIT);
        if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col,
                                   mi_row_high, mi_col_high))
          return 1;
        if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1),
                                   mi_col, mi_row_high + bs_high, mi_col_high))
          return 1;
        if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row,
                                   mi_col + (bs >> 1), mi_row_high,
                                   mi_col_high + bs_high))
          return 1;
        if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1),
                                   mi_col + (bs >> 1), mi_row_high + bs_high,
                                   mi_col_high + bs_high))
          return 1;
        break;
    }
  }
  return 0;
}
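/* Why "bsize_low + 3" doubles a square block size above (illustration, not
 * libvpx code): in the BLOCK_SIZE enum the square sizes sit three entries
 * apart (BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, BLOCK_16X16, ...), so adding 3
 * maps 8x8 -> 16x16 and 16x16 -> 32x32, i.e. a 2x2 upscale for the higher
 * resolution layer. Compiled out; extract to run. */
#if 0
#include <assert.h>

int main(void) {
  /* Values from the VP9 BLOCK_SIZE enum. */
  const int kBlock8x8 = 3, kBlock16x16 = 6, kBlock32x32 = 9;
  assert(kBlock8x8 + 3 == kBlock16x16);
  assert(kBlock16x16 + 3 == kBlock32x32);
  return 0;
}
#endif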
static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
                                 int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  BLOCK_SIZE *prev_part = cpi->svc.prev_partition_svc;
  int start_pos = mi_row * cm->mi_stride + mi_col;
  const int bsl = b_width_log2_lookup[bsize];
  const int bs = (1 << bsl) >> 2;
  BLOCK_SIZE subsize;
  PARTITION_TYPE partition;
  const MODE_INFO *mi = NULL;
  int xx, yy;
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
  mi = cm->mi_grid_visible[start_pos];
  partition = partition_lookup[bsl][mi->sb_type];
  subsize = get_subsize(bsize, partition);
  if (subsize < BLOCK_8X8) {
    prev_part[start_pos] = bsize;
  } else {
    switch (partition) {
      case PARTITION_NONE:
        prev_part[start_pos] = bsize;
        if (bsize == BLOCK_64X64) {
          for (xx = 0; xx < 8; xx += 4)
            for (yy = 0; yy < 8; yy += 4) {
              if ((mi_row + xx < cm->mi_rows) && (mi_col + yy < cm->mi_cols))
                prev_part[start_pos + xx * cm->mi_stride + yy] = bsize;
            }
        }
        break;
      case PARTITION_HORZ:
        prev_part[start_pos] = subsize;
        if (mi_row + bs < cm->mi_rows)
          prev_part[start_pos + bs * cm->mi_stride] = subsize;
        break;
      case PARTITION_VERT:
        prev_part[start_pos] = subsize;
        if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
        break;
      default:
        assert(partition == PARTITION_SPLIT);
        update_partition_svc(cpi, subsize, mi_row, mi_col);
        update_partition_svc(cpi, subsize, mi_row + bs, mi_col);
        update_partition_svc(cpi, subsize, mi_row, mi_col + bs);
        update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs);
        break;
    }
  }
}
static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                         int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  BLOCK_SIZE *prev_part = cpi->prev_partition;
  int start_pos = mi_row * cm->mi_stride + mi_col;
  const int bsl = b_width_log2_lookup[bsize];
  const int bs = (1 << bsl) >> 2;
  BLOCK_SIZE subsize;
  PARTITION_TYPE partition;
  const MODE_INFO *mi = NULL;
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
  mi = cm->mi_grid_visible[start_pos];
  partition = partition_lookup[bsl][mi->sb_type];
  subsize = get_subsize(bsize, partition);
  if (subsize < BLOCK_8X8) {
    prev_part[start_pos] = bsize;
  } else {
    switch (partition) {
      case PARTITION_NONE: prev_part[start_pos] = bsize; break;
      case PARTITION_HORZ:
        prev_part[start_pos] = subsize;
        if (mi_row + bs < cm->mi_rows)
          prev_part[start_pos + bs * cm->mi_stride] = subsize;
        break;
      case PARTITION_VERT:
        prev_part[start_pos] = subsize;
        if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
        break;
      default:
        assert(partition == PARTITION_SPLIT);
        update_prev_partition_helper(cpi, subsize, mi_row, mi_col);
        update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col);
        update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs);
        update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs);
        break;
    }
  }
}
static void update_prev_partition(VP9_COMP *cpi, MACROBLOCK *x, int segment_id,
                                  int mi_row, int mi_col, int sb_offset) {
  update_prev_partition_helper(cpi, BLOCK_64X64, mi_row, mi_col);
  cpi->prev_segment_id[sb_offset] = segment_id;
  memcpy(&(cpi->prev_variance_low[sb_offset * 25]), x->variance_low,
         sizeof(x->variance_low));
  // Reset the counter for copy partitioning.
  cpi->copied_frame_cnt[sb_offset] = 0;
}
static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize,
                         unsigned int y_sad, int is_key_frame) {
  int i;
  MACROBLOCKD *xd = &x->e_mbd;
  if (is_key_frame) return;
  // For speed >= 8, avoid the chroma check if y_sad is above threshold.
  if (cpi->oxcf.speed >= 8) {
    if (y_sad > cpi->vbp_thresholds[1] &&
        (!cpi->noise_estimate.enabled ||
         vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium))
      return;
  }
  for (i = 1; i <= 2; ++i) {
    unsigned int uv_sad = UINT_MAX;
    struct macroblock_plane *p = &x->plane[i];
    struct macroblockd_plane *pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    if (bs != BLOCK_INVALID)
      uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf,
                                   pd->dst.stride);
    // TODO(marpan): Investigate if we should lower this threshold if
    // superblock is detected as skin.
    x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
  }
}
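/* Sketch of the color-sensitivity rule above (not libvpx code): a chroma
 * plane is flagged as sensitive when its SAD exceeds a quarter of the luma
 * SAD. One reading (an interpretation, not stated in this file): with 4:2:0
 * subsampling each chroma plane carries a quarter of the luma samples, so
 * the shift roughly normalizes the per-plane comparison. */
#if 0
static int is_color_sensitive(unsigned int uv_sad, unsigned int y_sad) {
  return uv_sad > (y_sad >> 2);
}
#endif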
static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift,
                               int sb_offset) {
  unsigned int tmp_sse;
  uint64_t tmp_sad;
  unsigned int tmp_variance;
  const BLOCK_SIZE bsize = BLOCK_64X64;
  uint8_t *src_y = cpi->Source->y_buffer;
  int src_ystride = cpi->Source->y_stride;
  uint8_t *last_src_y = cpi->Last_Source->y_buffer;
  int last_src_ystride = cpi->Last_Source->y_stride;
  uint64_t avg_source_sad_threshold = 10000;
  uint64_t avg_source_sad_threshold2 = 12000;
#if CONFIG_VP9_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth) return 0;
#endif
  src_y += shift;
  last_src_y += shift;
  tmp_sad =
      cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride);
  tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y,
                                   last_src_ystride, &tmp_sse);
  // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
  if (tmp_sad < avg_source_sad_threshold)
    x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff
                                                          : kLowSadHighSumdiff;
  else
    x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff
                                                          : kHighSadHighSumdiff;
  // Detect large lighting change.
  if (cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
      cpi->oxcf.rc_mode == VPX_CBR && tmp_variance < (tmp_sse >> 3) &&
      (tmp_sse - tmp_variance) > 10000)
    x->content_state_sb = kLowVarHighSumdiff;
  else if (tmp_sad > (avg_source_sad_threshold << 1))
    x->content_state_sb = kVeryHighSad;
  if (cpi->content_state_sb_fd != NULL) {
    if (tmp_sad < avg_source_sad_threshold2) {
      // Cap the increment to 255.
      if (cpi->content_state_sb_fd[sb_offset] < 255)
        cpi->content_state_sb_fd[sb_offset]++;
    } else {
      cpi->content_state_sb_fd[sb_offset] = 0;
    }
  }
  if (tmp_sad == 0) x->zero_temp_sad_source = 1;
  return tmp_sad;
}
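/* The identity cited in the comment above, spelled out (illustration): for
 * a 64x64 block (4096 pixels), vpx_variance64x64() returns
 *   variance = sse - (sum * sum) / 4096,
 * so sse - variance == (sum * sum) >> 12 -- the squared average frame
 * difference, which the code uses as its "sum diff" measure. Compiled out;
 * extract to run. */
#if 0
#include <assert.h>
#include <stdint.h>

int main(void) {
  /* Toy numbers: pretend each of the 4096 pixel diffs equals 2. */
  const int64_t sum = 2 * 4096; /* Sum of differences. */
  const int64_t sse = 4 * 4096; /* Sum of squared differences. */
  const int64_t variance = sse - ((sum * sum) >> 12);
  assert(sse - variance == (sum * sum) >> 12);
  assert(variance == 0); /* A constant diff has zero variance. */
  return 0;
}
#endif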
// This function chooses partitioning based on the variance between source and
// reconstructed last, where variance is computed for down-sampled inputs.
static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  int i, j, k, m;
  v64x64 vt;
  v16x16 *vt2 = NULL;
  int force_split[21];
  int avg_32x32;
  int max_var_32x32 = 0;
  int min_var_32x32 = INT_MAX;
  int var_32x32;
  int avg_16x16[4];
  int maxvar_16x16[4];
  int minvar_16x16[4];
  int64_t threshold_4x4avg;
  NOISE_LEVEL noise_level = kLow;
  int content_state = 0;
  uint8_t *s;
  const uint8_t *d;
  int sp;
  int dp;
  int compute_minmax_variance = 1;
  unsigned int y_sad = UINT_MAX;
  BLOCK_SIZE bsize = BLOCK_64X64;
  // Ref frame used in partitioning.
  MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;
  int pixels_wide = 64, pixels_high = 64;
  int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
                            cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] };
  int force_64_split = cpi->rc.high_source_sad ||
                       (cpi->use_svc && cpi->svc.high_source_sad_superframe) ||
                       (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
                        cpi->compute_source_sad_onepass &&
                        cpi->sf.use_source_sad && !x->zero_temp_sad_source);
  // For the variance computation under SVC mode, we treat the frame as key
  // if the reference (base layer frame) is a key frame (i.e.,
  // is_key_frame == 1).
  int is_key_frame =
      (frame_is_intra_only(cm) ||
       (is_one_pass_cbr_svc(cpi) &&
        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
  // Always use 4x4 partition for key frames.
  const int use_4x4_partition = frame_is_intra_only(cm);
  const int low_res = (cm->width <= 352 && cm->height <= 288);
  int variance4x4downsample[16];
  int segment_id;
  int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3);
  // For SVC: check if LAST frame is NULL or if the resolution of LAST is
  // different than the current frame resolution, and if so, treat this frame
  // as a key frame, for the purpose of the superblock partitioning.
  // LAST == NULL can happen in some cases where enhancement spatial layers
  // are enabled dynamically in the stream and the only reference is the
  // spatial reference (GOLDEN).
  if (cpi->use_svc) {
    const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, LAST_FRAME);
    if (ref == NULL || ref->y_crop_height != cm->height ||
        ref->y_crop_width != cm->width)
      is_key_frame = 1;
  }
  set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
  set_segment_index(cpi, x, mi_row, mi_col, BLOCK_64X64, 0);
  segment_id = xd->mi[0]->segment_id;
  if (cpi->oxcf.speed >= 8 || (cpi->use_svc && cpi->svc.non_reference_frame))
    compute_minmax_variance = 0;
  memset(x->variance_low, 0, sizeof(x->variance_low));
  if (cpi->sf.use_source_sad && !is_key_frame) {
    int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
    content_state = x->content_state_sb;
    x->skip_low_source_sad = (content_state == kLowSadLowSumdiff ||
                              content_state == kLowSadHighSumdiff)
                                 ? 1
                                 : 0;
    x->lowvar_highsumdiff = (content_state == kLowVarHighSumdiff) ? 1 : 0;
    if (cpi->content_state_sb_fd != NULL)
      x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2];
    // For SVC on the top spatial layer: use/scale the partition from
    // the lower spatial resolution if svc_use_lowres_part is enabled.
    if (cpi->sf.svc_use_lowres_part &&
        cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 &&
        cpi->svc.prev_partition_svc != NULL && content_state != kVeryHighSad) {
      if (!scale_partitioning_svc(cpi, x, xd, BLOCK_64X64, mi_row >> 1,
                                  mi_col >> 1, mi_row, mi_col)) {
        if (cpi->sf.copy_partition_flag) {
          update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
        }
        return 0;
      }
    }
    // If source_sad is low, copy the partition without computing the y_sad.
    if (x->skip_low_source_sad && cpi->sf.copy_partition_flag &&
        !force_64_split &&
        copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
      x->sb_use_mv_part = 1;
      if (cpi->sf.svc_use_lowres_part &&
          cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
        update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
      return 0;
    }
  }
  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      cyclic_refresh_segment_id_boosted(segment_id)) {
    int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    set_vbp_thresholds(cpi, thresholds, q, content_state);
  } else {
    set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state);
  }
  // Decrease the 32x32 split threshold for screen on the base layer, for
  // scene change/high motion frames.
  if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
      cpi->svc.spatial_layer_id == 0 && force_64_split)
    thresholds[1] = 3 * thresholds[1] >> 2;
  // For non-keyframes, disable the 4x4 average for low resolution when
  // speed >= 8.
  threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX;
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
  s = x->plane[0].src.buf;
  sp = x->plane[0].src.stride;
  // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
  // 5-20 for the 16x16 blocks.
  force_split[0] = force_64_split;
  if (!is_key_frame) {
    // In the case of spatial/temporal scalable coding, the assumption here is
    // that the temporal reference frame will always be of type LAST_FRAME.
    // TODO(marpan): If that assumption is broken, we need to revisit this
    // code.
    MODE_INFO *mi = xd->mi[0];
    YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
    const YV12_BUFFER_CONFIG *yv12_g = NULL;
    unsigned int y_sad_g, y_sad_thr, y_sad_last;
    bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
            (mi_row + 4 < cm->mi_rows);
    assert(yv12 != NULL);
    if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
        cpi->svc.use_gf_temporal_ref_current_layer) {
      // For now, GOLDEN will not be used for non-zero spatial layers, since
      // it may not be a temporal reference.
      yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
    }
    // Only compute y_sad_g (sad for the golden reference) for speed < 8.
    if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
        (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
      y_sad_g = cpi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    } else {
      y_sad_g = UINT_MAX;
    }
    if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
        cpi->rc.is_src_frame_alt_ref) {
      yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                           &cm->frame_refs[ALTREF_FRAME - 1].sf);
      mi->ref_frame[0] = ALTREF_FRAME;
      y_sad_g = UINT_MAX;
    } else {
      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                           &cm->frame_refs[LAST_FRAME - 1].sf);
      mi->ref_frame[0] = LAST_FRAME;
    }
    mi->ref_frame[1] = NONE;
    mi->sb_type = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filter = BILINEAR;
    if (cpi->oxcf.speed >= 8 && !low_res &&
        x->content_state_sb != kVeryHighSad) {
      y_sad = cpi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    } else {
      const MV dummy_mv = { 0, 0 };
      y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
                                            &dummy_mv);
      x->sb_use_mv_part = 1;
      x->sb_mvcol_part = mi->mv[0].as_mv.col;
      x->sb_mvrow_part = mi->mv[0].as_mv.row;
      if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
          cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode &&
          cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source &&
          cm->width > 640 && cm->height > 480) {
        // Disable split below 16x16 block size when scroll motion (horz or
        // vert) is detected.
        // TODO(marpan/jianj): Improve this condition: the issue is that the
        // search range is hard-coded/limited in
        // vp9_int_pro_motion_estimation(), so scroll motion may not be
        // detected here.
        if (((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) ||
             (abs(x->sb_mvcol_part) >= 48 && abs(x->sb_mvrow_part) <= 8)) &&
            y_sad < 100000) {
          compute_minmax_variance = 0;
          thresholds[2] = INT64_MAX;
        }
      }
    }
    y_sad_last = y_sad;
    // Pick the ref frame for partitioning; bias the last frame when y_sad_g
    // and y_sad are close if short_circuit_low_temp_var is on.
    y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
    if (y_sad_g < y_sad_thr) {
      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
      mi->ref_frame[0] = GOLDEN_FRAME;
      mi->mv[0].as_int = 0;
      y_sad = y_sad_g;
      ref_frame_partition = GOLDEN_FRAME;
    } else {
      x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
      ref_frame_partition = LAST_FRAME;
    }
    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
    if (cpi->use_skin_detection)
      x->sb_is_skin =
          skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split);
    d = xd->plane[0].dst.buf;
    dp = xd->plane[0].dst.stride;
    // If the y_sad is very small, take 64x64 as partition and exit.
    // Don't check on boosted segment for now, as 64x64 is suppressed there.
    if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
      const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64];
      const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64];
      if (mi_col + block_width / 2 < cm->mi_cols &&
          mi_row + block_height / 2 < cm->mi_rows) {
        set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64);
        x->variance_low[0] = 1;
        chroma_check(cpi, x, bsize, y_sad, is_key_frame);
        if (cpi->sf.svc_use_lowres_part &&
            cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
          update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
        if (cpi->sf.copy_partition_flag) {
          update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
        }
        return 0;
      }
    }
    // If the y_sad is small enough, copy the partition of the superblock
    // from the last frame to the current frame, but only if the last frame
    // is not a keyframe. Stop the copy every cpi->max_copied_frame to
    // refresh the partition.
    // TODO(jianj) : tune the threshold.
    if (cpi->sf.copy_partition_flag && y_sad_last < cpi->vbp_threshold_copy &&
        copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
      chroma_check(cpi, x, bsize, y_sad, is_key_frame);
      if (cpi->sf.svc_use_lowres_part &&
          cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
        update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
      return 0;
    }
  } else {
    d = VP9_VAR_OFFS;
    dp = 0;
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      switch (xd->bd) {
        case 10: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); break;
        case 12: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); break;
        case 8:
        default: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); break;
      }
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }
  if (low_res && threshold_4x4avg < INT64_MAX)
    CHECK_MEM_ERROR(cm, vt2, vpx_calloc(16, sizeof(*vt2)));
  // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
  // for splits.
  for (i = 0; i < 4; i++) {
    const int x32_idx = ((i & 1) << 5);
    const int y32_idx = ((i >> 1) << 5);
    const int i2 = i << 2;
    force_split[i + 1] = 0;
    avg_16x16[i] = 0;
    maxvar_16x16[i] = 0;
    minvar_16x16[i] = INT_MAX;
    for (j = 0; j < 4; j++) {
      const int x16_idx = x32_idx + ((j & 1) << 4);
      const int y16_idx = y32_idx + ((j >> 1) << 4);
      const int split_index = 5 + i2 + j;
      v16x16 *vst = &vt.split[i].split[j];
      force_split[split_index] = 0;
      variance4x4downsample[i2 + j] = 0;
      if (!is_key_frame) {
        fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst,
#if CONFIG_VP9_HIGHBITDEPTH
                             xd->cur_buf->flags,
#endif
                             pixels_wide, pixels_high, is_key_frame);
        fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
        get_variance(&vt.split[i].split[j].part_variances.none);
        avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance;
        if (vt.split[i].split[j].part_variances.none.variance < minvar_16x16[i])
          minvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
        if (vt.split[i].split[j].part_variances.none.variance > maxvar_16x16[i])
          maxvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
        if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) {
          // 16X16 variance is above threshold for split, so force split to
          // 8x8 for this 16x16 block (this also forces splits for upper
          // levels).
          force_split[split_index] = 1;
          force_split[i + 1] = 1;
          force_split[0] = 1;
        } else if (compute_minmax_variance &&
                   vt.split[i].split[j].part_variances.none.variance >
                       thresholds[1] &&
                   !cyclic_refresh_segment_id_boosted(segment_id)) {
          // We have some nominal amount of 16x16 variance (based on average),
          // compute the minmax over the 8x8 sub-blocks, and if above
          // threshold, force split to 8x8 block for this 16x16 block.
          int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                                          xd->cur_buf->flags,
#endif
                                          pixels_wide, pixels_high);
          int thresh_minmax = (int)cpi->vbp_threshold_minmax;
          if (x->content_state_sb == kVeryHighSad)
            thresh_minmax = thresh_minmax << 1;
          if (minmax > thresh_minmax) {
            force_split[split_index] = 1;
            force_split[i + 1] = 1;
            force_split[0] = 1;
          }
        }
      }
      if (is_key_frame ||
          (low_res && vt.split[i].split[j].part_variances.none.variance >
                          threshold_4x4avg)) {
        force_split[split_index] = 0;
        // Go down to 4x4 down-sampling for variance.
        variance4x4downsample[i2 + j] = 1;
        for (k = 0; k < 4; k++) {
          int x8_idx = x16_idx + ((k & 1) << 3);
          int y8_idx = y16_idx + ((k >> 1) << 3);
          v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
          fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
#if CONFIG_VP9_HIGHBITDEPTH
                               xd->cur_buf->flags,
#endif
                               pixels_wide, pixels_high, is_key_frame);
        }
      }
    }
  }
  if (cpi->noise_estimate.enabled)
    noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate);
  // Fill the rest of the variance tree by summing split partition values.
  avg_32x32 = 0;
  for (i = 0; i < 4; i++) {
    const int i2 = i << 2;
    for (j = 0; j < 4; j++) {
      if (variance4x4downsample[i2 + j] == 1) {
        v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : &vt.split[i].split[j];
        for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8);
        fill_variance_tree(vtemp, BLOCK_16X16);
        // If the variance of this 16x16 block is above the threshold, force
        // the block to split. This also forces a split on the upper levels.
        get_variance(&vtemp->part_variances.none);
        if (vtemp->part_variances.none.variance > thresholds[2]) {
          force_split[5 + i2 + j] = 1;
          force_split[i + 1] = 1;
          force_split[0] = 1;
        }
      }
    }
    fill_variance_tree(&vt.split[i], BLOCK_32X32);
    // If the variance of this 32x32 block is above the threshold, or if it
    // is above (some threshold of) the average variance over the sub-16x16
    // blocks, then force this block to split. This also forces a split on
    // the upper (64x64) level.
    if (!force_split[i + 1]) {
      get_variance(&vt.split[i].part_variances.none);
      var_32x32 = vt.split[i].part_variances.none.variance;
      max_var_32x32 = VPXMAX(var_32x32, max_var_32x32);
      min_var_32x32 = VPXMIN(var_32x32, min_var_32x32);
      if (vt.split[i].part_variances.none.variance > thresholds[1] ||
          (!is_key_frame &&
           vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) &&
           vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) {
        force_split[i + 1] = 1;
        force_split[0] = 1;
      } else if (!is_key_frame && noise_level < kLow && cm->height <= 360 &&
                 (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[1] >> 1) &&
                 maxvar_16x16[i] > thresholds[1]) {
        force_split[i + 1] = 1;
        force_split[0] = 1;
      }
      avg_32x32 += var_32x32;
    }
  }
  if (!force_split[0]) {
    fill_variance_tree(&vt, BLOCK_64X64);
    get_variance(&vt.part_variances.none);
    // If the variance of this 64x64 block is above (some threshold of) the
    // average variance over the sub-32x32 blocks, then force this block to
    // split. Only checking this for noise level >= medium for now.
    if (!is_key_frame && noise_level >= kMedium &&
        vt.part_variances.none.variance > (9 * avg_32x32) >> 5)
      force_split[0] = 1;
    // Else if the maximum 32x32 variance minus the minimum 32x32 variance in
    // a 64x64 block is greater than threshold and the maximum 32x32 variance
    // is above a minimum threshold, then force the split of a 64x64 block.
    // Only check this for low noise.
    else if (!is_key_frame && noise_level < kMedium &&
             (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) &&
             max_var_32x32 > thresholds[0] >> 1)
      force_split[0] = 1;
  }
  // Now go through the entire structure, splitting every block size until
  // we get to one that has a variance lower than our threshold.
  if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
      !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col,
                           thresholds[0], BLOCK_16X16, force_split[0])) {
    for (i = 0; i < 4; ++i) {
      const int x32_idx = ((i & 1) << 2);
      const int y32_idx = ((i >> 1) << 2);
      const int i2 = i << 2;
      if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32,
                               (mi_row + y32_idx), (mi_col + x32_idx),
                               thresholds[1], BLOCK_16X16,
                               force_split[i + 1])) {
        for (j = 0; j < 4; ++j) {
          const int x16_idx = ((j & 1) << 1);
          const int y16_idx = ((j >> 1) << 1);
          // For inter frames: if variance4x4downsample[] == 1 for this 16x16
          // block, then the variance is based on 4x4 down-sampling, so use
          // vt2 in set_vt_partitioning(), otherwise use vt.
          v16x16 *vtemp = (!is_key_frame && variance4x4downsample[i2 + j] == 1)
                              ? &vt2[i2 + j]
                              : &vt.split[i].split[j];
          if (!set_vt_partitioning(
                  cpi, x, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx,
                  mi_col + x32_idx + x16_idx, thresholds[2],
                  cpi->vbp_bsize_min, force_split[5 + i2 + j])) {
            for (k = 0; k < 4; ++k) {
              const int x8_idx = (k & 1);
              const int y8_idx = (k >> 1);
              if (use_4x4_partition) {
                if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k],
                                         BLOCK_8X8,
                                         mi_row + y32_idx + y16_idx + y8_idx,
                                         mi_col + x32_idx + x16_idx + x8_idx,
                                         thresholds[3], BLOCK_8X8, 0)) {
                  set_block_size(
                      cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
                      (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_4X4);
                }
              } else {
                set_block_size(
                    cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
                    (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8);
              }
            }
          }
        }
      }
    }
  }
  if (!frame_is_intra_only(cm) && cpi->sf.copy_partition_flag) {
    update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
  }
  if (!frame_is_intra_only(cm) && cpi->sf.svc_use_lowres_part &&
      cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
    update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
  if (cpi->sf.short_circuit_low_temp_var) {
    set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition,
                          mi_col, mi_row);
  }
  chroma_check(cpi, x, bsize, y_sad, is_key_frame);
  if (vt2) vpx_free(vt2);
  return 0;
}
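/* Index layout of force_split[21] used throughout choose_partitioning()
 * (restating the in-function comment, for illustration):
 *   [0]      the 64x64 superblock
 *   [1..4]   the four 32x32 blocks (i + 1)
 *   [5..20]  the sixteen 16x16 blocks (5 + (i << 2) + j)
 * Setting a 16x16 flag also sets its 32x32 parent and the 64x64 root, so a
 * forced split propagates up the tree. */
#if 0
/* Hypothetical helper mirroring the indexing above. */
static int force_split_index_16x16(int i, int j) { return 5 + (i << 2) + j; }
#endif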
#if !CONFIG_REALTIME_ONLY
static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx,
                         int mi_row, int mi_col, BLOCK_SIZE bsize,
                         int output_enabled) {
  int i, x_idx, y;
  VP9_COMMON *const cm = &cpi->common;
  RD_COUNTS *const rdc = &td->rd_counts;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  MODE_INFO *mi = &ctx->mic;
  MODE_INFO *const xdmi = xd->mi[0];
  MODE_INFO *mi_addr = xd->mi[0];
  const struct segmentation *const seg = &cm->seg;
  const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
  const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
  const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
  const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
  MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
  int w, h;
  const int mis = cm->mi_stride;
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  int max_plane;
  assert(mi->sb_type == bsize);
  *mi_addr = *mi;
  *x->mbmi_ext = ctx->mbmi_ext;
  // If segmentation is in use:
  if (seg->enabled) {
    // For in-frame complexity AQ, copy the segment id from the segment map.
    if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi_addr->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    // Else, for cyclic refresh mode, update the segment map, set the segment
    // id, and then update the quantizer.
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize,
                                        ctx->rate, ctx->dist, x->skip, p);
    }
  }
  max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1;
  for (i = 0; i < max_plane; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][1];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
    p[i].eobs = ctx->eobs_pbuf[i][1];
  }
  for (i = max_plane; i < MAX_MB_PLANE; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][2];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
    p[i].eobs = ctx->eobs_pbuf[i][2];
  }
  // Restore the coding context of the MB to that which was in place
  // when the mode was picked for it.
  for (y = 0; y < mi_height; y++)
    for (x_idx = 0; x_idx < mi_width; x_idx++)
      if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
          (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
        xd->mi[x_idx + y * mis] = mi_addr;
      }
  if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x);
  if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) {
    xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
    xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
  }
  x->skip = ctx->skip;
  memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk,
         sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
  if (!output_enabled) return;
#if CONFIG_INTERNAL_STATS
  if (frame_is_intra_only(cm)) {
    static const int kf_mode_index[] = {
      THR_DC /*DC_PRED*/,          THR_V_PRED /*V_PRED*/,
      THR_H_PRED /*H_PRED*/,       THR_D45_PRED /*D45_PRED*/,
      THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/,
      THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/,
      THR_D63_PRED /*D63_PRED*/,   THR_TM /*TM_PRED*/,
    };
    ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]];
  } else {
    // Note how often each mode is chosen as best.
    ++cpi->mode_chosen_counts[ctx->best_mode_index];
  }
#endif
  if (!frame_is_intra_only(cm)) {
    if (is_inter_block(xdmi)) {
      vp9_update_mv_count(td);
      if (cm->interp_filter == SWITCHABLE) {
        const int ctx = get_pred_context_switchable_interp(xd);
        ++td->counts->switchable_interp[ctx][xdmi->interp_filter];
      }
    }
    rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
    rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
    rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
      rdc->filter_diff[i] += ctx->best_filter_diff[i];
  }
  for (h = 0; h < y_mis; ++h) {
    MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
    for (w = 0; w < x_mis; ++w) {
      MV_REF *const mv = frame_mv + w;
      mv->ref_frame[0] = mi->ref_frame[0];
      mv->ref_frame[1] = mi->ref_frame[1];
      mv->mv[0].as_int = mi->mv[0].as_int;
      mv->mv[1].as_int = mi->mv[1].as_int;
    }
  }
}
#endif  // !CONFIG_REALTIME_ONLY
void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col) {
  uint8_t *const buffers[3] = { src->y_buffer, src->u_buffer, src->v_buffer };
  const int strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
  int i;
  // Set current frame pointer.
  x->e_mbd.cur_buf = src;
  for (i = 0; i < MAX_MB_PLANE; i++)
    setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col,
                     NULL, x->e_mbd.plane[i].subsampling_x,
                     x->e_mbd.plane[i].subsampling_y);
}
static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
                                   RD_COST *rd_cost, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  INTERP_FILTER filter_ref;
  filter_ref = get_pred_context_switchable_interp(xd);
  if (filter_ref == SWITCHABLE_FILTERS) filter_ref = EIGHTTAP;
  mi->sb_type = bsize;
  mi->mode = ZEROMV;
  mi->tx_size =
      VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]);
  mi->skip = 1;
  mi->uv_mode = DC_PRED;
  mi->ref_frame[0] = LAST_FRAME;
  mi->ref_frame[1] = NONE;
  mi->mv[0].as_int = 0;
  mi->interp_filter = filter_ref;
  xd->mi[0]->bmi[0].as_mv[0].as_int = 0;
  x->skip = 1;
  vp9_rd_cost_init(rd_cost);
}
#if !CONFIG_REALTIME_ONLY
static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x,
                               int mi_row, int mi_col, BLOCK_SIZE bsize,
                               AQ_MODE aq_mode) {
  VP9_COMMON *const cm = &cpi->common;
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  const uint8_t *const map =
      cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
  vp9_init_plane_quantizers(cpi, x);
  vpx_clear_system_state();
  if (aq_mode == NO_AQ || aq_mode == PSNR_AQ) {
    if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
  } else if (aq_mode == PERCEPTUAL_AQ) {
    x->rdmult = x->cb_rdmult;
  } else if (aq_mode == CYCLIC_REFRESH_AQ) {
    // If segment is boosted, use rdmult for that segment.
    if (cyclic_refresh_segment_id_boosted(
            get_segment_id(cm, map, bsize, mi_row, mi_col)))
      x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
  } else {
    x->rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
  }
  if (oxcf->tuning == VP8_TUNE_SSIM) {
    set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
  }
}
static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
                             MACROBLOCK *const x, int mi_row, int mi_col,
                             RD_COST *rd_cost, BLOCK_SIZE bsize,
                             PICK_MODE_CONTEXT *ctx, int rate_in_best_rd,
                             int64_t dist_in_best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  int i, orig_rdmult;
  int64_t best_rd = INT64_MAX;
  vpx_clear_system_state();
  // Use the lower precision, but faster, 32x32 fdct for mode selection.
  x->use_lp32x32fdct = 1;
  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
  mi = xd->mi[0];
  mi->sb_type = bsize;
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][0];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
    p[i].eobs = ctx->eobs_pbuf[i][0];
  }
  ctx->is_coded = 0;
  ctx->skippable = 0;
  ctx->pred_pixel_ready = 0;
  x->skip_recode = 0;
  // Set to zero to make sure we do not use the previous encoded frame stats.
  mi->skip = 0;
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->source_variance = vp9_high_get_sby_perpixel_variance(
        cpi, &x->plane[0].src, bsize, xd->bd);
  } else {
    x->source_variance =
        vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  }
#else
  x->source_variance =
      vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // Save rdmult before it might be changed, so it can be restored later.
  orig_rdmult = x->rdmult;
  if ((cpi->sf.tx_domain_thresh > 0.0) || (cpi->sf.quant_opt_thresh > 0.0)) {
    double logvar = vp9_log_block_var(cpi, x, bsize);
    // Check block complexity as part of the decision on using pixel or
    // transform domain distortion in rd tests.
    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion &&
                         (logvar >= cpi->sf.tx_domain_thresh);
    // Check block complexity as part of the decision on using quantized
    // coefficient optimisation inside the rd loop.
    x->block_qcoeff_opt =
        cpi->sf.allow_quant_coeff_opt && (logvar <= cpi->sf.quant_opt_thresh);
  } else {
    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion;
    x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt;
  }
  set_segment_index(cpi, x, mi_row, mi_col, bsize, 0);
  set_segment_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode);
  if (rate_in_best_rd < INT_MAX && dist_in_best_rd < INT64_MAX) {
    best_rd = vp9_calculate_rd_cost(x->rdmult, x->rddiv, rate_in_best_rd,
                                    dist_in_best_rd);
  }
  // Find best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB.
  if (frame_is_intra_only(cm)) {
    vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
  } else {
    if (bsize >= BLOCK_8X8) {
      if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
        vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
                                           ctx, best_rd);
      else
        vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                  bsize, ctx, best_rd);
    } else {
      vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                    bsize, ctx, best_rd);
    }
  }
  // Examine the resulting rate and for AQ mode 2 make a segment choice.
  if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
      (bsize >= BLOCK_16X16) &&
      (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
       (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
    vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
  }
  // TODO(jingning) The rate-distortion optimization flow needs to be
  // refactored to provide proper exit/return handling.
  if (rd_cost->rate == INT_MAX || rd_cost->dist == INT64_MAX)
    rd_cost->rdcost = INT64_MAX;
  else
    rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
  x->rdmult = orig_rdmult;
  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
}
#endif  // !CONFIG_REALTIME_ONLY
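/* The cost combined via RDCOST above follows the usual Lagrangian form
 * J = lambda * rate + distortion, with x->rdmult playing the (scaled)
 * lambda and x->rddiv scaling the distortion term. A sketch of the
 * combination only: the exact scaling constants live in the RDCOST macro
 * in vp9_rd.h, so the divisor 256 below is an assumption for illustration,
 * not the real macro body. */
#if 0
#include <stdint.h>

static int64_t rd_cost_sketch(int rdmult, int rddiv, int rate, int64_t dist) {
  /* Hypothetical scaling; see vp9_rd.h for the actual constants. */
  return ((int64_t)rate * rdmult) / 256 + (dist << rddiv);
}
#endif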
static void update_stats(VP9_COMMON *cm, ThreadData *td) {
  const MACROBLOCK *x = &td->mb;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MODE_INFO *const mi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const BLOCK_SIZE bsize = mi->sb_type;
  if (!frame_is_intra_only(cm)) {
    FRAME_COUNTS *const counts = td->counts;
    const int inter_block = is_inter_block(mi);
    const int seg_ref_active =
        segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME);
    if (!seg_ref_active) {
      counts->intra_inter[get_intra_inter_context(xd)][inter_block]++;
      // If the segment reference feature is enabled, we have only a single
      // reference frame allowed for the segment, so exclude it from
      // the reference frame counts used to work out probabilities.
      if (inter_block) {
        const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0];
        if (cm->reference_mode == REFERENCE_MODE_SELECT)
          counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
                            [has_second_ref(mi)]++;
        if (has_second_ref(mi)) {
          const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
          const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd);
          const int bit = mi->ref_frame[!idx] == cm->comp_var_ref[1];
          counts->comp_ref[ctx][bit]++;
        } else {
          counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
                            [ref0 != LAST_FRAME]++;
          if (ref0 != LAST_FRAME)
            counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
                              [ref0 != GOLDEN_FRAME]++;
        }
      }
    }
    if (inter_block &&
        !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) {
      const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]];
      if (bsize >= BLOCK_8X8) {
        const PREDICTION_MODE mode = mi->mode;
        ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)];
      } else {
        const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
        const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
        int idx, idy;
        for (idy = 0; idy < 2; idy += num_4x4_h) {
          for (idx = 0; idx < 2; idx += num_4x4_w) {
            const int j = idy * 2 + idx;
            const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
            ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
          }
        }
      }
    }
  }
}
#if !CONFIG_REALTIME_ONLY
static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
                            ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
                            ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
                            PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
                            BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int p;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int mi_height = num_8x8_blocks_high_lookup[bsize];
  for (p = 0; p < MAX_MB_PLANE; p++) {
    memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
           a + num_4x4_blocks_wide * p,
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
               xd->plane[p].subsampling_x);
    memcpy(xd->left_context[p] +
               ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
           l + num_4x4_blocks_high * p,
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
               xd->plane[p].subsampling_y);
  }
  memcpy(xd->above_seg_context + mi_col, sa,
         sizeof(*xd->above_seg_context) * mi_width);
  memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
         sizeof(xd->left_seg_context[0]) * mi_height);
}
static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
                         ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
                         ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
                         PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
                         BLOCK_SIZE bsize) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  int p;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int mi_height = num_8x8_blocks_high_lookup[bsize];

  // buffer the above/left context information of the block in search.
  for (p = 0; p < MAX_MB_PLANE; ++p) {
    memcpy(a + num_4x4_blocks_wide * p,
           xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
               xd->plane[p].subsampling_x);
    memcpy(l + num_4x4_blocks_high * p,
           xd->left_context[p] +
               ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
               xd->plane[p].subsampling_y);
  }
  memcpy(sa, xd->above_seg_context + mi_col,
         sizeof(*xd->above_seg_context) * mi_width);
  memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
         sizeof(xd->left_seg_context[0]) * mi_height);
}
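// Encode a single block: set up per-block state, optionally refresh the
// rate-distortion multiplier from the TPL model, then commit the mode
// decision stored in ctx and emit tokens.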
static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td,
                     TOKENEXTRA **tp, int mi_row, int mi_col,
                     int output_enabled, BLOCK_SIZE bsize,
                     PICK_MODE_CONTEXT *ctx) {
  MACROBLOCK *const x = &td->mb;
  set_offsets(cpi, tile, x, mi_row, mi_col, bsize);

  if (cpi->sf.enable_tpl_model &&
      (cpi->oxcf.aq_mode == NO_AQ || cpi->oxcf.aq_mode == PERCEPTUAL_AQ)) {
    const VP9EncoderConfig *const oxcf = &cpi->oxcf;
    x->rdmult = x->cb_rdmult;
    if (oxcf->tuning == VP8_TUNE_SSIM) {
      set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
    }
  }

  update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
  encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);

  if (output_enabled) {
    update_stats(&cpi->common, td);

    (*tp)->token = EOSB_TOKEN;
    (*tp)++;
  }
}
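// Recursively encode a superblock following the partitioning recorded in
// pc_tree; second rectangular sub-blocks that fall outside the visible frame
// are skipped, mirroring the checks used during the partition search.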
static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile,
                      TOKENEXTRA **tp, int mi_row, int mi_col,
                      int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;

  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  int ctx;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize = bsize;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  if (bsize >= BLOCK_8X8) {
    ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    subsize = get_subsize(bsize, pc_tree->partitioning);
  } else {
    ctx = 0;
    subsize = BLOCK_4X4;
  }

  partition = partition_lookup[bsl][subsize];
  if (output_enabled && bsize != BLOCK_4X4)
    td->counts->partition[ctx][partition]++;

  switch (partition) {
    case PARTITION_NONE:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->none);
      break;
    case PARTITION_VERT:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->vertical[0]);
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled,
                 subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->horizontal[0]);
      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled,
                 subsize, &pc_tree->horizontal[1]);
      }
      break;
    default:
      assert(partition == PARTITION_SPLIT);
      if (bsize == BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
                 pc_tree->leaf_split[0]);
      } else {
        encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  pc_tree->split[0]);
        encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
                  subsize, pc_tree->split[1]);
        encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
                  subsize, pc_tree->split[2]);
        encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
                  subsize, pc_tree->split[3]);
      }
      break;
  }

  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
#endif  // !CONFIG_REALTIME_ONLY
// Check to see if the given partition size is allowed for a specified number
// of 8x8 block rows and columns remaining in the image.
// If not, return the largest allowed partition size.
static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left,
                                      int cols_left, int *bh, int *bw) {
  if (rows_left <= 0 || cols_left <= 0) {
    return VPXMIN(bsize, BLOCK_8X8);
  } else {
    for (; bsize > 0; bsize -= 3) {
      *bh = num_8x8_blocks_high_lookup[bsize];
      *bw = num_8x8_blocks_wide_lookup[bsize];
      if ((*bh <= rows_left) && (*bw <= cols_left)) {
        break;
      }
    }
  }
  return bsize;
}
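// Walk the 8x8 grid of a partially visible SB64 and assign each position the
// largest partition size that still fits in the remaining rows and columns.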
static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, int bh_in,
                                         int bw_in, int row8x8_remaining,
                                         int col8x8_remaining, BLOCK_SIZE bsize,
                                         MODE_INFO **mi_8x8) {
  int bh = bh_in;
  int r, c;
  for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
    int bw = bw_in;
    for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
      const int index = r * mis + c;
      mi_8x8[index] = mi + index;
      mi_8x8[index]->sb_type = find_partition_size(
          bsize, row8x8_remaining - r, col8x8_remaining - c, &bh, &bw);
    }
  }
}
// This function attempts to set all mode info entries in a given SB64
// to the same block partition size.
// However, at the bottom and right borders of the image the requested size
// may not be allowed, in which case this code attempts to choose the largest
// allowable partition.
static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
                                   MODE_INFO **mi_8x8, int mi_row, int mi_col,
                                   BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  const int mis = cm->mi_stride;
  const int row8x8_remaining = tile->mi_row_end - mi_row;
  const int col8x8_remaining = tile->mi_col_end - mi_col;
  int block_row, block_col;
  MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
  int bh = num_8x8_blocks_high_lookup[bsize];
  int bw = num_8x8_blocks_wide_lookup[bsize];

  assert((row8x8_remaining > 0) && (col8x8_remaining > 0));

  // Apply the requested partition size to the SB64 if it is all "in image".
  if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
      (row8x8_remaining >= MI_BLOCK_SIZE)) {
    for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
      for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
        int index = block_row * mis + block_col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->sb_type = bsize;
      }
    }
  } else {
    // Else this is a partial SB64.
    set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
                                 col8x8_remaining, bsize, mi_8x8);
  }
}
static const struct {
  int row;
  int col;
} coord_lookup[16] = {
  // 32x32 index = 0
  { 0, 0 },
  { 0, 2 },
  { 2, 0 },
  { 2, 2 },
  // 32x32 index = 1
  { 0, 4 },
  { 0, 6 },
  { 2, 4 },
  { 2, 6 },
  // 32x32 index = 2
  { 4, 0 },
  { 4, 2 },
  { 6, 0 },
  { 6, 2 },
  // 32x32 index = 3
  { 4, 4 },
  { 4, 6 },
  { 6, 4 },
  { 6, 6 },
};
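// Choose a partitioning from source-difference variance: start from 16x16
// blocks and merge to 32x32 (and then 64x64) wherever all four constituent
// variances fall below the threshold, which is doubled for the 64x64 test.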
static void set_source_var_based_partition(VP9_COMP *cpi,
                                           const TileInfo *const tile,
                                           MACROBLOCK *const x,
                                           MODE_INFO **mi_8x8, int mi_row,
                                           int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int mis = cm->mi_stride;
  const int row8x8_remaining = tile->mi_row_end - mi_row;
  const int col8x8_remaining = tile->mi_col_end - mi_col;
  MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;

  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  assert((row8x8_remaining > 0) && (col8x8_remaining > 0));

  // In-image SB64
  if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
      (row8x8_remaining >= MI_BLOCK_SIZE)) {
    int i, j;
    int index;
    diff d32[4];
    const int offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1);
    int is_larger_better = 0;
    int use32x32 = 0;
    unsigned int thr = cpi->source_var_thresh;

    memset(d32, 0, 4 * sizeof(diff));

    for (i = 0; i < 4; i++) {
      diff *d16[4];

      for (j = 0; j < 4; j++) {
        int b_mi_row = coord_lookup[i * 4 + j].row;
        int b_mi_col = coord_lookup[i * 4 + j].col;
        int boffset = b_mi_row / 2 * cm->mb_cols + b_mi_col / 2;

        d16[j] = cpi->source_diff_var + offset + boffset;

        index = b_mi_row * mis + b_mi_col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->sb_type = BLOCK_16X16;

        // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
        // size to further improve quality.
      }

      is_larger_better = (d16[0]->var < thr) && (d16[1]->var < thr) &&
                         (d16[2]->var < thr) && (d16[3]->var < thr);

      // Use 32x32 partition
      if (is_larger_better) {
        use32x32 += 1;

        for (j = 0; j < 4; j++) {
          d32[i].sse += d16[j]->sse;
          d32[i].sum += d16[j]->sum;
        }
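        // Combined 32x32 variance: var = sse - sum^2 / N, with N = 1024
        // (32 * 32 pixels), hence the >> 10 below.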
        d32[i].var =
            (unsigned int)(d32[i].sse -
                           (unsigned int)(((int64_t)d32[i].sum * d32[i].sum) >>
                                          10));

        index = coord_lookup[i * 4].row * mis + coord_lookup[i * 4].col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->sb_type = BLOCK_32X32;
      }
    }

    if (use32x32 == 4) {
      thr <<= 1;
      is_larger_better = (d32[0].var < thr) && (d32[1].var < thr) &&
                         (d32[2].var < thr) && (d32[3].var < thr);

      // Use 64x64 partition
      if (is_larger_better) {
        mi_8x8[0] = mi_upper_left;
        mi_8x8[0]->sb_type = BLOCK_64X64;
      }
    }
  } else {  // partial in-image SB64
    int bh = num_8x8_blocks_high_lookup[BLOCK_16X16];
    int bw = num_8x8_blocks_wide_lookup[BLOCK_16X16];
    set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
                                 col8x8_remaining, BLOCK_16X16, mi_8x8);
  }
}
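// Real-time counterpart of update_state(): copy the picked mode into the
// frame-level mode info, refresh segmentation and quantizers when needed,
// update counts, and store the block's motion vectors for the next frame.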
static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
                            PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
                            int bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  struct macroblock_plane *const p = x->plane;
  const struct segmentation *const seg = &cm->seg;
  const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
  const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
  const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
  const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);

  *(xd->mi[0]) = ctx->mic;
  *(x->mbmi_ext) = ctx->mbmi_ext;

  if (seg->enabled && (cpi->oxcf.aq_mode != NO_AQ || cpi->roi.enabled)) {
    // Setting segmentation map for cyclic_refresh.
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize,
                                        ctx->rate, ctx->dist, x->skip, p);
    } else {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    vp9_init_plane_quantizers(cpi, x);
  }

  if (is_inter_block(mi)) {
    vp9_update_mv_count(td);
    if (cm->interp_filter == SWITCHABLE) {
      const int pred_ctx = get_pred_context_switchable_interp(xd);
      ++td->counts->switchable_interp[pred_ctx][mi->interp_filter];
    }

    if (mi->sb_type < BLOCK_8X8) {
      mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
      mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
    }
  }

  if (cm->use_prev_frame_mvs || !cm->error_resilient_mode ||
      (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 &&
       cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) {
    MV_REF *const frame_mvs =
        cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
    int w, h;

    for (h = 0; h < y_mis; ++h) {
      MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
      for (w = 0; w < x_mis; ++w) {
        MV_REF *const mv = frame_mv + w;
        mv->ref_frame[0] = mi->ref_frame[0];
        mv->ref_frame[1] = mi->ref_frame[1];
        mv->mv[0].as_int = mi->mv[0].as_int;
        mv->mv[1].as_int = mi->mv[1].as_int;
      }
    }
  }

  x->skip = ctx->skip;
  x->skip_txfm[0] = (mi->segment_id || xd->lossless) ? 0 : ctx->skip_txfm[0];
}
static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
                        const TileInfo *const tile, TOKENEXTRA **tp, int mi_row,
                        int mi_col, int output_enabled, BLOCK_SIZE bsize,
                        PICK_MODE_CONTEXT *ctx) {
  MACROBLOCK *const x = &td->mb;
  set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
  update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize);

  encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
  update_stats(&cpi->common, td);

  (*tp)->token = EOSB_TOKEN;
  (*tp)++;
}
static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
                         const TileInfo *const tile, TOKENEXTRA **tp,
                         int mi_row, int mi_col, int output_enabled,
                         BLOCK_SIZE bsize, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  int ctx;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  if (bsize >= BLOCK_8X8) {
    const int idx_str = xd->mi_stride * mi_row + mi_col;
    MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
    ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    subsize = mi_8x8[0]->sb_type;
  } else {
    ctx = 0;
    subsize = BLOCK_4X4;
  }

  partition = partition_lookup[bsl][subsize];
  if (output_enabled && bsize != BLOCK_4X4)
    td->counts->partition[ctx][partition]++;

  switch (partition) {
    case PARTITION_NONE:
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  &pc_tree->none);
      break;
    case PARTITION_VERT:
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  &pc_tree->vertical[0]);
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
                    subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  &pc_tree->horizontal[0]);
      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        encode_b_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
                    subsize, &pc_tree->horizontal[1]);
      }
      break;
    default:
      assert(partition == PARTITION_SPLIT);
      subsize = get_subsize(bsize, PARTITION_SPLIT);
      encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                   pc_tree->split[0]);
      encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
                   subsize, pc_tree->split[1]);
      encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
                   subsize, pc_tree->split[2]);
      encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs,
                   output_enabled, subsize, pc_tree->split[3]);
      break;
  }

  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
#if !CONFIG_REALTIME_ONLY
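// Rate-distortion test the partitioning already stored in mi_8x8 (e.g. a
// fixed size or the previous frame's choice), optionally compare it against
// PARTITION_NONE and a one-level split, and encode whichever is cheapest.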
static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
                             TileDataEnc *tile_data, MODE_INFO **mi_8x8,
                             TOKENEXTRA **tp, int mi_row, int mi_col,
                             BLOCK_SIZE bsize, int *rate, int64_t *dist,
                             int do_recon, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mis = cm->mi_stride;
  const int bsl = b_width_log2_lookup[bsize];
  const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
  const int bss = (1 << bsl) / 4;
  int i, pl;
  PARTITION_TYPE partition = PARTITION_NONE;
  BLOCK_SIZE subsize;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  RD_COST last_part_rdc, none_rdc, chosen_rdc;
  BLOCK_SIZE sub_subsize = BLOCK_4X4;
  int splits_below = 0;
  BLOCK_SIZE bs_type = mi_8x8[0]->sb_type;
  int do_partition_search = 1;
  PICK_MODE_CONTEXT *ctx = &pc_tree->none;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  assert(num_4x4_blocks_wide_lookup[bsize] ==
         num_4x4_blocks_high_lookup[bsize]);

  vp9_rd_cost_reset(&last_part_rdc);
  vp9_rd_cost_reset(&none_rdc);
  vp9_rd_cost_reset(&chosen_rdc);

  partition = partition_lookup[bsl][bs_type];
  subsize = get_subsize(bsize, partition);

  pc_tree->partitioning = partition;
  save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

  if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) {
    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
    x->mb_energy = vp9_block_energy(cpi, x, bsize);
  }

  if (do_partition_search &&
      cpi->sf.partition_search_type == SEARCH_PARTITION &&
      cpi->sf.adjust_partitioning_from_last_frame) {
    // Check if any of the sub blocks are further split.
    if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
      sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
      splits_below = 1;
      for (i = 0; i < 4; i++) {
        int jj = i >> 1, ii = i & 0x01;
        MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss];
        if (this_mi && this_mi->sb_type >= sub_subsize) {
          splits_below = 0;
        }
      }
    }

    // If the partition is not none, try none unless each of the 4 splits is
    // split even further.
    if (partition != PARTITION_NONE && !splits_below &&
        mi_row + (mi_step >> 1) < cm->mi_rows &&
        mi_col + (mi_step >> 1) < cm->mi_cols) {
      pc_tree->partitioning = PARTITION_NONE;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx,
                       INT_MAX, INT64_MAX);

      pl = partition_plane_context(xd, mi_row, mi_col, bsize);

      if (none_rdc.rate < INT_MAX) {
        none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
        none_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist);
      }

      restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
      mi_8x8[0]->sb_type = bs_type;
      pc_tree->partitioning = partition;
    }
  }

  switch (partition) {
    case PARTITION_NONE:
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize,
                       ctx, INT_MAX, INT64_MAX);
      break;
    case PARTITION_HORZ:
      pc_tree->horizontal[0].skip_ref_frame_mask = 0;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                       subsize, &pc_tree->horizontal[0], INT_MAX, INT64_MAX);
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
          mi_row + (mi_step >> 1) < cm->mi_rows) {
        RD_COST tmp_rdc;
        PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
        vp9_rd_cost_init(&tmp_rdc);
        update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
        encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
        pc_tree->horizontal[1].skip_ref_frame_mask = 0;
        rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col,
                         &tmp_rdc, subsize, &pc_tree->horizontal[1], INT_MAX,
                         INT64_MAX);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          vp9_rd_cost_reset(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
        last_part_rdc.rdcost += tmp_rdc.rdcost;
      }
      break;
    case PARTITION_VERT:
      pc_tree->vertical[0].skip_ref_frame_mask = 0;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                       subsize, &pc_tree->vertical[0], INT_MAX, INT64_MAX);
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
          mi_col + (mi_step >> 1) < cm->mi_cols) {
        RD_COST tmp_rdc;
        PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
        vp9_rd_cost_init(&tmp_rdc);
        update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
        encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
        pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0;
        rd_pick_sb_modes(
            cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), &tmp_rdc,
            subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT_MAX, INT64_MAX);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          vp9_rd_cost_reset(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
        last_part_rdc.rdcost += tmp_rdc.rdcost;
      }
      break;
    default:
      assert(partition == PARTITION_SPLIT);
      if (bsize == BLOCK_8X8) {
        rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                         subsize, pc_tree->leaf_split[0], INT_MAX, INT64_MAX);
        break;
      }
      last_part_rdc.rate = 0;
      last_part_rdc.dist = 0;
      last_part_rdc.rdcost = 0;
      for (i = 0; i < 4; i++) {
        int x_idx = (i & 1) * (mi_step >> 1);
        int y_idx = (i >> 1) * (mi_step >> 1);
        int jj = i >> 1, ii = i & 0x01;
        RD_COST tmp_rdc;
        if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
          continue;

        vp9_rd_cost_init(&tmp_rdc);
        rd_use_partition(cpi, td, tile_data, mi_8x8 + jj * bss * mis + ii * bss,
                         tp, mi_row + y_idx, mi_col + x_idx, subsize,
                         &tmp_rdc.rate, &tmp_rdc.dist, i != 3,
                         pc_tree->split[i]);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          vp9_rd_cost_reset(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
      }
      break;
  }

  pl = partition_plane_context(xd, mi_row, mi_col, bsize);
  if (last_part_rdc.rate < INT_MAX) {
    last_part_rdc.rate += cpi->partition_cost[pl][partition];
    last_part_rdc.rdcost =
        RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist);
  }

  if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame &&
      cpi->sf.partition_search_type == SEARCH_PARTITION &&
      partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
      (mi_row + mi_step < cm->mi_rows ||
       mi_row + (mi_step >> 1) == cm->mi_rows) &&
      (mi_col + mi_step < cm->mi_cols ||
       mi_col + (mi_step >> 1) == cm->mi_cols)) {
    BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
    chosen_rdc.rate = 0;
    chosen_rdc.dist = 0;

    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
    pc_tree->partitioning = PARTITION_SPLIT;

    // Split partition.
    for (i = 0; i < 4; i++) {
      int x_idx = (i & 1) * (mi_step >> 1);
      int y_idx = (i >> 1) * (mi_step >> 1);
      RD_COST tmp_rdc;
      ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
      PARTITION_CONTEXT sl[8], sa[8];

      if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
        continue;

      save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
      pc_tree->split[i]->partitioning = PARTITION_NONE;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
                       &tmp_rdc, split_subsize, &pc_tree->split[i]->none,
                       INT_MAX, INT64_MAX);

      restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

      if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
        vp9_rd_cost_reset(&chosen_rdc);
        break;
      }

      chosen_rdc.rate += tmp_rdc.rate;
      chosen_rdc.dist += tmp_rdc.dist;

      if (i != 3)
        encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0,
                  split_subsize, pc_tree->split[i]);

      pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
                                   split_subsize);
      chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
    }
    pl = partition_plane_context(xd, mi_row, mi_col, bsize);
    if (chosen_rdc.rate < INT_MAX) {
      chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
      chosen_rdc.rdcost =
          RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist);
    }
  }

  // If last_part is better, set the partitioning to that.
  if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
    mi_8x8[0]->sb_type = bsize;
    if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
    chosen_rdc = last_part_rdc;
  }
  // If none was better, set the partitioning to that.
  if (none_rdc.rdcost < chosen_rdc.rdcost) {
    if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
    chosen_rdc = none_rdc;
  }

  restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

  // We must have chosen a partitioning and encoding or we'll fail later on.
  // No other opportunities for success.
  if (bsize == BLOCK_64X64)
    assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);

  if (do_recon) {
    int output_enabled = (bsize == BLOCK_64X64);
    encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
              pc_tree);
  }

  *rate = chosen_rdc.rate;
  *dist = chosen_rdc.dist;
}
static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
  BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4, BLOCK_4X4,
  BLOCK_4X4,   BLOCK_8X8,   BLOCK_8X8,   BLOCK_8X8, BLOCK_16X16,
  BLOCK_16X16, BLOCK_16X16, BLOCK_16X16
};
static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
  BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
  BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64,
  BLOCK_64X64, BLOCK_64X64, BLOCK_64X64
};
// Look at all the mode_info entries for blocks that are part of this
// partition and find the min and max values for sb_type.
// At the moment this is designed to work on a 64x64 SB but could be
// adjusted to use a size parameter.
//
// The min and max are assumed to have been initialized prior to calling this
// function so repeat calls can accumulate a min and max of more than one sb64.
static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
                                        BLOCK_SIZE *min_block_size,
                                        BLOCK_SIZE *max_block_size,
                                        int bs_hist[BLOCK_SIZES]) {
  int sb_width_in_blocks = MI_BLOCK_SIZE;
  int sb_height_in_blocks = MI_BLOCK_SIZE;
  int i, j;
  int index = 0;

  // Check the sb_type for each block that belongs to this region.
  for (i = 0; i < sb_height_in_blocks; ++i) {
    for (j = 0; j < sb_width_in_blocks; ++j) {
      MODE_INFO *mi = mi_8x8[index + j];
      BLOCK_SIZE sb_type = mi ? mi->sb_type : 0;
      bs_hist[sb_type]++;
      *min_block_size = VPXMIN(*min_block_size, sb_type);
      *max_block_size = VPXMAX(*max_block_size, sb_type);
    }
    index += xd->mi_stride;
  }
}
// Next square block size less than or equal to the current block size.
static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
  BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,   BLOCK_8X8,   BLOCK_8X8,
  BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
  BLOCK_32X32, BLOCK_32X32, BLOCK_64X64
};
// Look at neighboring blocks and set a min and max partition size based on
// what they chose.
static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
                                    MACROBLOCKD *const xd, int mi_row,
                                    int mi_col, BLOCK_SIZE *min_block_size,
                                    BLOCK_SIZE *max_block_size) {
  VP9_COMMON *const cm = &cpi->common;
  MODE_INFO **mi = xd->mi;
  const int left_in_image = !!xd->left_mi;
  const int above_in_image = !!xd->above_mi;
  const int row8x8_remaining = tile->mi_row_end - mi_row;
  const int col8x8_remaining = tile->mi_col_end - mi_col;
  int bh, bw;
  BLOCK_SIZE min_size = BLOCK_4X4;
  BLOCK_SIZE max_size = BLOCK_64X64;
  int bs_hist[BLOCK_SIZES] = { 0 };

  // Trap case where we do not have a prediction.
  if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
    // Default "min to max" and "max to min".
    min_size = BLOCK_64X64;
    max_size = BLOCK_4X4;

    // NOTE: each call to get_sb_partition_size_range() uses the previous
    // passed-in values for min and max as a starting point.
    // Find the min and max partition used in the previous frame at this
    // location.
    if (cm->frame_type != KEY_FRAME) {
      MODE_INFO **prev_mi =
          &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col];
      get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist);
    }
    // Find the min and max partition sizes used in the left SB64.
    if (left_in_image) {
      MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE];
      get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size,
                                  bs_hist);
    }
    // Find the min and max partition sizes used in the above SB64.
    if (above_in_image) {
      MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE];
      get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size,
                                  bs_hist);
    }

    // Adjust observed min and max for the "relaxed" auto partition case.
    if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
      min_size = min_partition_size[min_size];
      max_size = max_partition_size[max_size];
    }
  }

  // Check border cases where max and min from neighbors may not be legal.
  max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining,
                                 &bh, &bw);
  // Test for blocks at the edge of the active image.
  // This may be the actual edge of the image or where there are formatting
  // bars.
  if (vp9_active_edge_sb(cpi, mi_row, mi_col)) {
    min_size = BLOCK_4X4;
  } else {
    min_size =
        VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size));
  }

  // When use_square_partition_only is true, make sure at least one square
  // partition is allowed by selecting the next smaller square size as
  // *min_block_size.
  if (cpi->sf.use_square_partition_only &&
      next_square_size[max_size] < min_size) {
    min_size = next_square_size[max_size];
  }

  *min_block_size = min_size;
  *max_block_size = max_size;
}
// TODO(jingning) refactor functions setting partition search range
static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row,
                                int mi_col, BLOCK_SIZE bsize,
                                BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) {
  int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int mi_height = num_8x8_blocks_high_lookup[bsize];
  int idx, idy;

  MODE_INFO *mi;
  const int idx_str = cm->mi_stride * mi_row + mi_col;
  MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str];
  BLOCK_SIZE bs, min_size, max_size;

  min_size = BLOCK_64X64;
  max_size = BLOCK_4X4;

  if (prev_mi) {
    for (idy = 0; idy < mi_height; ++idy) {
      for (idx = 0; idx < mi_width; ++idx) {
        mi = prev_mi[idy * cm->mi_stride + idx];
        bs = mi ? mi->sb_type : bsize;
        min_size = VPXMIN(min_size, bs);
        max_size = VPXMAX(max_size, bs);
      }
    }
  }

  if (xd->left_mi) {
    for (idy = 0; idy < mi_height; ++idy) {
      mi = xd->mi[idy * cm->mi_stride - 1];
      bs = mi ? mi->sb_type : bsize;
      min_size = VPXMIN(min_size, bs);
      max_size = VPXMAX(max_size, bs);
    }
  }

  if (xd->above_mi) {
    for (idx = 0; idx < mi_width; ++idx) {
      mi = xd->mi[idx - cm->mi_stride];
      bs = mi ? mi->sb_type : bsize;
      min_size = VPXMIN(min_size, bs);
      max_size = VPXMAX(max_size, bs);
    }
  }

  if (min_size == max_size) {
    min_size = min_partition_size[min_size];
    max_size = max_partition_size[max_size];
  }

  *min_bs = min_size;
  *max_bs = max_size;
}
#endif  // !CONFIG_REALTIME_ONLY

static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
  memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
}

static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
  memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
}
#if CONFIG_FP_MB_STATS
const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
                                                        1, 2, 2, 2, 4, 4 };
const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
                                                        2, 1, 2, 4, 2, 4 };
const int qindex_skip_threshold_lookup[BLOCK_SIZES] = { 0,   10,  10, 30, 40,
                                                        40,  60,  80, 80, 90,
                                                        100, 100, 120 };
const int qindex_split_threshold_lookup[BLOCK_SIZES] = { 0,  3,  3,  7,  15,
                                                         15, 30, 40, 40, 60,
                                                         80, 80, 120 };
const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = { 1, 1, 1, 1, 1,
                                                             1, 1, 1, 1, 1,
                                                             4, 4, 6 };

typedef enum {
  MV_ZERO = 0,
  MV_LEFT = 1,
  MV_UP = 2,
  MV_RIGHT = 3,
  MV_DOWN = 4,
  MV_INVALID
} MOTION_DIRECTION;
static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
  if (fp_byte & FPMB_MOTION_ZERO_MASK) {
    return MV_ZERO;
  } else if (fp_byte & FPMB_MOTION_LEFT_MASK) {
    return MV_LEFT;
  } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) {
    return MV_RIGHT;
  } else if (fp_byte & FPMB_MOTION_UP_MASK) {
    return MV_UP;
  } else {
    return MV_DOWN;
  }
}

static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
                                           MOTION_DIRECTION that_mv) {
  if (this_mv == that_mv) {
    return 0;
  } else {
    return abs(this_mv - that_mv) == 2 ? 2 : 1;
  }
}
#endif
// Calculate prediction based on the given input features and neural net
// config. Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each
// hidden layer.
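// The two rows of buf[][] are ping-ponged between layers, so only the current
// layer's inputs and outputs need to be live at any one time.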
static void nn_predict(const float *features, const NN_CONFIG *nn_config,
                       float *output) {
  int num_input_nodes = nn_config->num_inputs;
  int buf_index = 0;
  float buf[2][NN_MAX_NODES_PER_LAYER];
  const float *input_nodes = features;

  // Propagate hidden layers.
  const int num_layers = nn_config->num_hidden_layers;
  int layer, node, i;
  assert(num_layers <= NN_MAX_HIDDEN_LAYERS);
  for (layer = 0; layer < num_layers; ++layer) {
    const float *weights = nn_config->weights[layer];
    const float *bias = nn_config->bias[layer];
    float *output_nodes = buf[buf_index];
    const int num_output_nodes = nn_config->num_hidden_nodes[layer];
    assert(num_output_nodes < NN_MAX_NODES_PER_LAYER);
    for (node = 0; node < num_output_nodes; ++node) {
      float val = 0.0f;
      for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i];
      val += bias[node];
      // ReLU as activation function.
      val = VPXMAX(val, 0.0f);
      output_nodes[node] = val;
      weights += num_input_nodes;
    }
    num_input_nodes = num_output_nodes;
    input_nodes = output_nodes;
    buf_index = 1 - buf_index;
  }

  // Final output layer.
  {
    const float *weights = nn_config->weights[num_layers];
    for (node = 0; node < nn_config->num_outputs; ++node) {
      const float *bias = nn_config->bias[num_layers];
      float val = 0.0f;
      for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i];
      output[node] = val + bias[node];
      weights += num_input_nodes;
    }
  }
}
#if !CONFIG_REALTIME_ONLY
#define FEATURES 7
// Machine-learning based partition search early termination.
// Return 1 to skip split and rect partitions.
static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                                PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
                                BLOCK_SIZE bsize) {
  const int mag_mv =
      abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row);
  const int left_in_image = !!xd->left_mi;
  const int above_in_image = !!xd->above_mi;
  MODE_INFO **prev_mi =
      &cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row];
  int above_par = 0;  // above_partitioning
  int left_par = 0;   // left_partitioning
  int last_par = 0;   // last_partitioning
  int offset = 0;
  int i;
  BLOCK_SIZE context_size;
  const NN_CONFIG *nn_config = NULL;
  const float *mean, *sd, *linear_weights;
  float nn_score, linear_score;
  float features[FEATURES];

  assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
  vpx_clear_system_state();

  switch (bsize) {
    case BLOCK_64X64:
      offset = 0;
      nn_config = &vp9_partition_nnconfig_64x64;
      break;
    case BLOCK_32X32:
      offset = 8;
      nn_config = &vp9_partition_nnconfig_32x32;
      break;
    case BLOCK_16X16:
      offset = 16;
      nn_config = &vp9_partition_nnconfig_16x16;
      break;
    default: assert(0 && "Unexpected block size."); return 0;
  }

  if (above_in_image) {
    context_size = xd->above_mi->sb_type;
    if (context_size < bsize)
      above_par = 2;
    else if (context_size == bsize)
      above_par = 1;
  }

  if (left_in_image) {
    context_size = xd->left_mi->sb_type;
    if (context_size < bsize)
      left_par = 2;
    else if (context_size == bsize)
      left_par = 1;
  }

  if (prev_mi) {
    context_size = prev_mi[0]->sb_type;
    if (context_size < bsize)
      last_par = 2;
    else if (context_size == bsize)
      last_par = 1;
  }

  mean = &vp9_partition_feature_mean[offset];
  sd = &vp9_partition_feature_std[offset];
  features[0] = ((float)ctx->rate - mean[0]) / sd[0];
  features[1] = ((float)ctx->dist - mean[1]) / sd[1];
  features[2] = ((float)mag_mv / 2 - mean[2]) * sd[2];
  features[3] = ((float)(left_par + above_par) / 2 - mean[3]) * sd[3];
  features[4] = ((float)ctx->sum_y_eobs - mean[4]) / sd[4];
  features[5] = ((float)cm->base_qindex - mean[5]) * sd[5];
  features[6] = ((float)last_par - mean[6]) * sd[6];

  // Predict using linear model.
  linear_weights = &vp9_partition_linear_weights[offset];
  linear_score = linear_weights[FEATURES];
  for (i = 0; i < FEATURES; ++i)
    linear_score += linear_weights[i] * features[i];
  if (linear_score > 0.1f) return 0;

  // Predict using neural net model.
  nn_predict(features, nn_config, &nn_score);

  if (linear_score < -0.0f && nn_score < 0.1f) return 1;
  if (nn_score < -0.0f && linear_score < 0.1f) return 1;
  return 0;
}
#undef FEATURES
#define FEATURES 4
// ML-based partition search breakout.
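// Returns 1 when the linear model score reaches the per-context threshold in
// sf.rd_ml_partition.search_breakout_thresh, signalling that the partition
// search can break out early at this block size.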
static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               const MACROBLOCK *const x,
                               const RD_COST *const rd_cost) {
  DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 };
  const VP9_COMMON *const cm = &cpi->common;
  float features[FEATURES];
  const float *linear_weights = NULL;  // Linear model weights.
  float linear_score = 0.0f;
  const int qindex = cm->base_qindex;
  const int q_ctx = qindex >= 200 ? 0 : (qindex >= 150 ? 1 : 2);
  const int is_720p_or_larger = VPXMIN(cm->width, cm->height) >= 720;
  const int resolution_ctx = is_720p_or_larger ? 1 : 0;

  switch (bsize) {
    case BLOCK_64X64:
      linear_weights = vp9_partition_breakout_weights_64[resolution_ctx][q_ctx];
      break;
    case BLOCK_32X32:
      linear_weights = vp9_partition_breakout_weights_32[resolution_ctx][q_ctx];
      break;
    case BLOCK_16X16:
      linear_weights = vp9_partition_breakout_weights_16[resolution_ctx][q_ctx];
      break;
    case BLOCK_8X8:
      linear_weights = vp9_partition_breakout_weights_8[resolution_ctx][q_ctx];
      break;
    default: assert(0 && "Unexpected block size."); return 0;
  }
  if (!linear_weights) return 0;

  {  // Generate feature values.
#if CONFIG_VP9_HIGHBITDEPTH
    const int ac_q =
        vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8);
#else
    const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    const int num_pels_log2 = num_pels_log2_lookup[bsize];
    int feature_index = 0;
    unsigned int var, sse;
    float rate_f, dist_f;

#if CONFIG_VP9_HIGHBITDEPTH
    if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      var =
          vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, x->e_mbd.bd);
    } else {
      var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
                                  vp9_64_zeros, 0, &sse);
    }
#else
    var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
                                vp9_64_zeros, 0, &sse);
#endif
    var = var >> num_pels_log2;

    vpx_clear_system_state();

    rate_f = (float)VPXMIN(rd_cost->rate, INT_MAX);
    dist_f = (float)(VPXMIN(rd_cost->dist, INT_MAX) >> num_pels_log2);
    rate_f =
        ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) *
        rate_f;

    features[feature_index++] = rate_f;
    features[feature_index++] = dist_f;
    features[feature_index++] = (float)var;
    features[feature_index++] = (float)ac_q;
    assert(feature_index == FEATURES);
  }

  {  // Calculate the output score.
    int i;
    linear_score = linear_weights[FEATURES];
    for (i = 0; i < FEATURES; ++i)
      linear_score += linear_weights[i] * features[i];
  }

  return linear_score >= cpi->sf.rd_ml_partition.search_breakout_thresh[q_ctx];
}
#undef FEATURES
#define FEATURES 8
#define LABELS 4
static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x,
                                    BLOCK_SIZE bsize,
                                    const PC_TREE *const pc_tree,
                                    int *allow_horz, int *allow_vert,
                                    int64_t ref_rd) {
  const NN_CONFIG *nn_config = NULL;
  float score[LABELS] = {
    0.0f,
  };
  int thresh = -1;
  int i;
  (void)x;

  if (ref_rd <= 0 || ref_rd > 1000000000) return;

  switch (bsize) {
    case BLOCK_8X8: break;
    case BLOCK_16X16:
      nn_config = &vp9_rect_part_nnconfig_16;
      thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[1];
      break;
    case BLOCK_32X32:
      nn_config = &vp9_rect_part_nnconfig_32;
      thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[2];
      break;
    case BLOCK_64X64:
      nn_config = &vp9_rect_part_nnconfig_64;
      thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[3];
      break;
    default: assert(0 && "Unexpected block size."); return;
  }
  if (!nn_config || thresh < 0) return;

  // Feature extraction and model score calculation.
  {
    const VP9_COMMON *const cm = &cpi->common;
#if CONFIG_VP9_HIGHBITDEPTH
    const int dc_q =
        vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8);
#else
    const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
    int feature_index = 0;
    float features[FEATURES];

    features[feature_index++] = logf((float)dc_q + 1.0f);
    features[feature_index++] =
        (float)(pc_tree->partitioning == PARTITION_NONE);
    features[feature_index++] = logf((float)ref_rd / bs / bs + 1.0f);

    {
      const float norm_factor = 1.0f / ((float)ref_rd + 1.0f);
      const int64_t none_rdcost = pc_tree->none.rdcost;
      float rd_ratio = 2.0f;
      if (none_rdcost > 0 && none_rdcost < 1000000000)
        rd_ratio = (float)none_rdcost * norm_factor;
      features[feature_index++] = VPXMIN(rd_ratio, 2.0f);

      for (i = 0; i < 4; ++i) {
        const int64_t this_rd = pc_tree->split[i]->none.rdcost;
        const int rd_valid = this_rd > 0 && this_rd < 1000000000;
        // Ratio between sub-block RD and whole-block RD.
        features[feature_index++] =
            rd_valid ? (float)this_rd * norm_factor : 1.0f;
      }
    }

    assert(feature_index == FEATURES);
    nn_predict(features, nn_config, score);
  }

  // Make decisions based on the model score.
  {
    int max_score = -1000;
    int horz = 0, vert = 0;
    int int_score[LABELS];
    for (i = 0; i < LABELS; ++i) {
      int_score[i] = (int)(100 * score[i]);
      max_score = VPXMAX(int_score[i], max_score);
    }
    thresh = max_score - thresh;
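    // Each label index encodes the allowed rectangular partitions in its
    // bits: bit 0 set permits PARTITION_HORZ, bit 1 set permits
    // PARTITION_VERT.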
    for (i = 0; i < LABELS; ++i) {
      if (int_score[i] >= thresh) {
        if ((i >> 0) & 1) horz = 1;
        if ((i >> 1) & 1) vert = 1;
      }
    }
    *allow_horz = *allow_horz && horz;
    *allow_vert = *allow_vert && vert;
  }
}
#undef FEATURES
#undef LABELS
// Perform a fast and coarse motion search for the given block. This is a
// pre-processing step for the ML-based partition search speedup.
static void simple_motion_search(const VP9_COMP *const cpi, MACROBLOCK *const x,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 MV ref_mv, MV_REFERENCE_FRAME ref,
                                 uint8_t *const pred_buf) {
  const VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_buffer(cpi, ref);
  const int step_param = 1;
  const MvLimits tmp_mv_limits = x->mv_limits;
  const SEARCH_METHODS search_method = NSTEP;
  const int sadpb = x->sadperbit16;
  MV ref_mv_full = { ref_mv.row >> 3, ref_mv.col >> 3 };
  MV best_mv = { 0, 0 };
  int cost_list[5];

  assert(yv12 != NULL);
  if (!yv12) return;
  vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                       &cm->frame_refs[ref - 1].sf);
  mi->ref_frame[0] = ref;
  mi->ref_frame[1] = NONE;
  mi->sb_type = bsize;
  vp9_set_mv_search_range(&x->mv_limits, &ref_mv);
  vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, search_method,
                        sadpb, cond_cost_list(cpi, cost_list), &ref_mv,
                        &best_mv, 0, 0);
  best_mv.row *= 8;
  best_mv.col *= 8;
  x->mv_limits = tmp_mv_limits;
  mi->mv[0].as_mv = best_mv;

  set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
  xd->plane[0].dst.buf = pred_buf;
  xd->plane[0].dst.stride = 64;
  vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
}
// Use a neural net model to prune partition-none and partition-split search.
// Features used: QP; spatial block size contexts; variance of prediction
// residue after simple_motion_search.
#define FEATURES 12
static void ml_predict_var_rd_paritioning(const VP9_COMP *const cpi,
                                          MACROBLOCK *const x,
                                          PC_TREE *const pc_tree,
                                          BLOCK_SIZE bsize, int mi_row,
                                          int mi_col, int *none, int *split) {
  const VP9_COMMON *const cm = &cpi->common;
  const NN_CONFIG *nn_config = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
  MACROBLOCKD *xd = &x->e_mbd;
  DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]);
  uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
                                ? (CONVERT_TO_BYTEPTR(pred_buffer))
                                : pred_buffer;
#else
  DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64]);
  uint8_t *const pred_buf = pred_buffer;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  const int speed = cpi->oxcf.speed;
  float thresh = 0.0f;

  switch (bsize) {
    case BLOCK_64X64:
      nn_config = &vp9_part_split_nnconfig_64;
      thresh = speed > 0 ? 2.8f : 3.0f;
      break;
    case BLOCK_32X32:
      nn_config = &vp9_part_split_nnconfig_32;
      thresh = speed > 0 ? 3.5f : 3.0f;
      break;
    case BLOCK_16X16:
      nn_config = &vp9_part_split_nnconfig_16;
      thresh = speed > 0 ? 3.8f : 4.0f;
      break;
    case BLOCK_8X8:
      nn_config = &vp9_part_split_nnconfig_8;
      if (cm->width >= 720 && cm->height >= 720)
        thresh = speed > 0 ? 2.5f : 2.0f;
      else
        thresh = speed > 0 ? 3.8f : 2.0f;
      break;
    default: assert(0 && "Unexpected block size."); return;
  }
  if (!nn_config) return;

  // Do a simple single motion search to find a prediction for current block.
  // The variance of the residue will be used as input features.
  {
    MV ref_mv;
    const MV_REFERENCE_FRAME ref =
        cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME;
    // If bsize is 64x64, use zero MV as reference; otherwise, use MV result
    // of previous (larger) block as reference.
    if (bsize == BLOCK_64X64)
      ref_mv.row = ref_mv.col = 0;
    else
      ref_mv = pc_tree->mv;
    vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
    simple_motion_search(cpi, x, bsize, mi_row, mi_col, ref_mv, ref, pred_buf);
    pc_tree->mv = x->e_mbd.mi[0]->mv[0].as_mv;
  }

  vpx_clear_system_state();

  {
    float features[FEATURES] = { 0.0f };
#if CONFIG_VP9_HIGHBITDEPTH
    const int dc_q =
        vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (xd->bd - 8);
#else
    const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    int feature_idx = 0;
    float score;

    // Generate model input features.
    features[feature_idx++] = logf((float)dc_q + 1.0f);

    // Get the variance of the residue as input features.
    {
      const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
      const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
      const uint8_t *pred = pred_buf;
      const uint8_t *src = x->plane[0].src.buf;
      const int src_stride = x->plane[0].src.stride;
      const int pred_stride = 64;
      unsigned int sse;
      // Variance of whole block.
      const unsigned int var =
          cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
      const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
      const MACROBLOCKD *const xd = &x->e_mbd;
      const int has_above = !!xd->above_mi;
      const int has_left = !!xd->left_mi;
      const BLOCK_SIZE above_bsize = has_above ? xd->above_mi->sb_type : bsize;
      const BLOCK_SIZE left_bsize = has_left ? xd->left_mi->sb_type : bsize;
      int i;

      features[feature_idx++] = (float)has_above;
      features[feature_idx++] = (float)b_width_log2_lookup[above_bsize];
      features[feature_idx++] = (float)b_height_log2_lookup[above_bsize];
      features[feature_idx++] = (float)has_left;
      features[feature_idx++] = (float)b_width_log2_lookup[left_bsize];
      features[feature_idx++] = (float)b_height_log2_lookup[left_bsize];
      features[feature_idx++] = logf((float)var + 1.0f);
      for (i = 0; i < 4; ++i) {
        const int x_idx = (i & 1) * bs / 2;
        const int y_idx = (i >> 1) * bs / 2;
        const int src_offset = y_idx * src_stride + x_idx;
        const int pred_offset = y_idx * pred_stride + x_idx;
        // Variance of quarter block.
        const unsigned int sub_var =
            cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
                                    pred + pred_offset, pred_stride, &sse);
        const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
        features[feature_idx++] = var_ratio;
      }
    }
    assert(feature_idx == FEATURES);

    // Feed the features into the model to get the confidence score.
    nn_predict(features, nn_config, &score);

    // A higher score means the model has higher confidence that the split
    // partition is better than the non-split partition. So if the score is
    // high enough, we skip the non-split partition search; if the score is
    // low enough, we skip the split partition search.
    if (score > thresh) *none = 0;
    if (score < -thresh) *split = 0;
  }
}
#undef FEATURES
#endif  // !CONFIG_REALTIME_ONLY
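// Returns log2(1 + wiener_variance), computed via the change-of-base
// identity log2(x) = log(x) / log(2).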
static double log_wiener_var(int64_t wiener_variance) {
  return log(1.0 + wiener_variance) / log(2.0);
}
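// Cluster the per-SB64 average Wiener variances (in the log2 domain) with
// k-means and use the resulting group boundaries to set up perceptual AQ
// segmentation.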
static void build_kmeans_segmentation(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  BLOCK_SIZE bsize = BLOCK_64X64;
  KMEANS_DATA *kmeans_data;

  vp9_disable_segmentation(&cm->seg);
  if (cm->show_frame) {
    int mi_row, mi_col;
    cpi->kmeans_data_size = 0;
    cpi->kmeans_ctr_num = 8;

    for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) {
      for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
        int mb_row_start = mi_row >> 1;
        int mb_col_start = mi_col >> 1;
        int mb_row_end = VPXMIN(
            (mi_row + num_8x8_blocks_high_lookup[bsize]) >> 1, cm->mb_rows);
        int mb_col_end = VPXMIN(
            (mi_col + num_8x8_blocks_wide_lookup[bsize]) >> 1, cm->mb_cols);
        int row, col;
        int64_t wiener_variance = 0;

        for (row = mb_row_start; row < mb_row_end; ++row)
          for (col = mb_col_start; col < mb_col_end; ++col)
            wiener_variance +=
                cpi->mb_wiener_variance[row * cm->mb_cols + col];

        wiener_variance /=
            (mb_row_end - mb_row_start) * (mb_col_end - mb_col_start);

#if CONFIG_MULTITHREAD
        pthread_mutex_lock(&cpi->kmeans_mutex);
#endif  // CONFIG_MULTITHREAD

        kmeans_data = &cpi->kmeans_data_arr[cpi->kmeans_data_size++];
        kmeans_data->value = log_wiener_var(wiener_variance);
        kmeans_data->pos = mi_row * cpi->kmeans_data_stride + mi_col;
#if CONFIG_MULTITHREAD
        pthread_mutex_unlock(&cpi->kmeans_mutex);
#endif  // CONFIG_MULTITHREAD
      }
    }

    vp9_kmeans(cpi->kmeans_ctr_ls, cpi->kmeans_boundary_ls,
               cpi->kmeans_count_ls, cpi->kmeans_ctr_num, cpi->kmeans_data_arr,
               cpi->kmeans_data_size);

    vp9_perceptual_aq_mode_setup(cpi, &cm->seg);
  }
}
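
// wiener_var_segment() maps a block back onto the clusters computed above:
// every 16x16 macroblock covered by the block votes for the cluster its own
// log-variance falls into, and the most common cluster index wins. The
// majority vote keeps one outlier macroblock from dragging a whole 64x64
// block into the wrong segment.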
#if !CONFIG_REALTIME_ONLY
static int wiener_var_segment(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
                              int mi_col) {
  VP9_COMMON *cm = &cpi->common;
  int mb_row_start = mi_row >> 1;
  int mb_col_start = mi_col >> 1;
  int mb_row_end =
      VPXMIN((mi_row + num_8x8_blocks_high_lookup[bsize]) >> 1, cm->mb_rows);
  int mb_col_end =
      VPXMIN((mi_col + num_8x8_blocks_wide_lookup[bsize]) >> 1, cm->mb_cols);
  int row, col, idx;
  int64_t wiener_variance = 0;
  int segment_id;
  int8_t seg_hist[MAX_SEGMENTS] = { 0 };
  int8_t max_count = 0, max_index = -1;

  vpx_clear_system_state();

  assert(cpi->norm_wiener_variance > 0);

  for (row = mb_row_start; row < mb_row_end; ++row) {
    for (col = mb_col_start; col < mb_col_end; ++col) {
      wiener_variance = cpi->mb_wiener_variance[row * cm->mb_cols + col];
      segment_id =
          vp9_get_group_idx(log_wiener_var(wiener_variance),
                            cpi->kmeans_boundary_ls, cpi->kmeans_ctr_num);
      ++seg_hist[segment_id];
    }
  }

  for (idx = 0; idx < cpi->kmeans_ctr_num; ++idx) {
    if (seg_hist[idx] > max_count) {
      max_count = seg_hist[idx];
      max_index = idx;
    }
  }

  assert(max_index >= 0);
  segment_id = max_index;

  return segment_id;
}
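
// get_rdmult_delta() rescales the block-level rdmult using temporal
// dependency (TPL) statistics: beta = r0 / rk compares the frame-level ratio
// r0 against the local intra/inter cost ratio rk for this block. The result
// is clamped to [orig_rdmult / 2, orig_rdmult * 3 / 2], so with e.g.
// orig_rdmult == 100 the adapted multiplier always stays within [50, 150].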
static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
                            int mi_col, int orig_rdmult) {
  const int gf_group_index = cpi->twopass.gf_group.index;
  TplDepFrame *tpl_frame = &cpi->tpl_stats[gf_group_index];
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  int tpl_stride = tpl_frame->stride;
  int64_t intra_cost = 0;
  int64_t mc_dep_cost = 0;
  int mi_wide = num_8x8_blocks_wide_lookup[bsize];
  int mi_high = num_8x8_blocks_high_lookup[bsize];
  int row, col;
  int dr = 0;
  int count = 0;
  double r0, rk, beta;

  if (tpl_frame->is_valid == 0) return orig_rdmult;

  if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult;

  if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult;

  for (row = mi_row; row < mi_row + mi_high; ++row) {
    for (col = mi_col; col < mi_col + mi_wide; ++col) {
      TplDepStats *this_stats;
      // Skip positions outside the frame before forming the pointer so the
      // stats array is never indexed out of bounds.
      if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue;
      this_stats = &tpl_stats[row * tpl_stride + col];

      intra_cost += this_stats->intra_cost;
      mc_dep_cost += this_stats->mc_dep_cost;
      ++count;
    }
  }

  vpx_clear_system_state();

  r0 = cpi->rd.r0;
  rk = (double)intra_cost / mc_dep_cost;
  beta = r0 / rk;
  dr = vp9_get_adaptive_rdmult(cpi, beta);

  dr = VPXMIN(dr, orig_rdmult * 3 / 2);
  dr = VPXMAX(dr, orig_rdmult * 1 / 2);

  dr = VPXMAX(1, dr);
  return dr;
}
#endif  // !CONFIG_REALTIME_ONLY
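
// rd_pick_partition() is the heart of the two-pass partition search: for a
// block it evaluates PARTITION_NONE, recursively PARTITION_SPLIT, then
// PARTITION_HORZ and PARTITION_VERT, carrying the running best rd cost down
// the recursion so losing branches can be cut early. The entropy and
// partition contexts are saved up front and restored after each candidate so
// every candidate starts from identical coding state.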
#if !CONFIG_REALTIME_ONLY
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
static int rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
                             TileDataEnc *tile_data, TOKENEXTRA **tp,
                             int mi_row, int mi_col, BLOCK_SIZE bsize,
                             RD_COST *rd_cost, RD_COST best_rdc,
                             PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  TOKENEXTRA *tp_orig = *tp;
  PICK_MODE_CONTEXT *const ctx = &pc_tree->none;
  int i;
  const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
  BLOCK_SIZE subsize;
  RD_COST this_rdc, sum_rdc;
  int do_split = bsize >= BLOCK_8X8;
  int do_rect = 1;
  INTERP_FILTER pred_interp_filter;

  // Override skipping rectangular partition operations for edge blocks
  const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
  const int force_vert_split = (mi_col + mi_step >= cm->mi_cols);
  const int xss = x->e_mbd.plane[1].subsampling_x;
  const int yss = x->e_mbd.plane[1].subsampling_y;

  BLOCK_SIZE min_size = x->min_partition_size;
  BLOCK_SIZE max_size = x->max_partition_size;

#if CONFIG_FP_MB_STATS
  unsigned int src_diff_var = UINT_MAX;
  int none_complexity = 0;
#endif

  int partition_none_allowed = !force_horz_split && !force_vert_split;
  int partition_horz_allowed =
      !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
  int partition_vert_allowed =
      !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;

  int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist;
  int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;
  int must_split = 0;
  int should_encode_sb = 0;

  // Ref frames picked in the [i_th] quarter subblock during square partition
  // RD search. It may be used to prune ref frame selection of rect partitions.
  uint8_t ref_frames_used[4] = { 0, 0, 0, 0 };

  int partition_mul = x->cb_rdmult;

  (void)*tp_orig;

  assert(num_8x8_blocks_wide_lookup[bsize] ==
         num_8x8_blocks_high_lookup[bsize]);

  dist_breakout_thr >>=
      8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);

  rate_breakout_thr *= num_pels_log2_lookup[bsize];

  vp9_rd_cost_init(&this_rdc);
  vp9_rd_cost_init(&sum_rdc);

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);

  if (oxcf->tuning == VP8_TUNE_SSIM) {
    set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &partition_mul);
  }
  vp9_rd_cost_update(partition_mul, x->rddiv, &best_rdc);

  if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ &&
      cpi->oxcf.aq_mode != LOOKAHEAD_AQ)
    x->mb_energy = vp9_block_energy(cpi, x, bsize);

  if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
    int cb_partition_search_ctrl =
        ((pc_tree->index == 0 || pc_tree->index == 3) +
         get_chessboard_index(cm->current_video_frame)) &
        0x1;

    if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size)
      set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size);
  }

  // Get sub block energy range
  if (bsize >= BLOCK_16X16) {
    int min_energy, max_energy;
    vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy,
                             &max_energy);
    must_split = (min_energy < -3) && (max_energy - min_energy > 2);
  }

  // Determine partition types in search according to the speed features.
  // The threshold set here has to be of square block size.
  if (cpi->sf.auto_min_max_partition_size) {
    partition_none_allowed &= (bsize <= max_size);
    partition_horz_allowed &=
        ((bsize <= max_size && bsize > min_size) || force_horz_split);
    partition_vert_allowed &=
        ((bsize <= max_size && bsize > min_size) || force_vert_split);
    do_split &= bsize > min_size;
  }

  if (cpi->sf.use_square_partition_only &&
      (bsize > cpi->sf.use_square_only_thresh_high ||
       bsize < cpi->sf.use_square_only_thresh_low)) {
    if (cpi->use_svc) {
      if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
        partition_horz_allowed &= force_horz_split;
      if (!vp9_active_v_edge(cpi, mi_col, mi_step) || x->e_mbd.lossless)
        partition_vert_allowed &= force_vert_split;
    } else {
      partition_horz_allowed &= force_horz_split;
      partition_vert_allowed &= force_vert_split;
    }
  }

  save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

#if CONFIG_FP_MB_STATS
  if (cpi->use_fp_mb_stats) {
    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
    src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row,
                                                  mi_col, bsize);
  }
#endif

#if CONFIG_FP_MB_STATS
  // Decide whether we shall split directly and skip searching NONE by using
  // the first pass block statistics
  if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
      partition_none_allowed && src_diff_var > 4 &&
      cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
    int mb_row = mi_row >> 1;
    int mb_col = mi_col >> 1;
    int mb_row_end =
        VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
    int mb_col_end =
        VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
    int r, c;

    // compute a complexity measure, basically measure inconsistency of motion
    // vectors obtained from the first pass in the current block
    for (r = mb_row; r < mb_row_end; r++) {
      for (c = mb_col; c < mb_col_end; c++) {
        const int mb_index = r * cm->mb_cols + c;

        MOTION_DIRECTION this_mv;
        MOTION_DIRECTION right_mv;
        MOTION_DIRECTION bottom_mv;

        this_mv = get_motion_direction_fp(
            cpi->twopass.this_frame_mb_stats[mb_index]);

        // to its right
        if (c != mb_col_end - 1) {
          right_mv = get_motion_direction_fp(
              cpi->twopass.this_frame_mb_stats[mb_index + 1]);
          none_complexity += get_motion_inconsistency(this_mv, right_mv);
        }

        // to its bottom
        if (r != mb_row_end - 1) {
          bottom_mv = get_motion_direction_fp(
              cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
          none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
        }

        // do not count its left and top neighbors to avoid double counting
      }
    }

    if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
      partition_none_allowed = 0;
    }
  }
#endif

  pc_tree->partitioning = PARTITION_NONE;

  if (cpi->sf.rd_ml_partition.var_pruning && !frame_is_intra_only(cm)) {
    const int do_rd_ml_partition_var_pruning =
        partition_none_allowed && do_split &&
        mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows &&
        mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols;
    if (do_rd_ml_partition_var_pruning) {
      ml_predict_var_rd_paritioning(cpi, x, pc_tree, bsize, mi_row, mi_col,
                                    &partition_none_allowed, &do_split);
    } else {
      vp9_zero(pc_tree->mv);
    }
    if (bsize > BLOCK_8X8) {  // Store MV result as reference for subblocks.
      for (i = 0; i < 4; ++i) pc_tree->split[i]->mv = pc_tree->mv;
    }
  }

  // PARTITION_NONE
  if (partition_none_allowed) {
    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx,
                     best_rdc.rate, best_rdc.dist);
    ctx->rdcost = this_rdc.rdcost;
    if (this_rdc.rate != INT_MAX) {
      if (cpi->sf.prune_ref_frame_for_rect_partitions) {
        const int ref1 = ctx->mic.ref_frame[0];
        const int ref2 = ctx->mic.ref_frame[1];
        for (i = 0; i < 4; ++i) {
          ref_frames_used[i] |= (1 << ref1);
          if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
        }
      }
      if (bsize >= BLOCK_8X8) {
        this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
        vp9_rd_cost_update(partition_mul, x->rddiv, &this_rdc);
      }

      if (this_rdc.rdcost < best_rdc.rdcost) {
        MODE_INFO *mi = xd->mi[0];

        best_rdc = this_rdc;
        should_encode_sb = 1;
        if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;

        if (cpi->sf.rd_ml_partition.search_early_termination) {
          // Currently, the machine-learning based partition search early
          // termination is only used while bsize is 16x16, 32x32 or 64x64,
          // VPXMIN(cm->width, cm->height) >= 480, and speed = 0.
          if (!x->e_mbd.lossless &&
              !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) &&
              ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
            if (ml_pruning_partition(cm, xd, ctx, mi_row, mi_col, bsize)) {
              do_split = 0;
              do_rect = 0;
            }
          }
        }

        if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) {
          const int use_ml_based_breakout =
              cpi->sf.rd_ml_partition.search_breakout && cm->base_qindex >= 100;
          if (use_ml_based_breakout) {
            if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) {
              do_split = 0;
              do_rect = 0;
            }
          } else {
            if (!cpi->sf.rd_ml_partition.search_early_termination) {
              if ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
                  (best_rdc.dist < dist_breakout_thr &&
                   best_rdc.rate < rate_breakout_thr)) {
                do_split = 0;
                do_rect = 0;
              }
            }
          }
        }

#if CONFIG_FP_MB_STATS
        // Check if every 16x16 first pass block statistics has zero
        // motion and the corresponding first pass residue is small enough.
        // If that is the case, check the difference variance between the
        // current frame and the last frame. If the variance is small enough,
        // stop further splitting in RD optimization
        if (cpi->use_fp_mb_stats && do_split != 0 &&
            cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
          int mb_row = mi_row >> 1;
          int mb_col = mi_col >> 1;
          int mb_row_end =
              VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
          int mb_col_end =
              VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
          int r, c;

          int skip = 1;
          for (r = mb_row; r < mb_row_end; r++) {
            for (c = mb_col; c < mb_col_end; c++) {
              const int mb_index = r * cm->mb_cols + c;
              if (!(cpi->twopass.this_frame_mb_stats[mb_index] &
                    FPMB_MOTION_ZERO_MASK) ||
                  !(cpi->twopass.this_frame_mb_stats[mb_index] &
                    FPMB_ERROR_SMALL_MASK)) {
                skip = 0;
                break;
              }
            }
            if (skip == 0) {
              break;
            }
          }

          if (skip) {
            if (src_diff_var == UINT_MAX) {
              set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
              src_diff_var = get_sby_perpixel_diff_variance(
                  cpi, &x->plane[0].src, mi_row, mi_col, bsize);
            }
            if (src_diff_var < 8) {
              do_split = 0;
              do_rect = 0;
            }
          }
        }
#endif
      }
    }
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
  } else {
    vp9_zero(ctx->pred_mv);
    ctx->mic.interp_filter = EIGHTTAP;
  }

  // store estimated motion vector
  store_pred_mv(x, ctx);

  // If the interp_filter is marked as SWITCHABLE_FILTERS, it was for an
  // intra block and used for context purposes.
  if (ctx->mic.interp_filter == SWITCHABLE_FILTERS) {
    pred_interp_filter = EIGHTTAP;
  } else {
    pred_interp_filter = ctx->mic.interp_filter;
  }

  // PARTITION_SPLIT
  // TODO(jingning): use the motion vectors given by the above search as
  // the starting point of motion search in the following partition type check.
  pc_tree->split[0]->none.rdcost = 0;
  pc_tree->split[1]->none.rdcost = 0;
  pc_tree->split[2]->none.rdcost = 0;
  pc_tree->split[3]->none.rdcost = 0;
  if (do_split || must_split) {
    subsize = get_subsize(bsize, PARTITION_SPLIT);
    load_pred_mv(x, ctx);
    if (bsize == BLOCK_8X8) {
      i = 4;
      if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
        pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                       pc_tree->leaf_split[0], best_rdc.rate, best_rdc.dist);
      if (sum_rdc.rate == INT_MAX) {
        sum_rdc.rdcost = INT64_MAX;
      } else {
        if (cpi->sf.prune_ref_frame_for_rect_partitions) {
          const int ref1 = pc_tree->leaf_split[0]->mic.ref_frame[0];
          const int ref2 = pc_tree->leaf_split[0]->mic.ref_frame[1];
          for (i = 0; i < 4; ++i) {
            ref_frames_used[i] |= (1 << ref1);
            if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
          }
        }
      }
    } else {
      for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split);
           ++i) {
        const int x_idx = (i & 1) * mi_step;
        const int y_idx = (i >> 1) * mi_step;
        int found_best_rd = 0;
        RD_COST best_rdc_split;
        vp9_rd_cost_reset(&best_rdc_split);

        if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) {
          // A must split test here increases the number of sub
          // partitions but hurts metrics results quite a bit,
          // so this extra test is commented out pending
          // further tests on whether it adds much in terms of
          // visual quality.
          // (must_split) ? best_rdc.rate
          //              : best_rdc.rate - sum_rdc.rate,
          // (must_split) ? best_rdc.dist
          //              : best_rdc.dist - sum_rdc.dist,

          best_rdc_split.rate = best_rdc.rate - sum_rdc.rate;
          best_rdc_split.dist = best_rdc.dist - sum_rdc.dist;
        }

        if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
          continue;

        pc_tree->split[i]->index = i;
        if (cpi->sf.prune_ref_frame_for_rect_partitions)
          pc_tree->split[i]->none.rate = INT_MAX;
        found_best_rd = rd_pick_partition(
            cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize,
            &this_rdc, best_rdc_split, pc_tree->split[i]);

        if (found_best_rd == 0) {
          sum_rdc.rdcost = INT64_MAX;
          break;
        } else {
          if (cpi->sf.prune_ref_frame_for_rect_partitions &&
              pc_tree->split[i]->none.rate != INT_MAX) {
            const int ref1 = pc_tree->split[i]->none.mic.ref_frame[0];
            const int ref2 = pc_tree->split[i]->none.mic.ref_frame[1];
            ref_frames_used[i] |= (1 << ref1);
            if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
          }
          sum_rdc.rate += this_rdc.rate;
          sum_rdc.dist += this_rdc.dist;
          vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc);
        }
      }
    }

    if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) {
      sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
      vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc);

      if ((sum_rdc.rdcost < best_rdc.rdcost) ||
          (must_split && (sum_rdc.dist < best_rdc.dist))) {
        best_rdc = sum_rdc;
        should_encode_sb = 1;
        pc_tree->partitioning = PARTITION_SPLIT;

        // Rate and distortion based partition search termination clause.
        if (!cpi->sf.rd_ml_partition.search_early_termination &&
            !x->e_mbd.lossless &&
            ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
             (best_rdc.dist < dist_breakout_thr &&
              best_rdc.rate < rate_breakout_thr))) {
          do_rect = 0;
        }
      }
    } else {
      // skip rectangular partition test when larger block size
      // gives better rd cost
      if (cpi->sf.less_rectangular_check &&
          (bsize > cpi->sf.use_square_only_thresh_high ||
           best_rdc.dist < dist_breakout_thr))
        do_rect &= !partition_none_allowed;
    }
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  pc_tree->horizontal[0].skip_ref_frame_mask = 0;
  pc_tree->horizontal[1].skip_ref_frame_mask = 0;
  pc_tree->vertical[0].skip_ref_frame_mask = 0;
  pc_tree->vertical[1].skip_ref_frame_mask = 0;
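  // Quarter subblocks are numbered 0..3 in raster order, so the top half of
  // the block is {0, 1}, the bottom half {2, 3}, the left half {0, 2} and
  // the right half {1, 3}. Each rect partition only allows ref frames seen
  // in the matching pair: e.g. if subblocks 0 and 1 both used only
  // LAST_FRAME (bit 1 << 1), horizontal[0] gets skip_ref_frame_mask = ~2,
  // pruning every other reference from its mode search.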
  if (cpi->sf.prune_ref_frame_for_rect_partitions) {
    uint8_t used_frames;
    used_frames = ref_frames_used[0] | ref_frames_used[1];
    if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
    used_frames = ref_frames_used[2] | ref_frames_used[3];
    if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames;
    used_frames = ref_frames_used[0] | ref_frames_used[2];
    if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames;
    used_frames = ref_frames_used[1] | ref_frames_used[3];
    if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
  }

  {
    const int do_ml_rect_partition_pruning =
        !frame_is_intra_only(cm) && !force_horz_split && !force_vert_split &&
        (partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8;
    if (do_ml_rect_partition_pruning) {
      ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed,
                              &partition_vert_allowed, best_rdc.rdcost);
    }
  }

  // PARTITION_HORZ
  if (partition_horz_allowed &&
      (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) {
    const int part_mode_rate = cpi->partition_cost[pl][PARTITION_HORZ];
    subsize = get_subsize(bsize, PARTITION_HORZ);
    load_pred_mv(x, ctx);
    if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
        partition_none_allowed)
      pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter;
    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                     &pc_tree->horizontal[0], best_rdc.rate - part_mode_rate,
                     best_rdc.dist);
    if (sum_rdc.rdcost < INT64_MAX) {
      sum_rdc.rate += part_mode_rate;
      vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc);
    }

    if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
        bsize > BLOCK_8X8) {
      PICK_MODE_CONTEXT *hctx = &pc_tree->horizontal[0];
      update_state(cpi, td, hctx, mi_row, mi_col, subsize, 0);
      encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, hctx);
      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
          partition_none_allowed)
        pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
                       subsize, &pc_tree->horizontal[1],
                       best_rdc.rate - sum_rdc.rate,
                       best_rdc.dist - sum_rdc.dist);
      if (this_rdc.rate == INT_MAX) {
        sum_rdc.rdcost = INT64_MAX;
      } else {
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc);
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      should_encode_sb = 1;
      pc_tree->partitioning = PARTITION_HORZ;

      if (cpi->sf.less_rectangular_check &&
          bsize > cpi->sf.use_square_only_thresh_high)
        do_rect = 0;
    }
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // PARTITION_VERT
  if (partition_vert_allowed &&
      (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) {
    const int part_mode_rate = cpi->partition_cost[pl][PARTITION_VERT];
    subsize = get_subsize(bsize, PARTITION_VERT);
    load_pred_mv(x, ctx);
    if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
        partition_none_allowed)
      pc_tree->vertical[0].pred_interp_filter = pred_interp_filter;
    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                     &pc_tree->vertical[0], best_rdc.rate - part_mode_rate,
                     best_rdc.dist);
    if (sum_rdc.rdcost < INT64_MAX) {
      sum_rdc.rate += part_mode_rate;
      vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc);
    }

    if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
        bsize > BLOCK_8X8) {
      update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
      encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize,
                        &pc_tree->vertical[0]);
      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
          partition_none_allowed)
        pc_tree->vertical[1].pred_interp_filter = pred_interp_filter;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
                       subsize, &pc_tree->vertical[1],
                       best_rdc.rate - sum_rdc.rate,
                       best_rdc.dist - sum_rdc.dist);
      if (this_rdc.rate == INT_MAX) {
        sum_rdc.rdcost = INT64_MAX;
      } else {
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc);
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      should_encode_sb = 1;
      pc_tree->partitioning = PARTITION_VERT;
    }
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  *rd_cost = best_rdc;

  if (should_encode_sb && pc_tree->index != 3) {
    int output_enabled = (bsize == BLOCK_64X64);
    encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
              pc_tree);
  }

  if (bsize == BLOCK_64X64) {
    assert(tp_orig < *tp);
    assert(best_rdc.rate < INT_MAX);
    assert(best_rdc.dist < INT64_MAX);
  } else {
    assert(tp_orig == *tp);
  }

  return should_encode_sb;
}
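
// encode_rd_sb_row() walks one 64x64 superblock row of a tile. Per SB it
// resets the prediction MVs, waits on the row-multithreading sync counter,
// then dispatches on the partition search type: fixed partitioning,
// variance-based partitioning, or the full recursive rd_pick_partition()
// search, optionally with a TPL-adjusted rdmult and a perceptual-AQ segment.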
static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
                             TileDataEnc *tile_data, int mi_row,
                             TOKENEXTRA **tp) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int mi_col_start = tile_info->mi_col_start;
  const int mi_col_end = tile_info->mi_col_end;
  int mi_col;
  const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
  const int num_sb_cols =
      get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
  int sb_col_in_tile;

  // Initialize the left context for the new SB row
  memset(&xd->left_context, 0, sizeof(xd->left_context));
  memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));

  // Code each SB in the row
  for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
       mi_col += MI_BLOCK_SIZE, sb_col_in_tile++) {
    const struct segmentation *const seg = &cm->seg;
    int dummy_rate;
    int64_t dummy_dist;
    RD_COST dummy_rdc;
    int i;
    int seg_skip = 0;
    int orig_rdmult = cpi->rd.RDMULT;

    const int idx_str = cm->mi_stride * mi_row + mi_col;
    MODE_INFO **mi = cm->mi_grid_visible + idx_str;

    vp9_rd_cost_reset(&dummy_rdc);
    (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
                                   sb_col_in_tile);

    if (sf->adaptive_pred_interp_filter) {
      for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE;

      for (i = 0; i < 64; ++i) {
        td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
        td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
        td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
        td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
      }
    }

    for (i = 0; i < MAX_REF_FRAMES; ++i) {
      x->pred_mv[i].row = INT16_MAX;
      x->pred_mv[i].col = INT16_MAX;
    }
    td->pc_root->index = 0;

    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    x->source_variance = UINT_MAX;
    x->cb_rdmult = orig_rdmult;

    if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
      const BLOCK_SIZE bsize =
          seg_skip ? BLOCK_64X64 : sf->always_this_block_size;
      set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
      set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                       &dummy_rate, &dummy_dist, 1, td->pc_root);
    } else if (cpi->partition_search_skippable_frame) {
      BLOCK_SIZE bsize;
      set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
      bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
      set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                       &dummy_rate, &dummy_dist, 1, td->pc_root);
    } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
               cm->frame_type != KEY_FRAME) {
      choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                       &dummy_rate, &dummy_dist, 1, td->pc_root);
    } else {
      if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) {
        int dr =
            get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult);
        x->cb_rdmult = dr;
      }

      if (cpi->oxcf.aq_mode == PERCEPTUAL_AQ && cm->show_frame) {
        x->segment_id = wiener_var_segment(cpi, BLOCK_64X64, mi_row, mi_col);
        x->cb_rdmult = vp9_compute_rd_mult(
            cpi, vp9_get_qindex(&cm->seg, x->segment_id, cm->base_qindex));
      }

      // If required set upper and lower partition size limits
      if (sf->auto_min_max_partition_size) {
        set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
        rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
                                &x->min_partition_size, &x->max_partition_size);
      }

      td->pc_root->none.rdcost = 0;
      rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rdc, dummy_rdc, td->pc_root);
    }
    (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
                                    sb_col_in_tile, num_sb_cols);
  }
}
#endif  // !CONFIG_REALTIME_ONLY
static void init_encode_frame_mb_context(VP9_COMP *cpi) {
  MACROBLOCK *const x = &cpi->td.mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);

  // Copy data over into macro block data structures.
  vp9_setup_src_planes(x, cpi->Source, 0, 0);

  vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);

  // Note: this memset assumes above_context[0], [1] and [2]
  // are allocated as part of the same buffer.
  memset(xd->above_context[0], 0,
         sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE);
  memset(xd->above_seg_context, 0,
         sizeof(*xd->above_seg_context) * aligned_mi_cols);
}
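
// check_dual_ref_flags() reports whether at least two of the LAST, GOLDEN
// and ALTREF reference flags are enabled. For example, with VP9_LAST_FLAG
// and VP9_GOLD_FLAG set, the three !! terms below sum to 2 and the function
// returns 1; it always returns 0 when segment 1 pins the reference frame.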
static int check_dual_ref_flags(VP9_COMP *cpi) {
  const int ref_flags = cpi->ref_frame_flags;

  if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
    return 0;
  } else {
    return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) +
            !!(ref_flags & VP9_ALT_FLAG)) >= 2;
  }
}

static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) {
  int mi_row, mi_col;
  const int mis = cm->mi_stride;
  MODE_INFO **mi_ptr = cm->mi_grid_visible;

  for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) {
    for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
      if (mi_ptr[mi_col]->tx_size > max_tx_size)
        mi_ptr[mi_col]->tx_size = max_tx_size;
    }
  }
}

static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) {
  if (frame_is_intra_only(&cpi->common))
    return INTRA_FRAME;
  else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
    return ALTREF_FRAME;
  else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
    return GOLDEN_FRAME;
  else
    return LAST_FRAME;
}

static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) {
  if (xd->lossless) return ONLY_4X4;
  if (cpi->common.frame_type == KEY_FRAME && cpi->sf.use_nonrd_pick_mode)
    return ALLOW_16X16;
  if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
    return ALLOW_32X32;
  else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
           cpi->sf.tx_size_search_method == USE_TX_8X8)
    return TX_MODE_SELECT;
  else
    return cpi->common.tx_mode;
}

static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x,
                                     RD_COST *rd_cost, BLOCK_SIZE bsize,
                                     PICK_MODE_CONTEXT *ctx) {
  if (!cpi->sf.nonrd_keyframe && bsize < BLOCK_16X16)
    vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
  else
    vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
}
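
// The two hybrid_search_* helpers below follow the same pattern as
// hybrid_intra_mode_search(): small blocks get the full RD intra search,
// larger blocks fall back to the cheaper non-RD pickers, with the SVC
// variant additionally choosing between intra-only and inter search
// depending on whether inter-layer prediction is disabled.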
static void hybrid_search_svc_baseiskey(VP9_COMP *cpi, MACROBLOCK *const x,
                                        RD_COST *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        TileDataEnc *tile_data, int mi_row,
                                        int mi_col) {
  if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) {
    vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
  } else {
    if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF)
      vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
    else if (bsize >= BLOCK_8X8)
      vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize,
                          ctx);
    else
      vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);
  }
}

static void hybrid_search_scene_change(VP9_COMP *cpi, MACROBLOCK *const x,
                                       RD_COST *rd_cost, BLOCK_SIZE bsize,
                                       PICK_MODE_CONTEXT *ctx,
                                       TileDataEnc *tile_data, int mi_row,
                                       int mi_col) {
  if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) {
    vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
  } else {
    vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx);
  }
}

static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
                                MACROBLOCK *const x, int mi_row, int mi_col,
                                RD_COST *rd_cost, BLOCK_SIZE bsize,
                                PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8);  // processing unit block size
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
  int plane;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);

  set_segment_index(cpi, x, mi_row, mi_col, bsize, 0);

  mi = xd->mi[0];
  mi->sb_type = bsize;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *pd = &xd->plane[plane];
    memcpy(a + num_4x4_blocks_wide * plane, pd->above_context,
           (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
    memcpy(l + num_4x4_blocks_high * plane, pd->left_context,
           (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
  }

  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
    if (cyclic_refresh_segment_id_boosted(mi->segment_id))
      x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);

  if (frame_is_intra_only(cm))
    hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
  else if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)
    hybrid_search_svc_baseiskey(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row,
                                mi_col);
  else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
    set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
  else if (bsize >= BLOCK_8X8) {
    if (cpi->rc.hybrid_intra_scene_change)
      hybrid_search_scene_change(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row,
                                 mi_col);
    else
      vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize,
                          ctx);
  } else {
    vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);
  }

  duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *pd = &xd->plane[plane];
    memcpy(pd->above_context, a + num_4x4_blocks_wide * plane,
           (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
    memcpy(pd->left_context, l + num_4x4_blocks_high * plane,
           (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
  }

  if (rd_cost->rate == INT_MAX) vp9_rd_cost_reset(rd_cost);

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
}
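
// fill_mode_info_sb() writes the winning modes recorded in the PC_TREE back
// into the frame-level mode-info grid, recursing through split partitions
// and skipping the second half of a rect partition when it falls outside
// the frame.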
static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
                              int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
  MACROBLOCKD *xd = &x->e_mbd;
  int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  PARTITION_TYPE partition = pc_tree->partitioning;
  BLOCK_SIZE subsize = get_subsize(bsize, partition);

  assert(bsize >= BLOCK_8X8);

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  switch (partition) {
    case PARTITION_NONE:
      set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
      *(xd->mi[0]) = pc_tree->none.mic;
      *(x->mbmi_ext) = pc_tree->none.mbmi_ext;
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
      break;
    case PARTITION_VERT:
      set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
      *(xd->mi[0]) = pc_tree->vertical[0].mic;
      *(x->mbmi_ext) = pc_tree->vertical[0].mbmi_ext;
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);

      if (mi_col + hbs < cm->mi_cols) {
        set_mode_info_offsets(cm, x, xd, mi_row, mi_col + hbs);
        *(xd->mi[0]) = pc_tree->vertical[1].mic;
        *(x->mbmi_ext) = pc_tree->vertical[1].mbmi_ext;
        duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize);
      }
      break;
    case PARTITION_HORZ:
      set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
      *(xd->mi[0]) = pc_tree->horizontal[0].mic;
      *(x->mbmi_ext) = pc_tree->horizontal[0].mbmi_ext;
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
      if (mi_row + hbs < cm->mi_rows) {
        set_mode_info_offsets(cm, x, xd, mi_row + hbs, mi_col);
        *(xd->mi[0]) = pc_tree->horizontal[1].mic;
        *(x->mbmi_ext) = pc_tree->horizontal[1].mbmi_ext;
        duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize);
      }
      break;
    case PARTITION_SPLIT: {
      fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, pc_tree->split[0]);
      fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize,
                        pc_tree->split[1]);
      fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize,
                        pc_tree->split[2]);
      fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize,
                        pc_tree->split[3]);
      break;
    }
    default: break;
  }
}

// Reset the prediction pixel ready flag recursively.
static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
  pc_tree->none.pred_pixel_ready = 0;
  pc_tree->horizontal[0].pred_pixel_ready = 0;
  pc_tree->horizontal[1].pred_pixel_ready = 0;
  pc_tree->vertical[0].pred_pixel_ready = 0;
  pc_tree->vertical[1].pred_pixel_ready = 0;

  if (bsize > BLOCK_8X8) {
    BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
    int i;
    for (i = 0; i < 4; ++i) pred_pixel_ready_reset(pc_tree->split[i], subsize);
  }
}
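
// ml_predict_var_paritioning() feeds a small 6-input network: the log of the
// squared DC quantizer step (scaled by 1/256), the log variance of the whole
// predicted block, and the four quarter-block variances normalized by the
// whole-block variance. A confidently positive score forces PARTITION_SPLIT,
// a confidently negative one forces PARTITION_NONE, and anything in between
// (|score| <= thresh) returns -1 to let the normal search decide.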
#define FEATURES 6
#define LABELS 2
static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
                                      BLOCK_SIZE bsize, int mi_row,
                                      int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  const NN_CONFIG *nn_config = NULL;

  switch (bsize) {
    case BLOCK_64X64: nn_config = &vp9_var_part_nnconfig_64; break;
    case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break;
    case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break;
    case BLOCK_8X8: break;
    default: assert(0 && "Unexpected block size."); return -1;
  }

  if (!nn_config) return -1;

  vpx_clear_system_state();

  {
    const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f;
    float features[FEATURES] = { 0.0f };
    const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
    int feature_idx = 0;
    float score[LABELS];

    features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
    vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
    {
      const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
      const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
      const int sb_offset_row = 8 * (mi_row & 7);
      const int sb_offset_col = 8 * (mi_col & 7);
      const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col;
      const uint8_t *src = x->plane[0].src.buf;
      const int src_stride = x->plane[0].src.stride;
      const int pred_stride = 64;
      unsigned int sse;
      int i;
      // Variance of whole block.
      const unsigned int var =
          cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
      const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);

      features[feature_idx++] = logf((float)var + 1.0f);
      for (i = 0; i < 4; ++i) {
        const int x_idx = (i & 1) * bs / 2;
        const int y_idx = (i >> 1) * bs / 2;
        const int src_offset = y_idx * src_stride + x_idx;
        const int pred_offset = y_idx * pred_stride + x_idx;
        // Variance of quarter block.
        const unsigned int sub_var =
            cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
                                    pred + pred_offset, pred_stride, &sse);
        const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
        features[feature_idx++] = var_ratio;
      }
    }

    assert(feature_idx == FEATURES);
    nn_predict(features, nn_config, score);
    if (score[0] > thresh) return PARTITION_SPLIT;
    if (score[0] < -thresh) return PARTITION_NONE;
    return -1;
  }
}
#undef FEATURES
#undef LABELS
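
// nonrd_pick_partition() mirrors rd_pick_partition() for the real-time path:
// the same NONE/SPLIT/HORZ/VERT walk, but using the cheap non-RD mode
// pickers, optional ML-based pruning of the NONE-vs-SPLIT decision, and no
// rectangular search at all for speed >= 6.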
static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, TOKENEXTRA **tp,
                                 int mi_row, int mi_col, BLOCK_SIZE bsize,
                                 RD_COST *rd_cost, int do_recon,
                                 int64_t best_rd, PC_TREE *pc_tree) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
  TOKENEXTRA *tp_orig = *tp;
  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
  int i;
  BLOCK_SIZE subsize = bsize;
  RD_COST this_rdc, sum_rdc, best_rdc;
  int do_split = bsize >= BLOCK_8X8;
  int do_rect = 1;
  // Override skipping rectangular partition operations for edge blocks
  const int force_horz_split = (mi_row + ms >= cm->mi_rows);
  const int force_vert_split = (mi_col + ms >= cm->mi_cols);
  const int xss = x->e_mbd.plane[1].subsampling_x;
  const int yss = x->e_mbd.plane[1].subsampling_y;

  int partition_none_allowed = !force_horz_split && !force_vert_split;
  int partition_horz_allowed =
      !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
  int partition_vert_allowed =
      !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
  const int use_ml_based_partitioning =
      sf->partition_search_type == ML_BASED_PARTITION;

  (void)*tp_orig;

  // Avoid checking for rectangular partitions for speed >= 6.
  if (cpi->oxcf.speed >= 6) do_rect = 0;

  assert(num_8x8_blocks_wide_lookup[bsize] ==
         num_8x8_blocks_high_lookup[bsize]);

  vp9_rd_cost_init(&sum_rdc);
  vp9_rd_cost_reset(&best_rdc);
  best_rdc.rdcost = best_rd;

  // Determine partition types in search according to the speed features.
  // The threshold set here has to be of square block size.
  if (sf->auto_min_max_partition_size) {
    partition_none_allowed &=
        (bsize <= x->max_partition_size && bsize >= x->min_partition_size);
    partition_horz_allowed &=
        ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
         force_horz_split);
    partition_vert_allowed &=
        ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
         force_vert_split);
    do_split &= bsize > x->min_partition_size;
  }
  if (sf->use_square_partition_only) {
    partition_horz_allowed &= force_horz_split;
    partition_vert_allowed &= force_vert_split;
  }

  if (use_ml_based_partitioning) {
    if (partition_none_allowed || do_split) do_rect = 0;
    if (partition_none_allowed && do_split) {
      const int ml_predicted_partition =
          ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col);
      if (ml_predicted_partition == PARTITION_NONE) do_split = 0;
      if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0;
    }
  }

  if (!partition_none_allowed && !do_split) do_rect = 1;

  ctx->pred_pixel_ready =
      !(partition_vert_allowed || partition_horz_allowed || do_split);

  // PARTITION_NONE
  if (partition_none_allowed) {
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize,
                        ctx);
    ctx->mic = *xd->mi[0];
    ctx->mbmi_ext = *x->mbmi_ext;
    ctx->skip_txfm[0] = x->skip_txfm[0];
    ctx->skip = x->skip;

    if (this_rdc.rate != INT_MAX) {
      const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
      this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
      this_rdc.rdcost =
          RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
      if (this_rdc.rdcost < best_rdc.rdcost) {
        best_rdc = this_rdc;
        if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;

        if (!use_ml_based_partitioning) {
          int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
          int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;
          dist_breakout_thr >>=
              8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
          rate_breakout_thr *= num_pels_log2_lookup[bsize];
          if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
              this_rdc.dist < dist_breakout_thr) {
            do_split = 0;
            do_rect = 0;
          }
        }
      }
    }
  }

  // store estimated motion vector
  store_pred_mv(x, ctx);

  // PARTITION_SPLIT
  if (do_split) {
    int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
    sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
    sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
    subsize = get_subsize(bsize, PARTITION_SPLIT);
    for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
      const int x_idx = (i & 1) * ms;
      const int y_idx = (i >> 1) * ms;

      if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
        continue;
      load_pred_mv(x, ctx);
      nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
                           mi_col + x_idx, subsize, &this_rdc, 0,
                           best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost += this_rdc.rdcost;
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_SPLIT;
    } else {
      // skip rectangular partition test when larger block size
      // gives better rd cost
      if (sf->less_rectangular_check) do_rect &= !partition_none_allowed;
    }
  }

  // PARTITION_HORZ
  if (partition_horz_allowed && do_rect) {
    subsize = get_subsize(bsize, PARTITION_HORZ);
    load_pred_mv(x, ctx);
    pc_tree->horizontal[0].pred_pixel_ready = 1;
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                        &pc_tree->horizontal[0]);

    pc_tree->horizontal[0].mic = *xd->mi[0];
    pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
    pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
    pc_tree->horizontal[0].skip = x->skip;

    if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) {
      load_pred_mv(x, ctx);
      pc_tree->horizontal[1].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col, &this_rdc,
                          subsize, &pc_tree->horizontal[1]);

      pc_tree->horizontal[1].mic = *xd->mi[0];
      pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
      pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->horizontal[1].skip = x->skip;

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
        this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_HORZ;
    } else {
      pred_pixel_ready_reset(pc_tree, bsize);
    }
  }

  // PARTITION_VERT
  if (partition_vert_allowed && do_rect) {
    subsize = get_subsize(bsize, PARTITION_VERT);
    load_pred_mv(x, ctx);
    pc_tree->vertical[0].pred_pixel_ready = 1;
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                        &pc_tree->vertical[0]);
    pc_tree->vertical[0].mic = *xd->mi[0];
    pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
    pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
    pc_tree->vertical[0].skip = x->skip;

    if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) {
      load_pred_mv(x, ctx);
      pc_tree->vertical[1].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc,
                          subsize, &pc_tree->vertical[1]);
      pc_tree->vertical[1].mic = *xd->mi[0];
      pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
      pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->vertical[1].skip = x->skip;

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
        sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_VERT;
    } else {
      pred_pixel_ready_reset(pc_tree, bsize);
    }
  }

  *rd_cost = best_rdc;

  if (best_rdc.rate == INT_MAX) {
    vp9_rd_cost_reset(rd_cost);
    return;
  }

  // update mode info array
  fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, pc_tree);

  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) {
    int output_enabled = (bsize == BLOCK_64X64);
    encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
                 pc_tree);
  }

  if (bsize == BLOCK_64X64 && do_recon) {
    assert(tp_orig < *tp);
    assert(best_rdc.rate < INT_MAX);
    assert(best_rdc.dist < INT64_MAX);
  } else {
    assert(tp_orig == *tp);
  }
}
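
// nonrd_select_partition() refines a partitioning that was chosen ahead of
// time and stored in the mi grid (e.g. by variance-based partitioning):
// 32x32 and 16x16 blocks that look refinable are re-searched with
// nonrd_pick_partition() under tightened min/max size bounds, while
// everything else simply re-runs mode selection for the recorded partition.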
static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td,
                                   TileDataEnc *tile_data, MODE_INFO **mi,
                                   TOKENEXTRA **tp, int mi_row, int mi_col,
                                   BLOCK_SIZE bsize, int output_enabled,
                                   RD_COST *rd_cost, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  const int mis = cm->mi_stride;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;
  RD_COST this_rdc;
  BLOCK_SIZE subsize_ref =
      (cpi->sf.adapt_partition_source_sad) ? BLOCK_8X8 : BLOCK_16X16;

  vp9_rd_cost_reset(&this_rdc);
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
  partition = partition_lookup[bsl][subsize];

  if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) {
    x->max_partition_size = BLOCK_32X32;
    x->min_partition_size = BLOCK_16X16;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
                         0, INT64_MAX, pc_tree);
  } else if (bsize == BLOCK_32X32 && partition != PARTITION_NONE &&
             subsize >= subsize_ref) {
    x->max_partition_size = BLOCK_32X32;
    x->min_partition_size = BLOCK_8X8;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
                         0, INT64_MAX, pc_tree);
  } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) {
    x->max_partition_size = BLOCK_16X16;
    x->min_partition_size = BLOCK_8X8;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
                         0, INT64_MAX, pc_tree);
  } else {
    switch (partition) {
      case PARTITION_NONE:
        pc_tree->none.pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
                            &pc_tree->none);
        pc_tree->none.mic = *xd->mi[0];
        pc_tree->none.mbmi_ext = *x->mbmi_ext;
        pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
        pc_tree->none.skip = x->skip;
        break;
      case PARTITION_VERT:
        pc_tree->vertical[0].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
                            &pc_tree->vertical[0]);
        pc_tree->vertical[0].mic = *xd->mi[0];
        pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
        pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->vertical[0].skip = x->skip;
        if (mi_col + hbs < cm->mi_cols) {
          pc_tree->vertical[1].pred_pixel_ready = 1;
          nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
                              &this_rdc, subsize, &pc_tree->vertical[1]);
          pc_tree->vertical[1].mic = *xd->mi[0];
          pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
          pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
          pc_tree->vertical[1].skip = x->skip;
          if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
              rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
            rd_cost->rate += this_rdc.rate;
            rd_cost->dist += this_rdc.dist;
          }
        }
        break;
      case PARTITION_HORZ:
        pc_tree->horizontal[0].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
                            &pc_tree->horizontal[0]);
        pc_tree->horizontal[0].mic = *xd->mi[0];
        pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
        pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->horizontal[0].skip = x->skip;
        if (mi_row + hbs < cm->mi_rows) {
          pc_tree->horizontal[1].pred_pixel_ready = 1;
          nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
                              &this_rdc, subsize, &pc_tree->horizontal[1]);
          pc_tree->horizontal[1].mic = *xd->mi[0];
          pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
          pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
          pc_tree->horizontal[1].skip = x->skip;
          if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
              rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
            rd_cost->rate += this_rdc.rate;
            rd_cost->dist += this_rdc.dist;
          }
        }
        break;
      default:
        assert(partition == PARTITION_SPLIT);
        subsize = get_subsize(bsize, PARTITION_SPLIT);
        nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                               subsize, output_enabled, rd_cost,
                               pc_tree->split[0]);
        nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
                               mi_col + hbs, subsize, output_enabled, &this_rdc,
                               pc_tree->split[1]);
        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
          rd_cost->rate += this_rdc.rate;
          rd_cost->dist += this_rdc.dist;
        }
        nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp,
                               mi_row + hbs, mi_col, subsize, output_enabled,
                               &this_rdc, pc_tree->split[2]);
        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
          rd_cost->rate += this_rdc.rate;
          rd_cost->dist += this_rdc.dist;
        }
        nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
                               mi_row + hbs, mi_col + hbs, subsize,
                               output_enabled, &this_rdc, pc_tree->split[3]);
        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
          rd_cost->rate += this_rdc.rate;
          rd_cost->dist += this_rdc.dist;
        }
        break;
    }
  }

  if (bsize == BLOCK_64X64 && output_enabled)
    encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree);
}

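// Encode a block using exactly the partitioning stored in the mi grid: pick
// non-RD modes for each leaf, encode it with encode_b_rt(), and update the
// partition counts and context.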
static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, MODE_INFO **mi,
                                TOKENEXTRA **tp, int mi_row, int mi_col,
                                BLOCK_SIZE bsize, int output_enabled,
                                RD_COST *dummy_cost, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  const int mis = cm->mi_stride;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
  partition = partition_lookup[bsl][subsize];

  if (output_enabled && bsize != BLOCK_4X4) {
    int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    td->counts->partition[ctx][partition]++;
  }

  switch (partition) {
    case PARTITION_NONE:
      pc_tree->none.pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->none);
      pc_tree->none.mic = *xd->mi[0];
      pc_tree->none.mbmi_ext = *x->mbmi_ext;
      pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
      pc_tree->none.skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->none);
      break;
    case PARTITION_VERT:
      pc_tree->vertical[0].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->vertical[0]);
      pc_tree->vertical[0].mic = *xd->mi[0];
      pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
      pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->vertical[0].skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->vertical[0]);
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        pc_tree->vertical[1].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, dummy_cost,
                            subsize, &pc_tree->vertical[1]);
        pc_tree->vertical[1].mic = *xd->mi[0];
        pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
        pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->vertical[1].skip = x->skip;
        encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs,
                    output_enabled, subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      pc_tree->horizontal[0].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->horizontal[0]);
      pc_tree->horizontal[0].mic = *xd->mi[0];
      pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
      pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->horizontal[0].skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->horizontal[0]);
      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        pc_tree->horizontal[1].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost,
                            subsize, &pc_tree->horizontal[1]);
        pc_tree->horizontal[1].mic = *xd->mi[0];
        pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
        pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->horizontal[1].skip = x->skip;
        encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col,
                    output_enabled, subsize, &pc_tree->horizontal[1]);
      }
      break;
    default:
      assert(partition == PARTITION_SPLIT);
      subsize = get_subsize(bsize, PARTITION_SPLIT);
      if (bsize == BLOCK_8X8) {
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                            subsize, pc_tree->leaf_split[0]);
        encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                    subsize, pc_tree->leaf_split[0]);
      } else {
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize,
                            output_enabled, dummy_cost, pc_tree->split[0]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
                            mi_col + hbs, subsize, output_enabled, dummy_cost,
                            pc_tree->split[1]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis, tp,
                            mi_row + hbs, mi_col, subsize, output_enabled,
                            dummy_cost, pc_tree->split[2]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
                            mi_row + hbs, mi_col + hbs, subsize, output_enabled,
                            dummy_cost, pc_tree->split[3]);
      }
      break;
  }

  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}

// Get a prediction (stored in x->est_pred) for the whole 64x64 superblock.
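// On inter frames this picks a reference among LAST, GOLDEN and ALTREF, runs
// coarse int-pro motion estimation with a bilinear filter, and writes the
// resulting inter prediction into x->est_pred; on key frames the buffer is
// simply filled with the mid-gray value for the current bit depth.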
static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  MACROBLOCKD *xd = &x->e_mbd;

  set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);

  if (!is_key_frame) {
    MODE_INFO *mi = xd->mi[0];
    YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
    const YV12_BUFFER_CONFIG *yv12_g = NULL;
    const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
                             (mi_row + 4 < cm->mi_rows);
    unsigned int y_sad_g, y_sad_thr;
    unsigned int y_sad = UINT_MAX;

    assert(yv12 != NULL);

    if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
        cpi->svc.use_gf_temporal_ref_current_layer) {
      // For now, GOLDEN will not be used for non-zero spatial layers, since
      // it may not be a temporal reference.
      yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
    }

    // Only compute y_sad_g (sad for golden reference) for speed < 8.
    if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
        (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
      y_sad_g = cpi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    } else {
      y_sad_g = UINT_MAX;
    }

    if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
        cpi->rc.is_src_frame_alt_ref) {
      yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                           &cm->frame_refs[ALTREF_FRAME - 1].sf);
      mi->ref_frame[0] = ALTREF_FRAME;
      y_sad_g = UINT_MAX;
    } else {
      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                           &cm->frame_refs[LAST_FRAME - 1].sf);
      mi->ref_frame[0] = LAST_FRAME;
    }
    mi->ref_frame[1] = NONE;
    mi->sb_type = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filter = BILINEAR;

    {
      const MV dummy_mv = { 0, 0 };
      y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
                                            &dummy_mv);
      x->sb_use_mv_part = 1;
      x->sb_mvcol_part = mi->mv[0].as_mv.col;
      x->sb_mvrow_part = mi->mv[0].as_mv.row;
    }

    // Pick the reference frame for partitioning; bias toward LAST when
    // y_sad_g and y_sad are close if short_circuit_low_temp_var is on.
    y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
    if (y_sad_g < y_sad_thr) {
      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
      mi->ref_frame[0] = GOLDEN_FRAME;
      mi->mv[0].as_int = 0;
    } else {
      x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
    }

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    xd->plane[0].dst.buf = x->est_pred;
    xd->plane[0].dst.stride = 64;
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
  } else {
#if CONFIG_VP9_HIGHBITDEPTH
    switch (xd->bd) {
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
      case 10:
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
        break;
      case 12:
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
        break;
    }
#else
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }
}

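// Encode one 64x64-superblock row of a tile in non-RD (real-time) mode. For
// each superblock the per-SB state is reset, a partition search strategy is
// chosen (possibly overridden by segmentation skip or source-SAD adaptation),
// the superblock is partitioned and encoded, and row-level multi-threading
// sync is signalled around the work.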
static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, int mi_row,
                                TOKENEXTRA **tp) {
  SPEED_FEATURES *const sf = &cpi->sf;
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_col_start = tile_info->mi_col_start;
  const int mi_col_end = tile_info->mi_col_end;
  int mi_col;
  const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
  const int num_sb_cols =
      get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
  int sb_col_in_tile;

  // Initialize the left context for the new SB row
  memset(&xd->left_context, 0, sizeof(xd->left_context));
  memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));

  // Code each SB in the row
  for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
       mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) {
    const struct segmentation *const seg = &cm->seg;
    RD_COST dummy_rdc;
    const int idx_str = cm->mi_stride * mi_row + mi_col;
    MODE_INFO **mi = cm->mi_grid_visible + idx_str;
    PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
    BLOCK_SIZE bsize = BLOCK_64X64;
    int seg_skip = 0;
    int i;

    (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
                                   sb_col_in_tile);

    if (cpi->use_skin_detection) {
      vp9_compute_skin_sb(cpi, BLOCK_16X16, mi_row, mi_col);
    }

    x->source_variance = UINT_MAX;
    for (i = 0; i < MAX_REF_FRAMES; ++i) {
      x->pred_mv[i].row = INT16_MAX;
      x->pred_mv[i].col = INT16_MAX;
    }
    vp9_rd_cost_init(&dummy_rdc);
    x->color_sensitivity[0] = 0;
    x->color_sensitivity[1] = 0;
    x->sb_is_skin = 0;
    x->skip_low_source_sad = 0;
    x->lowvar_highsumdiff = 0;
    x->content_state_sb = 0;
    x->zero_temp_sad_source = 0;
    x->sb_use_mv_part = 0;
    x->sb_mvcol_part = 0;
    x->sb_mvrow_part = 0;
    x->sb_pickmode_part = 0;
    x->arf_frame_usage = 0;
    x->lastgolden_frame_usage = 0;

    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
      if (seg_skip) {
        partition_search_type = FIXED_PARTITION;
      }
    }

    if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) {
      int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3);
      int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
      int64_t source_sad = avg_source_sad(cpi, x, shift, sb_offset2);
      if (sf->adapt_partition_source_sad &&
          (cpi->oxcf.rc_mode == VPX_VBR && !cpi->rc.is_src_frame_alt_ref &&
           source_sad > sf->adapt_partition_thresh &&
           (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)))
        partition_search_type = REFERENCE_PARTITION;
    }

    // Set the partition type of the 64X64 block
    switch (partition_search_type) {
      case VAR_BASED_PARTITION:
        // TODO(jingning, marpan): The mode decision and encoding process
        // support both intra and inter sub8x8 block coding for RTC mode.
        // Tune the thresholds accordingly to use sub8x8 block coding for
        // coding performance improvement.
        choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        break;
      case ML_BASED_PARTITION:
        get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
        x->max_partition_size = BLOCK_64X64;
        x->min_partition_size = BLOCK_8X8;
        x->sb_pickmode_part = 1;
        nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
                             td->pc_root);
        break;
      case SOURCE_VAR_BASED_PARTITION:
        set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        break;
      case FIXED_PARTITION:
        if (!seg_skip) bsize = sf->always_this_block_size;
        set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        break;
      default:
        assert(partition_search_type == REFERENCE_PARTITION);
        x->sb_pickmode_part = 1;
        set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
        // Use nonrd_pick_partition on scene-cut for VBR mode.
        // nonrd_pick_partition does not support 4x4 partition, so avoid it
        // on key frame for now.
        if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad &&
             cpi->oxcf.speed < 6 && !frame_is_intra_only(cm) &&
             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
          // Use lower max_partition_size for low resolutions.
          if (cm->width <= 352 && cm->height <= 288)
            x->max_partition_size = BLOCK_32X32;
          else
            x->max_partition_size = BLOCK_64X64;
          x->min_partition_size = BLOCK_8X8;
          nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                               BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
                               td->pc_root);
        } else {
          choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
          // TODO(marpan): Seems like nonrd_select_partition does not support
          // 4x4 partition. Since 4x4 is used on key frame, use this switch
          // for now.
          if (frame_is_intra_only(cm))
            nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                                BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
          else
            nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                                   BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        }
        break;
    }

    // Update ref_frame usage for inter frame if this group is ARF group.
    if (!cpi->rc.is_src_frame_alt_ref && !cpi->refresh_golden_frame &&
        !cpi->refresh_alt_ref_frame && cpi->rc.alt_ref_gf_group &&
        cpi->sf.use_altref_onepass) {
      int sboffset = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
      if (cpi->count_arf_frame_usage != NULL)
        cpi->count_arf_frame_usage[sboffset] = x->arf_frame_usage;
      if (cpi->count_lastgolden_frame_usage != NULL)
        cpi->count_lastgolden_frame_usage[sboffset] = x->lastgolden_frame_usage;
    }

    (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
                                    sb_col_in_tile, num_sb_cols);
  }
}

// end RTC play code

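// Variance of a 16x16 (256-sample) block from its sse/sum accumulators:
// var = sse - sum^2 / 256, with ">> 8" implementing the division by 256.
// The high-bitdepth variant below computes in 64 bits and clamps at zero.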
static INLINE uint32_t variance(const diff *const d) {
  return d->sse - (uint32_t)(((int64_t)d->sum * d->sum) >> 8);
}

#if CONFIG_VP9_HIGHBITDEPTH
static INLINE uint32_t variance_highbd(diff *const d) {
  const int64_t var = (int64_t)d->sse - (((int64_t)d->sum * d->sum) >> 8);
  return (var >= 0) ? (uint32_t)var : 0;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

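// Histogram the 16x16 source-vs-last-source variances of the frame and set
// cpi->source_var_thresh to the bin edge that covers `cutoff` macroblocks
// (returning 0), or return sf->search_type_check_frequency when no suitable
// threshold exists.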
static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  const VP9_COMMON *const cm = &cpi->common;

  const uint8_t *src = cpi->Source->y_buffer;
  const uint8_t *last_src = cpi->Last_Source->y_buffer;
  const int src_stride = cpi->Source->y_stride;
  const int last_stride = cpi->Last_Source->y_stride;

  // Pick cutoff threshold
  const int cutoff = (VPXMIN(cm->width, cm->height) >= 720)
                         ? (cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100)
                         : (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100);
  DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]);
  diff *var16 = cpi->source_diff_var;

  int sum = 0;
  int i, j;

  memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0]));

  for (i = 0; i < cm->mb_rows; i++) {
    for (j = 0; j < cm->mb_cols; j++) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (cm->use_highbitdepth) {
        switch (cm->bit_depth) {
          case VPX_BITS_8:
            vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride,
                                     &var16->sse, &var16->sum);
            var16->var = variance(var16);
            break;
          case VPX_BITS_10:
            vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
                                      &var16->sse, &var16->sum);
            var16->var = variance_highbd(var16);
            break;
          default:
            assert(cm->bit_depth == VPX_BITS_12);
            vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
                                      &var16->sse, &var16->sum);
            var16->var = variance_highbd(var16);
            break;
        }
      } else {
        vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
                        &var16->sum);
        var16->var = variance(var16);
      }
#else
      vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
                      &var16->sum);
      var16->var = variance(var16);
#endif  // CONFIG_VP9_HIGHBITDEPTH

      if (var16->var >= VAR_HIST_MAX_BG_VAR)
        hist[VAR_HIST_BINS - 1]++;
      else
        hist[var16->var / VAR_HIST_FACTOR]++;

      src += 16;
      last_src += 16;
      var16++;
    }

    src = src - cm->mb_cols * 16 + 16 * src_stride;
    last_src = last_src - cm->mb_cols * 16 + 16 * last_stride;
  }

  cpi->source_var_thresh = 0;

  if (hist[VAR_HIST_BINS - 1] < cutoff) {
    for (i = 0; i < VAR_HIST_BINS - 1; i++) {
      sum += hist[i];

      if (sum > cutoff) {
        cpi->source_var_thresh = (i + 1) * VAR_HIST_FACTOR;
        return 0;
      }
    }
  }

  return sf->search_type_check_frequency;
}

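// Per-frame driver for SOURCE_VAR_BASED_PARTITION: key frames fall back to a
// full partition search, intra-only frames to fixed partitioning, and inter
// frames periodically refresh the variance threshold from the histogram,
// using fixed partitioning in the frames between checks.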
static void source_var_based_partition_search_method(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;

  if (cm->frame_type == KEY_FRAME) {
    // For key frame, use SEARCH_PARTITION.
    sf->partition_search_type = SEARCH_PARTITION;
  } else if (cm->intra_only) {
    sf->partition_search_type = FIXED_PARTITION;
  } else {
    if (cm->last_width != cm->width || cm->last_height != cm->height) {
      if (cpi->source_diff_var) vpx_free(cpi->source_diff_var);

      CHECK_MEM_ERROR(cm, cpi->source_diff_var,
                      vpx_calloc(cm->MBs, sizeof(diff)));
    }

    if (!cpi->frames_till_next_var_check)
      cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi);

    if (cpi->frames_till_next_var_check > 0) {
      sf->partition_search_type = FIXED_PARTITION;
      cpi->frames_till_next_var_check--;
    }
  }
}

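// Frame-level skip-encode test: allow skipping only for a shown inter frame
// whose intra blocks are outnumbered at least four-to-one by inter blocks.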
static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) {
  unsigned int intra_count = 0, inter_count = 0;
  int j;

  for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
    intra_count += td->counts->intra_inter[j][0];
    inter_count += td->counts->intra_inter[j][1];
  }

  return (intra_count << 2) < inter_count && cm->frame_type != KEY_FRAME &&
         cm->show_frame;
}

void vp9_init_tile_data(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  int tile_col, tile_row;
  TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
  TOKENLIST *tplist = cpi->tplist[0][0];
  int tile_tok = 0;
  int tplist_count = 0;

  if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
    if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
    CHECK_MEM_ERROR(
        cm, cpi->tile_data,
        vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
    cpi->allocated_tiles = tile_cols * tile_rows;

    for (tile_row = 0; tile_row < tile_rows; ++tile_row)
      for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
        TileDataEnc *tile_data =
            &cpi->tile_data[tile_row * tile_cols + tile_col];
        int i, j;
        for (i = 0; i < BLOCK_SIZES; ++i) {
          for (j = 0; j < MAX_MODES; ++j) {
            tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT;
#if CONFIG_CONSISTENT_RECODE
            tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT;
#endif
            tile_data->mode_map[i][j] = j;
          }
        }
#if CONFIG_MULTITHREAD
        tile_data->row_base_thresh_freq_fact = NULL;
#endif
      }
  }

  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
      TileInfo *tile_info = &this_tile->tile_info;
      if (cpi->sf.adaptive_rd_thresh_row_mt &&
          this_tile->row_base_thresh_freq_fact == NULL)
        vp9_row_mt_alloc_rd_thresh(cpi, this_tile);
      vp9_tile_init(tile_info, cm, tile_row, tile_col);

      cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
      pre_tok = cpi->tile_tok[tile_row][tile_col];
      tile_tok = allocated_tokens(*tile_info);
      cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
      tplist = cpi->tplist[tile_row][tile_col];
      tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
    }
  }
}

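// Encode one superblock row of a tile, dispatching to the non-RD (real-time)
// or RD row encoder, and record the token range the row produced in
// cpi->tplist for the bitstream packing stage.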
void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row,
                       int tile_col, int mi_row) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;
  TOKENEXTRA *tok = NULL;
  int tile_sb_row;
  int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1;

  tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile_info->mi_row_start) >>
                MI_BLOCK_SIZE_LOG2;
  get_start_tok(cpi, tile_row, tile_col, mi_row, &tok);
  cpi->tplist[tile_row][tile_col][tile_sb_row].start = tok;

  if (cpi->sf.use_nonrd_pick_mode)
    encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok);
#if !CONFIG_REALTIME_ONLY
  else
    encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
#endif

  cpi->tplist[tile_row][tile_col][tile_sb_row].stop = tok;
  cpi->tplist[tile_row][tile_col][tile_sb_row].count =
      (unsigned int)(cpi->tplist[tile_row][tile_col][tile_sb_row].stop -
                     cpi->tplist[tile_row][tile_col][tile_sb_row].start);
  assert(tok - cpi->tplist[tile_row][tile_col][tile_sb_row].start <=
         get_token_alloc(MI_BLOCK_SIZE >> 1, tile_mb_cols));

  (void)tile_mb_cols;
}

void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
                     int tile_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;
  const int mi_row_start = tile_info->mi_row_start;
  const int mi_row_end = tile_info->mi_row_end;
  int mi_row;

  for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
    vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
}

static void encode_tiles(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  int tile_col, tile_row;

  vp9_init_tile_data(cpi);

  for (tile_row = 0; tile_row < tile_rows; ++tile_row)
    for (tile_col = 0; tile_col < tile_cols; ++tile_col)
      vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col);
}

#if CONFIG_FP_MB_STATS
static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
                            VP9_COMMON *cm, uint8_t **this_frame_mb_stats) {
  uint8_t *mb_stats_in = firstpass_mb_stats->mb_stats_start +
                         cm->current_video_frame * cm->MBs * sizeof(uint8_t);

  if (mb_stats_in > firstpass_mb_stats->mb_stats_end) return EOF;

  *this_frame_mb_stats = mb_stats_in;

  return 1;
}
#endif

static int compare_kmeans_data(const void *a, const void *b) {
  if (((const KMEANS_DATA *)a)->value > ((const KMEANS_DATA *)b)->value) {
    return 1;
  } else if (((const KMEANS_DATA *)a)->value <
             ((const KMEANS_DATA *)b)->value) {
    return -1;
  } else {
    return 0;
  }
}

static void compute_boundary_ls(const double *ctr_ls, int k,
                                double *boundary_ls) {
  // boundary_ls[j] is the upper bound of data centered at ctr_ls[j]
  int j;
  for (j = 0; j < k - 1; ++j) {
    boundary_ls[j] = (ctr_ls[j] + ctr_ls[j + 1]) / 2.;
  }
  boundary_ls[k - 1] = DBL_MAX;
}

int vp9_get_group_idx(double value, double *boundary_ls, int k) {
  int group_idx = 0;
  while (value >= boundary_ls[group_idx]) {
    ++group_idx;
    if (group_idx == k - 1) {
      break;
    }
  }
  return group_idx;
}

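// One-dimensional k-means on `arr`: because the samples are scalar they are
// sorted once, and cluster assignment reduces to comparing each value against
// the midpoints between adjacent centers (boundary_ls), scanned monotonically.
// A fixed ten iterations of assign/update are run before the final centers,
// boundaries, per-sample group indices and group counts are written out.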
void vp9_kmeans(double *ctr_ls, double *boundary_ls, int *count_ls, int k,
                KMEANS_DATA *arr, int size) {
  int i, j;
  int itr;
  int group_idx;
  double sum[MAX_KMEANS_GROUPS];
  int count[MAX_KMEANS_GROUPS];

  vpx_clear_system_state();

  assert(k >= 2 && k <= MAX_KMEANS_GROUPS);

  qsort(arr, size, sizeof(*arr), compare_kmeans_data);

  // initialize the center points
  for (j = 0; j < k; ++j) {
    ctr_ls[j] = arr[(size * (2 * j + 1)) / (2 * k)].value;
  }

  for (itr = 0; itr < 10; ++itr) {
    compute_boundary_ls(ctr_ls, k, boundary_ls);
    for (i = 0; i < MAX_KMEANS_GROUPS; ++i) {
      sum[i] = 0;
      count[i] = 0;
    }

    // Both the data and centers are sorted in ascending order.
    // As each data point is processed in order, its corresponding group index
    // can only increase. So we only need to reset the group index to zero here.
    group_idx = 0;
    for (i = 0; i < size; ++i) {
      while (arr[i].value >= boundary_ls[group_idx]) {
        // place samples into clusters
        ++group_idx;
        if (group_idx == k - 1) {
          break;
        }
      }
      sum[group_idx] += arr[i].value;
      ++count[group_idx];
    }

    for (group_idx = 0; group_idx < k; ++group_idx) {
      if (count[group_idx] > 0)
        ctr_ls[group_idx] = sum[group_idx] / count[group_idx];

      sum[group_idx] = 0;
      count[group_idx] = 0;
    }
  }

  // compute group_idx, boundary_ls and count_ls
  for (j = 0; j < k; ++j) {
    count_ls[j] = 0;
  }
  compute_boundary_ls(ctr_ls, k, boundary_ls);
  group_idx = 0;
  for (i = 0; i < size; ++i) {
    while (arr[i].value >= boundary_ls[group_idx]) {
      ++group_idx;
      if (group_idx == k - 1) {
        break;
      }
    }
    arr[i].group_idx = group_idx;
    ++count_ls[group_idx];
  }
}

static void encode_frame_internal(VP9_COMP *cpi) {
  SPEED_FEATURES *const sf = &cpi->sf;
  ThreadData *const td = &cpi->td;
  MACROBLOCK *const x = &td->mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int gf_group_index = cpi->twopass.gf_group.index;

  xd->mi = cm->mi_grid_visible;
  xd->mi[0] = cm->mi;
  vp9_zero(*td->counts);
  vp9_zero(cpi->td.rd_counts);

  xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 &&
                 cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;

#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth)
    x->fwd_txfm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4;
  else
    x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
  x->highbd_inv_txfm_add =
      xd->lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add;
#else
  x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->inv_txfm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
#if CONFIG_CONSISTENT_RECODE
  x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1;
#endif
  if (xd->lossless) x->optimize = 0;
  x->sharpness = cpi->oxcf.sharpness;
  x->adjust_rdmult_by_segment = (cpi->oxcf.aq_mode == VARIANCE_AQ);

  cm->tx_mode = select_tx_mode(cpi, xd);

  vp9_frame_init_quantizer(cpi);

  vp9_initialize_rd_consts(cpi);
  vp9_initialize_me_consts(cpi, x, cm->base_qindex);
  init_encode_frame_mb_context(cpi);
  cm->use_prev_frame_mvs =
      !cm->error_resilient_mode && cm->width == cm->last_width &&
      cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame;
  // Special case: set prev_mi to NULL when the previous mode info
  // context cannot be used.
  cm->prev_mi =
      cm->use_prev_frame_mvs ? cm->prev_mip + cm->mi_stride + 1 : NULL;

  x->quant_fp = cpi->sf.use_quant_fp;
  vp9_zero(x->skip_txfm);

  if (sf->use_nonrd_pick_mode) {
    // Initialize internal buffer pointers for rtc coding, where non-RD
    // mode decision is used and hence no buffer pointer swap needed.
    int i;
    struct macroblock_plane *const p = x->plane;
    struct macroblockd_plane *const pd = xd->plane;
    PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none;

    for (i = 0; i < MAX_MB_PLANE; ++i) {
      p[i].coeff = ctx->coeff_pbuf[i][0];
      p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
      pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
      p[i].eobs = ctx->eobs_pbuf[i][0];
    }
    vp9_zero(x->zcoeff_blk);

    if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0 &&
        !(cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) &&
        !cpi->use_svc)
      cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);

    if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
      source_var_based_partition_search_method(cpi);
  } else if (gf_group_index && gf_group_index < MAX_ARF_GOP_SIZE &&
             cpi->sf.enable_tpl_model) {
    TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
    TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;

    int tpl_stride = tpl_frame->stride;
    int64_t intra_cost_base = 0;
    int64_t mc_dep_cost_base = 0;
    int row, col;

    for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) {
      for (col = 0; col < cm->mi_cols; ++col) {
        TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
        intra_cost_base += this_stats->intra_cost;
        mc_dep_cost_base += this_stats->mc_dep_cost;
      }
    }

    vpx_clear_system_state();

    if (tpl_frame->is_valid)
      cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base;
  }

  // Frame segmentation
  if (cpi->oxcf.aq_mode == PERCEPTUAL_AQ) build_kmeans_segmentation(cpi);

  {
    struct vpx_usec_timer emr_timer;
    vpx_usec_timer_start(&emr_timer);

#if CONFIG_FP_MB_STATS
    if (cpi->use_fp_mb_stats) {
      input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
                       &cpi->twopass.this_frame_mb_stats);
    }
#endif

    if (!cpi->row_mt) {
      cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy;
      cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy;
      // If allowed, encoding tiles in parallel with one thread handling one
      // tile when row based multi-threading is disabled.
      if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
        vp9_encode_tiles_mt(cpi);
      else
        encode_tiles(cpi);
    } else {
      cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read;
      cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write;
      vp9_encode_tiles_row_mt(cpi);
    }

    vpx_usec_timer_mark(&emr_timer);
    cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
  }

  sf->skip_encode_frame =
      sf->skip_encode_sb ? get_skip_encode_frame(cm, td) : 0;

#if 0
  // Keep record of the total distortion this time around for future use
  cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}

static INTERP_FILTER get_interp_filter(
    const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) {
  if (!is_alt_ref && threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] &&
      threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] &&
      threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) {
    return EIGHTTAP_SMOOTH;
  } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] &&
             threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) {
    return EIGHTTAP_SHARP;
  } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) {
    return EIGHTTAP;
  } else {
    return SWITCHABLE;
  }
}

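// Average the per-block SEG_LVL_ALT_Q deltas over the whole mi grid; the
// result is stored as cm->seg.aq_av_offset by vp9_encode_frame().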
static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
  struct segmentation *const seg = &cm->seg;

  int mi_row, mi_col;
  int sum_delta = 0;
  int map_index = 0;
  int qdelta_index;
  int segment_id;

  for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
    MODE_INFO **mi_8x8 = mi_8x8_ptr;
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) {
      segment_id = mi_8x8[0]->segment_id;
      qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
      sum_delta += qdelta_index;
      map_index++;
    }
    mi_8x8_ptr += cm->mi_stride;
  }

  return sum_delta / (cm->mi_rows * cm->mi_cols);
}

#if CONFIG_CONSISTENT_RECODE
static void restore_encode_params(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  int tile_col, tile_row;
  int i, j;
  RD_OPT *rd_opt = &cpi->rd;

  for (i = 0; i < MAX_REF_FRAMES; i++) {
    for (j = 0; j < REFERENCE_MODES; j++)
      rd_opt->prediction_type_threshes[i][j] =
          rd_opt->prediction_type_threshes_prev[i][j];

    for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
      rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j];
  }

  if (cpi->tile_data != NULL) {
    for (tile_row = 0; tile_row < tile_rows; ++tile_row)
      for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
        TileDataEnc *tile_data =
            &cpi->tile_data[tile_row * tile_cols + tile_col];
        for (i = 0; i < BLOCK_SIZES; ++i) {
          for (j = 0; j < MAX_MODES; ++j) {
            tile_data->thresh_freq_fact[i][j] =
                tile_data->thresh_freq_fact_prev[i][j];
          }
        }
      }
  }

  cm->interp_filter = cpi->sf.default_interp_filter;
}
#endif

void vp9_encode_frame(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;

#if CONFIG_CONSISTENT_RECODE
  restore_encode_params(cpi);
#endif

#if CONFIG_MISMATCH_DEBUG
  mismatch_reset_frame(MAX_MB_PLANE);
#endif

  // In the longer term the encoder should be generalized to match the
  // decoder such that we allow compound where one of the 3 buffers has a
  // different sign bias and that buffer is then the fixed ref. However, this
  // requires further work in the rd loop. For now the only supported encoder
  // side behavior is where the ALT ref buffer has opposite sign bias to
  // the other two.
  if (!frame_is_intra_only(cm)) {
    if (vp9_compound_reference_allowed(cm)) {
      cpi->allow_comp_inter_inter = 1;
      vp9_setup_compound_reference_mode(cm);
    } else {
      cpi->allow_comp_inter_inter = 0;
    }
  }

  if (cpi->sf.frame_parameter_update) {
    int i;
    RD_OPT *const rd_opt = &cpi->rd;
    FRAME_COUNTS *counts = cpi->td.counts;
    RD_COUNTS *const rdc = &cpi->td.rd_counts;

    // This code does a single RD pass over the whole frame assuming
    // either compound, single or hybrid prediction as per whatever has
    // worked best for that type of frame in the past.
    // It also predicts whether another coding mode would have worked
    // better than this coding mode. If that is the case, it remembers
    // that for subsequent frames.
    // It also does the same analysis for transform size selection.
    const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
    int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
    int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type];
    const int is_alt_ref = frame_type == ALTREF_FRAME;

    /* prediction (compound, single or hybrid) mode selection */
    if (is_alt_ref || !cpi->allow_comp_inter_inter)
      cm->reference_mode = SINGLE_REFERENCE;
    else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] &&
             mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] &&
             check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100)
      cm->reference_mode = COMPOUND_REFERENCE;
    else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT])
      cm->reference_mode = SINGLE_REFERENCE;
    else
      cm->reference_mode = REFERENCE_MODE_SELECT;

    if (cm->interp_filter == SWITCHABLE)
      cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);

    encode_frame_internal(cpi);

    for (i = 0; i < REFERENCE_MODES; ++i)
      mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
      filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2;

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      int single_count_zero = 0;
      int comp_count_zero = 0;

      for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
        single_count_zero += counts->comp_inter[i][0];
        comp_count_zero += counts->comp_inter[i][1];
      }

      if (comp_count_zero == 0) {
        cm->reference_mode = SINGLE_REFERENCE;
        vp9_zero(counts->comp_inter);
      } else if (single_count_zero == 0) {
        cm->reference_mode = COMPOUND_REFERENCE;
        vp9_zero(counts->comp_inter);
      }
    }

    if (cm->tx_mode == TX_MODE_SELECT) {
      int count4x4 = 0;
      int count8x8_lp = 0, count8x8_8x8p = 0;
      int count16x16_16x16p = 0, count16x16_lp = 0;
      int count32x32 = 0;

      for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
        count4x4 += counts->tx.p32x32[i][TX_4X4];
        count4x4 += counts->tx.p16x16[i][TX_4X4];
        count4x4 += counts->tx.p8x8[i][TX_4X4];

        count8x8_lp += counts->tx.p32x32[i][TX_8X8];
        count8x8_lp += counts->tx.p16x16[i][TX_8X8];
        count8x8_8x8p += counts->tx.p8x8[i][TX_8X8];

        count16x16_16x16p += counts->tx.p16x16[i][TX_16X16];
        count16x16_lp += counts->tx.p32x32[i][TX_16X16];
        count32x32 += counts->tx.p32x32[i][TX_32X32];
      }

      if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
          count32x32 == 0) {
        cm->tx_mode = ALLOW_8X8;
        reset_skip_tx_size(cm, TX_8X8);
      } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
                 count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
        cm->tx_mode = ONLY_4X4;
        reset_skip_tx_size(cm, TX_4X4);
      } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
        cm->tx_mode = ALLOW_32X32;
      } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
        cm->tx_mode = ALLOW_16X16;
        reset_skip_tx_size(cm, TX_16X16);
      }
    }
  } else {
    FRAME_COUNTS *counts = cpi->td.counts;
    cm->reference_mode = SINGLE_REFERENCE;
    if (cpi->allow_comp_inter_inter && cpi->sf.use_compound_nonrd_pickmode &&
        cpi->rc.alt_ref_gf_group && !cpi->rc.is_src_frame_alt_ref &&
        cm->frame_type != KEY_FRAME)
      cm->reference_mode = REFERENCE_MODE_SELECT;

    encode_frame_internal(cpi);

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      int single_count_zero = 0;
      int comp_count_zero = 0;
      int i;

      for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
        single_count_zero += counts->comp_inter[i][0];
        comp_count_zero += counts->comp_inter[i][1];
      }

      if (comp_count_zero == 0) {
        cm->reference_mode = SINGLE_REFERENCE;
        vp9_zero(counts->comp_inter);
      } else if (single_count_zero == 0) {
        cm->reference_mode = COMPOUND_REFERENCE;
        vp9_zero(counts->comp_inter);
      }
    }
  }

  // If segmented AQ is enabled compute the average AQ weighting.
  if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) &&
      (cm->seg.update_map || cm->seg.update_data)) {
    cm->seg.aq_av_offset = compute_frame_aq_offset(cpi);
  }
}

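// Accumulate intra mode counts for a coded block; sub-8x8 blocks contribute
// one y_mode count per 4x4 sub-block prediction mode.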
static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
  const PREDICTION_MODE y_mode = mi->mode;
  const PREDICTION_MODE uv_mode = mi->uv_mode;
  const BLOCK_SIZE bsize = mi->sb_type;

  if (bsize < BLOCK_8X8) {
    int idx, idy;
    const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
    const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
    for (idy = 0; idy < 2; idy += num_4x4_h)
      for (idx = 0; idx < 2; idx += num_4x4_w)
        ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode];
  } else {
    ++counts->y_mode[size_group_lookup[bsize]][y_mode];
  }

  ++counts->uv_mode[y_mode][uv_mode];
}

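// Per 8x8 unit, count consecutive frames coded with a near-zero motion
// vector (|mv| < 1 pel in each component) against LAST_FRAME; the counter
// saturates at 255 and resets when the motion gets larger.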
static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi,
                              int mi_row, int mi_col, BLOCK_SIZE bsize) {
  const VP9_COMMON *const cm = &cpi->common;
  MV mv = mi->mv[0].as_mv;
  const int bw = num_8x8_blocks_wide_lookup[bsize];
  const int bh = num_8x8_blocks_high_lookup[bsize];
  const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
  const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
  const int block_index = mi_row * cm->mi_cols + mi_col;
  int x, y;
  for (y = 0; y < ymis; y++)
    for (x = 0; x < xmis; x++) {
      int map_offset = block_index + y * cm->mi_cols + x;
      if (mi->ref_frame[0] == LAST_FRAME && is_inter_block(mi) &&
          mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
        if (abs(mv.row) < 8 && abs(mv.col) < 8) {
          if (cpi->consec_zero_mv[map_offset] < 255)
            cpi->consec_zero_mv[map_offset]++;
        } else {
          cpi->consec_zero_mv[map_offset] = 0;
        }
      }
    }
}

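// Final encode of a block with its selected mode: build the intra or inter
// prediction, run transform/quantization and tokenization, and, when output
// is enabled, update transform-size counts, cyclic-refresh state and the
// zero-mv statistics.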
static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                              int output_enabled, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  const int seg_skip =
      segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP);
  x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 &&
                   cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
                   cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ &&
                   cpi->sf.allow_skip_recode;

  if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode)
    memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

  x->skip_optimize = ctx->is_coded;
  ctx->is_coded = 1;
  x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
  x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
                    x->q_index < QIDX_SKIP_THRESH);

  if (x->skip_encode) return;

  if (!is_inter_block(mi)) {
    int plane;
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
        (xd->above_mi == NULL || xd->left_mi == NULL) &&
        need_top_left[mi->uv_mode])
      assert(0);
#endif  // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
    mi->skip = 1;
    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
      vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1);
    if (output_enabled) sum_intra_stats(td->counts, mi);
    vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
                    VPXMAX(bsize, BLOCK_8X8));
  } else {
    int ref;
    const int is_compound = has_second_ref(mi);
    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    for (ref = 0; ref < 1 + is_compound; ++ref) {
      YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mi->ref_frame[ref]);
      assert(cfg != NULL);
      vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                           &xd->block_refs[ref]->sf);
    }
    if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip)
      vp9_build_inter_predictors_sby(xd, mi_row, mi_col,
                                     VPXMAX(bsize, BLOCK_8X8));

    vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col,
                                    VPXMAX(bsize, BLOCK_8X8));

#if CONFIG_MISMATCH_DEBUG
    if (output_enabled) {
      int plane;
      for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
        const struct macroblockd_plane *pd = &xd->plane[plane];
        int pixel_c, pixel_r;
        const BLOCK_SIZE plane_bsize =
            get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), &xd->plane[plane]);
        const int bw = get_block_width(plane_bsize);
        const int bh = get_block_height(plane_bsize);
        mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
                        pd->subsampling_x, pd->subsampling_y);

        mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c,
                                  pixel_r, bw, bh,
                                  xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
      }
    }
#endif

    vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8), mi_row, mi_col, output_enabled);
    vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
                    VPXMAX(bsize, BLOCK_8X8));
  }

  if (seg_skip) {
    assert(mi->skip);
  }

  if (output_enabled) {
    if (cm->tx_mode == TX_MODE_SELECT && mi->sb_type >= BLOCK_8X8 &&
        !(is_inter_block(mi) && mi->skip)) {
      ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd),
                      &td->counts->tx)[mi->tx_size];
    } else {
      // The new intra coding scheme requires no change of transform size
      if (is_inter_block(mi)) {
        mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
                             max_txsize_lookup[bsize]);
      } else {
        mi->tx_size = (bsize >= BLOCK_8X8) ? mi->tx_size : TX_4X4;
      }
    }

    ++td->counts->tx.tx_totals[mi->tx_size];
    ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];
    if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
      vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);
    if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 &&
        (!cpi->use_svc ||
         (cpi->use_svc &&
          !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
          cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)))
      update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize);
  }
}