parser.c 430 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552
  1. /*
  2. * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
  3. * implemented on top of the SAX interfaces
  4. *
  5. * References:
  6. * The XML specification:
  7. * http://www.w3.org/TR/REC-xml
  8. * Original 1.0 version:
  9. * http://www.w3.org/TR/1998/REC-xml-19980210
  10. * XML second edition working draft
  11. * http://www.w3.org/TR/2000/WD-xml-2e-20000814
  12. *
  13. * Okay this is a big file, the parser core is around 7000 lines, then it
  14. * is followed by the progressive parser top routines, then the various
  15. * high level APIs to call the parser and a few miscellaneous functions.
  16. * A number of helper functions and deprecated ones have been moved to
  17. * parserInternals.c to reduce this file size.
  18. * As much as possible the functions are associated with their relative
  19. * production in the XML specification. A few productions defining the
  20. * different ranges of character are actually implanted either in
  21. * parserInternals.h or parserInternals.c
  22. * The DOM tree build is realized from the default SAX callbacks in
  23. * the module SAX.c.
  24. * The routines doing the validation checks are in valid.c and called either
  25. * from the SAX callbacks or as standalone functions using a preparsed
  26. * document.
  27. *
  28. * See Copyright for the status of this software.
  29. *
  30. * daniel@veillard.com
  31. */
  32. /* To avoid EBCDIC trouble when parsing on zOS */
  33. #if defined(__MVS__)
  34. #pragma convert("ISO8859-1")
  35. #endif
  36. #define IN_LIBXML
  37. #include "libxml.h"
  38. #if defined(_WIN32) && !defined (__CYGWIN__)
  39. #define XML_DIR_SEP '\\'
  40. #else
  41. #define XML_DIR_SEP '/'
  42. #endif
  43. #include <stdlib.h>
  44. #include <limits.h>
  45. #include <string.h>
  46. #include <stdarg.h>
  47. #include <stddef.h>
  48. #include <libxml/xmlmemory.h>
  49. #include <libxml/threads.h>
  50. #include <libxml/globals.h>
  51. #include <libxml/tree.h>
  52. #include <libxml/parser.h>
  53. #include <libxml/parserInternals.h>
  54. #include <libxml/valid.h>
  55. #include <libxml/entities.h>
  56. #include <libxml/xmlerror.h>
  57. #include <libxml/encoding.h>
  58. #include <libxml/xmlIO.h>
  59. #include <libxml/uri.h>
  60. #ifdef LIBXML_CATALOG_ENABLED
  61. #include <libxml/catalog.h>
  62. #endif
  63. #ifdef LIBXML_SCHEMAS_ENABLED
  64. #include <libxml/xmlschemastypes.h>
  65. #include <libxml/relaxng.h>
  66. #endif
  67. #ifdef HAVE_CTYPE_H
  68. #include <ctype.h>
  69. #endif
  70. #ifdef HAVE_STDLIB_H
  71. #include <stdlib.h>
  72. #endif
  73. #ifdef HAVE_SYS_STAT_H
  74. #include <sys/stat.h>
  75. #endif
  76. #ifdef HAVE_FCNTL_H
  77. #include <fcntl.h>
  78. #endif
  79. #ifdef HAVE_UNISTD_H
  80. #include <unistd.h>
  81. #endif
  82. #include "buf.h"
  83. #include "enc.h"
  84. struct _xmlStartTag {
  85. const xmlChar *prefix;
  86. const xmlChar *URI;
  87. int line;
  88. int nsNr;
  89. };
  90. static void
  91. xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
  92. static xmlParserCtxtPtr
  93. xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
  94. const xmlChar *base, xmlParserCtxtPtr pctx);
  95. static void xmlHaltParser(xmlParserCtxtPtr ctxt);
  96. static int
  97. xmlParseElementStart(xmlParserCtxtPtr ctxt);
  98. static void
  99. xmlParseElementEnd(xmlParserCtxtPtr ctxt);
  100. /************************************************************************
  101. * *
  102. * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
  103. * *
  104. ************************************************************************/
  105. #define XML_PARSER_BIG_ENTITY 1000
  106. #define XML_PARSER_LOT_ENTITY 5000
  107. /*
  108. * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
  109. * replacement over the size in byte of the input indicates that you have
  110. * and exponential behaviour. A value of 10 correspond to at least 3 entity
  111. * replacement per byte of input.
  112. */
  113. #define XML_PARSER_NON_LINEAR 10
  114. /*
  115. * xmlParserEntityCheck
  116. *
  117. * Function to check non-linear entity expansion behaviour
  118. * This is here to detect and stop exponential linear entity expansion
  119. * This is not a limitation of the parser but a safety
  120. * boundary feature. It can be disabled with the XML_PARSE_HUGE
  121. * parser option.
  122. */
  123. static int
  124. xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
  125. xmlEntityPtr ent, size_t replacement)
  126. {
  127. size_t consumed = 0;
  128. int i;
  129. if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
  130. return (0);
  131. if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
  132. return (1);
  133. /*
  134. * This may look absurd but is needed to detect
  135. * entities problems
  136. */
  137. if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
  138. (ent->content != NULL) && (ent->checked == 0) &&
  139. (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
  140. unsigned long oldnbent = ctxt->nbentities, diff;
  141. xmlChar *rep;
  142. ent->checked = 1;
  143. ++ctxt->depth;
  144. rep = xmlStringDecodeEntities(ctxt, ent->content,
  145. XML_SUBSTITUTE_REF, 0, 0, 0);
  146. --ctxt->depth;
  147. if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
  148. ent->content[0] = 0;
  149. }
  150. diff = ctxt->nbentities - oldnbent + 1;
  151. if (diff > INT_MAX / 2)
  152. diff = INT_MAX / 2;
  153. ent->checked = diff * 2;
  154. if (rep != NULL) {
  155. if (xmlStrchr(rep, '<'))
  156. ent->checked |= 1;
  157. xmlFree(rep);
  158. rep = NULL;
  159. }
  160. }
  161. /*
  162. * Prevent entity exponential check, not just replacement while
  163. * parsing the DTD
  164. * The check is potentially costly so do that only once in a thousand
  165. */
  166. if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
  167. (ctxt->nbentities % 1024 == 0)) {
  168. for (i = 0;i < ctxt->inputNr;i++) {
  169. consumed += ctxt->inputTab[i]->consumed +
  170. (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
  171. }
  172. if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
  173. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  174. ctxt->instate = XML_PARSER_EOF;
  175. return (1);
  176. }
  177. consumed = 0;
  178. }
  179. if (replacement != 0) {
  180. if (replacement < XML_MAX_TEXT_LENGTH)
  181. return(0);
  182. /*
  183. * If the volume of entity copy reaches 10 times the
  184. * amount of parsed data and over the large text threshold
  185. * then that's very likely to be an abuse.
  186. */
  187. if (ctxt->input != NULL) {
  188. consumed = ctxt->input->consumed +
  189. (ctxt->input->cur - ctxt->input->base);
  190. }
  191. consumed += ctxt->sizeentities;
  192. if (replacement < XML_PARSER_NON_LINEAR * consumed)
  193. return(0);
  194. } else if (size != 0) {
  195. /*
  196. * Do the check based on the replacement size of the entity
  197. */
  198. if (size < XML_PARSER_BIG_ENTITY)
  199. return(0);
  200. /*
  201. * A limit on the amount of text data reasonably used
  202. */
  203. if (ctxt->input != NULL) {
  204. consumed = ctxt->input->consumed +
  205. (ctxt->input->cur - ctxt->input->base);
  206. }
  207. consumed += ctxt->sizeentities;
  208. if ((size < XML_PARSER_NON_LINEAR * consumed) &&
  209. (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
  210. return (0);
  211. } else if (ent != NULL) {
  212. /*
  213. * use the number of parsed entities in the replacement
  214. */
  215. size = ent->checked / 2;
  216. /*
  217. * The amount of data parsed counting entities size only once
  218. */
  219. if (ctxt->input != NULL) {
  220. consumed = ctxt->input->consumed +
  221. (ctxt->input->cur - ctxt->input->base);
  222. }
  223. consumed += ctxt->sizeentities;
  224. /*
  225. * Check the density of entities for the amount of data
  226. * knowing an entity reference will take at least 3 bytes
  227. */
  228. if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
  229. return (0);
  230. } else {
  231. /*
  232. * strange we got no data for checking
  233. */
  234. if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
  235. (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
  236. (ctxt->nbentities <= 10000))
  237. return (0);
  238. }
  239. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  240. return (1);
  241. }
  242. /**
  243. * xmlParserMaxDepth:
  244. *
  245. * arbitrary depth limit for the XML documents that we allow to
  246. * process. This is not a limitation of the parser but a safety
  247. * boundary feature. It can be disabled with the XML_PARSE_HUGE
  248. * parser option.
  249. */
  250. unsigned int xmlParserMaxDepth = 256;
  251. #define SAX2 1
  252. #define XML_PARSER_BIG_BUFFER_SIZE 300
  253. #define XML_PARSER_BUFFER_SIZE 100
  254. #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
  255. /**
  256. * XML_PARSER_CHUNK_SIZE
  257. *
  258. * When calling GROW that's the minimal amount of data
  259. * the parser expected to have received. It is not a hard
  260. * limit but an optimization when reading strings like Names
  261. * It is not strictly needed as long as inputs available characters
  262. * are followed by 0, which should be provided by the I/O level
  263. */
  264. #define XML_PARSER_CHUNK_SIZE 100
  265. /*
  266. * List of XML prefixed PI allowed by W3C specs
  267. */
  268. static const char *xmlW3CPIs[] = {
  269. "xml-stylesheet",
  270. "xml-model",
  271. NULL
  272. };
  273. /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
  274. static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
  275. const xmlChar **str);
  276. static xmlParserErrors
  277. xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
  278. xmlSAXHandlerPtr sax,
  279. void *user_data, int depth, const xmlChar *URL,
  280. const xmlChar *ID, xmlNodePtr *list);
  281. static int
  282. xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
  283. const char *encoding);
  284. #ifdef LIBXML_LEGACY_ENABLED
  285. static void
  286. xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
  287. xmlNodePtr lastNode);
  288. #endif /* LIBXML_LEGACY_ENABLED */
  289. static xmlParserErrors
  290. xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
  291. const xmlChar *string, void *user_data, xmlNodePtr *lst);
  292. static int
  293. xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
  294. /************************************************************************
  295. * *
  296. * Some factorized error routines *
  297. * *
  298. ************************************************************************/
  299. /**
  300. * xmlErrAttributeDup:
  301. * @ctxt: an XML parser context
  302. * @prefix: the attribute prefix
  303. * @localname: the attribute localname
  304. *
  305. * Handle a redefinition of attribute error
  306. */
  307. static void
  308. xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
  309. const xmlChar * localname)
  310. {
  311. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  312. (ctxt->instate == XML_PARSER_EOF))
  313. return;
  314. if (ctxt != NULL)
  315. ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
  316. if (prefix == NULL)
  317. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
  318. XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
  319. (const char *) localname, NULL, NULL, 0, 0,
  320. "Attribute %s redefined\n", localname);
  321. else
  322. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
  323. XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
  324. (const char *) prefix, (const char *) localname,
  325. NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
  326. localname);
  327. if (ctxt != NULL) {
  328. ctxt->wellFormed = 0;
  329. if (ctxt->recovery == 0)
  330. ctxt->disableSAX = 1;
  331. }
  332. }
  333. /**
  334. * xmlFatalErr:
  335. * @ctxt: an XML parser context
  336. * @error: the error number
  337. * @extra: extra information string
  338. *
  339. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  340. */
  341. static void
  342. xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
  343. {
  344. const char *errmsg;
  345. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  346. (ctxt->instate == XML_PARSER_EOF))
  347. return;
  348. switch (error) {
  349. case XML_ERR_INVALID_HEX_CHARREF:
  350. errmsg = "CharRef: invalid hexadecimal value";
  351. break;
  352. case XML_ERR_INVALID_DEC_CHARREF:
  353. errmsg = "CharRef: invalid decimal value";
  354. break;
  355. case XML_ERR_INVALID_CHARREF:
  356. errmsg = "CharRef: invalid value";
  357. break;
  358. case XML_ERR_INTERNAL_ERROR:
  359. errmsg = "internal error";
  360. break;
  361. case XML_ERR_PEREF_AT_EOF:
  362. errmsg = "PEReference at end of document";
  363. break;
  364. case XML_ERR_PEREF_IN_PROLOG:
  365. errmsg = "PEReference in prolog";
  366. break;
  367. case XML_ERR_PEREF_IN_EPILOG:
  368. errmsg = "PEReference in epilog";
  369. break;
  370. case XML_ERR_PEREF_NO_NAME:
  371. errmsg = "PEReference: no name";
  372. break;
  373. case XML_ERR_PEREF_SEMICOL_MISSING:
  374. errmsg = "PEReference: expecting ';'";
  375. break;
  376. case XML_ERR_ENTITY_LOOP:
  377. errmsg = "Detected an entity reference loop";
  378. break;
  379. case XML_ERR_ENTITY_NOT_STARTED:
  380. errmsg = "EntityValue: \" or ' expected";
  381. break;
  382. case XML_ERR_ENTITY_PE_INTERNAL:
  383. errmsg = "PEReferences forbidden in internal subset";
  384. break;
  385. case XML_ERR_ENTITY_NOT_FINISHED:
  386. errmsg = "EntityValue: \" or ' expected";
  387. break;
  388. case XML_ERR_ATTRIBUTE_NOT_STARTED:
  389. errmsg = "AttValue: \" or ' expected";
  390. break;
  391. case XML_ERR_LT_IN_ATTRIBUTE:
  392. errmsg = "Unescaped '<' not allowed in attributes values";
  393. break;
  394. case XML_ERR_LITERAL_NOT_STARTED:
  395. errmsg = "SystemLiteral \" or ' expected";
  396. break;
  397. case XML_ERR_LITERAL_NOT_FINISHED:
  398. errmsg = "Unfinished System or Public ID \" or ' expected";
  399. break;
  400. case XML_ERR_MISPLACED_CDATA_END:
  401. errmsg = "Sequence ']]>' not allowed in content";
  402. break;
  403. case XML_ERR_URI_REQUIRED:
  404. errmsg = "SYSTEM or PUBLIC, the URI is missing";
  405. break;
  406. case XML_ERR_PUBID_REQUIRED:
  407. errmsg = "PUBLIC, the Public Identifier is missing";
  408. break;
  409. case XML_ERR_HYPHEN_IN_COMMENT:
  410. errmsg = "Comment must not contain '--' (double-hyphen)";
  411. break;
  412. case XML_ERR_PI_NOT_STARTED:
  413. errmsg = "xmlParsePI : no target name";
  414. break;
  415. case XML_ERR_RESERVED_XML_NAME:
  416. errmsg = "Invalid PI name";
  417. break;
  418. case XML_ERR_NOTATION_NOT_STARTED:
  419. errmsg = "NOTATION: Name expected here";
  420. break;
  421. case XML_ERR_NOTATION_NOT_FINISHED:
  422. errmsg = "'>' required to close NOTATION declaration";
  423. break;
  424. case XML_ERR_VALUE_REQUIRED:
  425. errmsg = "Entity value required";
  426. break;
  427. case XML_ERR_URI_FRAGMENT:
  428. errmsg = "Fragment not allowed";
  429. break;
  430. case XML_ERR_ATTLIST_NOT_STARTED:
  431. errmsg = "'(' required to start ATTLIST enumeration";
  432. break;
  433. case XML_ERR_NMTOKEN_REQUIRED:
  434. errmsg = "NmToken expected in ATTLIST enumeration";
  435. break;
  436. case XML_ERR_ATTLIST_NOT_FINISHED:
  437. errmsg = "')' required to finish ATTLIST enumeration";
  438. break;
  439. case XML_ERR_MIXED_NOT_STARTED:
  440. errmsg = "MixedContentDecl : '|' or ')*' expected";
  441. break;
  442. case XML_ERR_PCDATA_REQUIRED:
  443. errmsg = "MixedContentDecl : '#PCDATA' expected";
  444. break;
  445. case XML_ERR_ELEMCONTENT_NOT_STARTED:
  446. errmsg = "ContentDecl : Name or '(' expected";
  447. break;
  448. case XML_ERR_ELEMCONTENT_NOT_FINISHED:
  449. errmsg = "ContentDecl : ',' '|' or ')' expected";
  450. break;
  451. case XML_ERR_PEREF_IN_INT_SUBSET:
  452. errmsg =
  453. "PEReference: forbidden within markup decl in internal subset";
  454. break;
  455. case XML_ERR_GT_REQUIRED:
  456. errmsg = "expected '>'";
  457. break;
  458. case XML_ERR_CONDSEC_INVALID:
  459. errmsg = "XML conditional section '[' expected";
  460. break;
  461. case XML_ERR_EXT_SUBSET_NOT_FINISHED:
  462. errmsg = "Content error in the external subset";
  463. break;
  464. case XML_ERR_CONDSEC_INVALID_KEYWORD:
  465. errmsg =
  466. "conditional section INCLUDE or IGNORE keyword expected";
  467. break;
  468. case XML_ERR_CONDSEC_NOT_FINISHED:
  469. errmsg = "XML conditional section not closed";
  470. break;
  471. case XML_ERR_XMLDECL_NOT_STARTED:
  472. errmsg = "Text declaration '<?xml' required";
  473. break;
  474. case XML_ERR_XMLDECL_NOT_FINISHED:
  475. errmsg = "parsing XML declaration: '?>' expected";
  476. break;
  477. case XML_ERR_EXT_ENTITY_STANDALONE:
  478. errmsg = "external parsed entities cannot be standalone";
  479. break;
  480. case XML_ERR_ENTITYREF_SEMICOL_MISSING:
  481. errmsg = "EntityRef: expecting ';'";
  482. break;
  483. case XML_ERR_DOCTYPE_NOT_FINISHED:
  484. errmsg = "DOCTYPE improperly terminated";
  485. break;
  486. case XML_ERR_LTSLASH_REQUIRED:
  487. errmsg = "EndTag: '</' not found";
  488. break;
  489. case XML_ERR_EQUAL_REQUIRED:
  490. errmsg = "expected '='";
  491. break;
  492. case XML_ERR_STRING_NOT_CLOSED:
  493. errmsg = "String not closed expecting \" or '";
  494. break;
  495. case XML_ERR_STRING_NOT_STARTED:
  496. errmsg = "String not started expecting ' or \"";
  497. break;
  498. case XML_ERR_ENCODING_NAME:
  499. errmsg = "Invalid XML encoding name";
  500. break;
  501. case XML_ERR_STANDALONE_VALUE:
  502. errmsg = "standalone accepts only 'yes' or 'no'";
  503. break;
  504. case XML_ERR_DOCUMENT_EMPTY:
  505. errmsg = "Document is empty";
  506. break;
  507. case XML_ERR_DOCUMENT_END:
  508. errmsg = "Extra content at the end of the document";
  509. break;
  510. case XML_ERR_NOT_WELL_BALANCED:
  511. errmsg = "chunk is not well balanced";
  512. break;
  513. case XML_ERR_EXTRA_CONTENT:
  514. errmsg = "extra content at the end of well balanced chunk";
  515. break;
  516. case XML_ERR_VERSION_MISSING:
  517. errmsg = "Malformed declaration expecting version";
  518. break;
  519. case XML_ERR_NAME_TOO_LONG:
  520. errmsg = "Name too long use XML_PARSE_HUGE option";
  521. break;
  522. #if 0
  523. case:
  524. errmsg = "";
  525. break;
  526. #endif
  527. default:
  528. errmsg = "Unregistered error message";
  529. }
  530. if (ctxt != NULL)
  531. ctxt->errNo = error;
  532. if (info == NULL) {
  533. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
  534. XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
  535. errmsg);
  536. } else {
  537. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
  538. XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
  539. errmsg, info);
  540. }
  541. if (ctxt != NULL) {
  542. ctxt->wellFormed = 0;
  543. if (ctxt->recovery == 0)
  544. ctxt->disableSAX = 1;
  545. }
  546. }
  547. /**
  548. * xmlFatalErrMsg:
  549. * @ctxt: an XML parser context
  550. * @error: the error number
  551. * @msg: the error message
  552. *
  553. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  554. */
  555. static void LIBXML_ATTR_FORMAT(3,0)
  556. xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  557. const char *msg)
  558. {
  559. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  560. (ctxt->instate == XML_PARSER_EOF))
  561. return;
  562. if (ctxt != NULL)
  563. ctxt->errNo = error;
  564. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
  565. XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
  566. if (ctxt != NULL) {
  567. ctxt->wellFormed = 0;
  568. if (ctxt->recovery == 0)
  569. ctxt->disableSAX = 1;
  570. }
  571. }
  572. /**
  573. * xmlWarningMsg:
  574. * @ctxt: an XML parser context
  575. * @error: the error number
  576. * @msg: the error message
  577. * @str1: extra data
  578. * @str2: extra data
  579. *
  580. * Handle a warning.
  581. */
  582. static void LIBXML_ATTR_FORMAT(3,0)
  583. xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  584. const char *msg, const xmlChar *str1, const xmlChar *str2)
  585. {
  586. xmlStructuredErrorFunc schannel = NULL;
  587. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  588. (ctxt->instate == XML_PARSER_EOF))
  589. return;
  590. if ((ctxt != NULL) && (ctxt->sax != NULL) &&
  591. (ctxt->sax->initialized == XML_SAX2_MAGIC))
  592. schannel = ctxt->sax->serror;
  593. if (ctxt != NULL) {
  594. __xmlRaiseError(schannel,
  595. (ctxt->sax) ? ctxt->sax->warning : NULL,
  596. ctxt->userData,
  597. ctxt, NULL, XML_FROM_PARSER, error,
  598. XML_ERR_WARNING, NULL, 0,
  599. (const char *) str1, (const char *) str2, NULL, 0, 0,
  600. msg, (const char *) str1, (const char *) str2);
  601. } else {
  602. __xmlRaiseError(schannel, NULL, NULL,
  603. ctxt, NULL, XML_FROM_PARSER, error,
  604. XML_ERR_WARNING, NULL, 0,
  605. (const char *) str1, (const char *) str2, NULL, 0, 0,
  606. msg, (const char *) str1, (const char *) str2);
  607. }
  608. }
  609. /**
  610. * xmlValidityError:
  611. * @ctxt: an XML parser context
  612. * @error: the error number
  613. * @msg: the error message
  614. * @str1: extra data
  615. *
  616. * Handle a validity error.
  617. */
  618. static void LIBXML_ATTR_FORMAT(3,0)
  619. xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  620. const char *msg, const xmlChar *str1, const xmlChar *str2)
  621. {
  622. xmlStructuredErrorFunc schannel = NULL;
  623. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  624. (ctxt->instate == XML_PARSER_EOF))
  625. return;
  626. if (ctxt != NULL) {
  627. ctxt->errNo = error;
  628. if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
  629. schannel = ctxt->sax->serror;
  630. }
  631. if (ctxt != NULL) {
  632. __xmlRaiseError(schannel,
  633. ctxt->vctxt.error, ctxt->vctxt.userData,
  634. ctxt, NULL, XML_FROM_DTD, error,
  635. XML_ERR_ERROR, NULL, 0, (const char *) str1,
  636. (const char *) str2, NULL, 0, 0,
  637. msg, (const char *) str1, (const char *) str2);
  638. ctxt->valid = 0;
  639. } else {
  640. __xmlRaiseError(schannel, NULL, NULL,
  641. ctxt, NULL, XML_FROM_DTD, error,
  642. XML_ERR_ERROR, NULL, 0, (const char *) str1,
  643. (const char *) str2, NULL, 0, 0,
  644. msg, (const char *) str1, (const char *) str2);
  645. }
  646. }
  647. /**
  648. * xmlFatalErrMsgInt:
  649. * @ctxt: an XML parser context
  650. * @error: the error number
  651. * @msg: the error message
  652. * @val: an integer value
  653. *
  654. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  655. */
  656. static void LIBXML_ATTR_FORMAT(3,0)
  657. xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  658. const char *msg, int val)
  659. {
  660. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  661. (ctxt->instate == XML_PARSER_EOF))
  662. return;
  663. if (ctxt != NULL)
  664. ctxt->errNo = error;
  665. __xmlRaiseError(NULL, NULL, NULL,
  666. ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
  667. NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
  668. if (ctxt != NULL) {
  669. ctxt->wellFormed = 0;
  670. if (ctxt->recovery == 0)
  671. ctxt->disableSAX = 1;
  672. }
  673. }
  674. /**
  675. * xmlFatalErrMsgStrIntStr:
  676. * @ctxt: an XML parser context
  677. * @error: the error number
  678. * @msg: the error message
  679. * @str1: an string info
  680. * @val: an integer value
  681. * @str2: an string info
  682. *
  683. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  684. */
  685. static void LIBXML_ATTR_FORMAT(3,0)
  686. xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  687. const char *msg, const xmlChar *str1, int val,
  688. const xmlChar *str2)
  689. {
  690. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  691. (ctxt->instate == XML_PARSER_EOF))
  692. return;
  693. if (ctxt != NULL)
  694. ctxt->errNo = error;
  695. __xmlRaiseError(NULL, NULL, NULL,
  696. ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
  697. NULL, 0, (const char *) str1, (const char *) str2,
  698. NULL, val, 0, msg, str1, val, str2);
  699. if (ctxt != NULL) {
  700. ctxt->wellFormed = 0;
  701. if (ctxt->recovery == 0)
  702. ctxt->disableSAX = 1;
  703. }
  704. }
  705. /**
  706. * xmlFatalErrMsgStr:
  707. * @ctxt: an XML parser context
  708. * @error: the error number
  709. * @msg: the error message
  710. * @val: a string value
  711. *
  712. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  713. */
  714. static void LIBXML_ATTR_FORMAT(3,0)
  715. xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  716. const char *msg, const xmlChar * val)
  717. {
  718. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  719. (ctxt->instate == XML_PARSER_EOF))
  720. return;
  721. if (ctxt != NULL)
  722. ctxt->errNo = error;
  723. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
  724. XML_FROM_PARSER, error, XML_ERR_FATAL,
  725. NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
  726. val);
  727. if (ctxt != NULL) {
  728. ctxt->wellFormed = 0;
  729. if (ctxt->recovery == 0)
  730. ctxt->disableSAX = 1;
  731. }
  732. }
  733. /**
  734. * xmlErrMsgStr:
  735. * @ctxt: an XML parser context
  736. * @error: the error number
  737. * @msg: the error message
  738. * @val: a string value
  739. *
  740. * Handle a non fatal parser error
  741. */
  742. static void LIBXML_ATTR_FORMAT(3,0)
  743. xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  744. const char *msg, const xmlChar * val)
  745. {
  746. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  747. (ctxt->instate == XML_PARSER_EOF))
  748. return;
  749. if (ctxt != NULL)
  750. ctxt->errNo = error;
  751. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
  752. XML_FROM_PARSER, error, XML_ERR_ERROR,
  753. NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
  754. val);
  755. }
  756. /**
  757. * xmlNsErr:
  758. * @ctxt: an XML parser context
  759. * @error: the error number
  760. * @msg: the message
  761. * @info1: extra information string
  762. * @info2: extra information string
  763. *
  764. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  765. */
  766. static void LIBXML_ATTR_FORMAT(3,0)
  767. xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  768. const char *msg,
  769. const xmlChar * info1, const xmlChar * info2,
  770. const xmlChar * info3)
  771. {
  772. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  773. (ctxt->instate == XML_PARSER_EOF))
  774. return;
  775. if (ctxt != NULL)
  776. ctxt->errNo = error;
  777. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
  778. XML_ERR_ERROR, NULL, 0, (const char *) info1,
  779. (const char *) info2, (const char *) info3, 0, 0, msg,
  780. info1, info2, info3);
  781. if (ctxt != NULL)
  782. ctxt->nsWellFormed = 0;
  783. }
  784. /**
  785. * xmlNsWarn
  786. * @ctxt: an XML parser context
  787. * @error: the error number
  788. * @msg: the message
  789. * @info1: extra information string
  790. * @info2: extra information string
  791. *
  792. * Handle a namespace warning error
  793. */
  794. static void LIBXML_ATTR_FORMAT(3,0)
  795. xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  796. const char *msg,
  797. const xmlChar * info1, const xmlChar * info2,
  798. const xmlChar * info3)
  799. {
  800. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  801. (ctxt->instate == XML_PARSER_EOF))
  802. return;
  803. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
  804. XML_ERR_WARNING, NULL, 0, (const char *) info1,
  805. (const char *) info2, (const char *) info3, 0, 0, msg,
  806. info1, info2, info3);
  807. }
  808. /************************************************************************
  809. * *
  810. * Library wide options *
  811. * *
  812. ************************************************************************/
  813. /**
  814. * xmlHasFeature:
  815. * @feature: the feature to be examined
  816. *
  817. * Examines if the library has been compiled with a given feature.
  818. *
 * Returns a non-zero value if the feature exists; zero (0) if the
 * feature does not exist or an unknown feature is requested.
  822. */
  823. int
  824. xmlHasFeature(xmlFeature feature)
  825. {
  826. switch (feature) {
  827. case XML_WITH_THREAD:
  828. #ifdef LIBXML_THREAD_ENABLED
  829. return(1);
  830. #else
  831. return(0);
  832. #endif
  833. case XML_WITH_TREE:
  834. #ifdef LIBXML_TREE_ENABLED
  835. return(1);
  836. #else
  837. return(0);
  838. #endif
  839. case XML_WITH_OUTPUT:
  840. #ifdef LIBXML_OUTPUT_ENABLED
  841. return(1);
  842. #else
  843. return(0);
  844. #endif
  845. case XML_WITH_PUSH:
  846. #ifdef LIBXML_PUSH_ENABLED
  847. return(1);
  848. #else
  849. return(0);
  850. #endif
  851. case XML_WITH_READER:
  852. #ifdef LIBXML_READER_ENABLED
  853. return(1);
  854. #else
  855. return(0);
  856. #endif
  857. case XML_WITH_PATTERN:
  858. #ifdef LIBXML_PATTERN_ENABLED
  859. return(1);
  860. #else
  861. return(0);
  862. #endif
  863. case XML_WITH_WRITER:
  864. #ifdef LIBXML_WRITER_ENABLED
  865. return(1);
  866. #else
  867. return(0);
  868. #endif
  869. case XML_WITH_SAX1:
  870. #ifdef LIBXML_SAX1_ENABLED
  871. return(1);
  872. #else
  873. return(0);
  874. #endif
  875. case XML_WITH_FTP:
  876. #ifdef LIBXML_FTP_ENABLED
  877. return(1);
  878. #else
  879. return(0);
  880. #endif
  881. case XML_WITH_HTTP:
  882. #ifdef LIBXML_HTTP_ENABLED
  883. return(1);
  884. #else
  885. return(0);
  886. #endif
  887. case XML_WITH_VALID:
  888. #ifdef LIBXML_VALID_ENABLED
  889. return(1);
  890. #else
  891. return(0);
  892. #endif
  893. case XML_WITH_HTML:
  894. #ifdef LIBXML_HTML_ENABLED
  895. return(1);
  896. #else
  897. return(0);
  898. #endif
  899. case XML_WITH_LEGACY:
  900. #ifdef LIBXML_LEGACY_ENABLED
  901. return(1);
  902. #else
  903. return(0);
  904. #endif
  905. case XML_WITH_C14N:
  906. #ifdef LIBXML_C14N_ENABLED
  907. return(1);
  908. #else
  909. return(0);
  910. #endif
  911. case XML_WITH_CATALOG:
  912. #ifdef LIBXML_CATALOG_ENABLED
  913. return(1);
  914. #else
  915. return(0);
  916. #endif
  917. case XML_WITH_XPATH:
  918. #ifdef LIBXML_XPATH_ENABLED
  919. return(1);
  920. #else
  921. return(0);
  922. #endif
  923. case XML_WITH_XPTR:
  924. #ifdef LIBXML_XPTR_ENABLED
  925. return(1);
  926. #else
  927. return(0);
  928. #endif
  929. case XML_WITH_XINCLUDE:
  930. #ifdef LIBXML_XINCLUDE_ENABLED
  931. return(1);
  932. #else
  933. return(0);
  934. #endif
  935. case XML_WITH_ICONV:
  936. #ifdef LIBXML_ICONV_ENABLED
  937. return(1);
  938. #else
  939. return(0);
  940. #endif
  941. case XML_WITH_ISO8859X:
  942. #ifdef LIBXML_ISO8859X_ENABLED
  943. return(1);
  944. #else
  945. return(0);
  946. #endif
  947. case XML_WITH_UNICODE:
  948. #ifdef LIBXML_UNICODE_ENABLED
  949. return(1);
  950. #else
  951. return(0);
  952. #endif
  953. case XML_WITH_REGEXP:
  954. #ifdef LIBXML_REGEXP_ENABLED
  955. return(1);
  956. #else
  957. return(0);
  958. #endif
  959. case XML_WITH_AUTOMATA:
  960. #ifdef LIBXML_AUTOMATA_ENABLED
  961. return(1);
  962. #else
  963. return(0);
  964. #endif
  965. case XML_WITH_EXPR:
  966. #ifdef LIBXML_EXPR_ENABLED
  967. return(1);
  968. #else
  969. return(0);
  970. #endif
  971. case XML_WITH_SCHEMAS:
  972. #ifdef LIBXML_SCHEMAS_ENABLED
  973. return(1);
  974. #else
  975. return(0);
  976. #endif
  977. case XML_WITH_SCHEMATRON:
  978. #ifdef LIBXML_SCHEMATRON_ENABLED
  979. return(1);
  980. #else
  981. return(0);
  982. #endif
  983. case XML_WITH_MODULES:
  984. #ifdef LIBXML_MODULES_ENABLED
  985. return(1);
  986. #else
  987. return(0);
  988. #endif
  989. case XML_WITH_DEBUG:
  990. #ifdef LIBXML_DEBUG_ENABLED
  991. return(1);
  992. #else
  993. return(0);
  994. #endif
  995. case XML_WITH_DEBUG_MEM:
  996. #ifdef DEBUG_MEMORY_LOCATION
  997. return(1);
  998. #else
  999. return(0);
  1000. #endif
  1001. case XML_WITH_DEBUG_RUN:
  1002. #ifdef LIBXML_DEBUG_RUNTIME
  1003. return(1);
  1004. #else
  1005. return(0);
  1006. #endif
  1007. case XML_WITH_ZLIB:
  1008. #ifdef LIBXML_ZLIB_ENABLED
  1009. return(1);
  1010. #else
  1011. return(0);
  1012. #endif
  1013. case XML_WITH_LZMA:
  1014. #ifdef LIBXML_LZMA_ENABLED
  1015. return(1);
  1016. #else
  1017. return(0);
  1018. #endif
  1019. case XML_WITH_ICU:
  1020. #ifdef LIBXML_ICU_ENABLED
  1021. return(1);
  1022. #else
  1023. return(0);
  1024. #endif
  1025. default:
  1026. break;
  1027. }
  1028. return(0);
  1029. }
  1030. /************************************************************************
  1031. * *
  1032. * SAX2 defaulted attributes handling *
  1033. * *
  1034. ************************************************************************/
  1035. /**
  1036. * xmlDetectSAX2:
  1037. * @ctxt: an XML parser context
  1038. *
  1039. * Do the SAX2 detection and specific initialization
  1040. */
  1041. static void
  1042. xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
  1043. xmlSAXHandlerPtr sax;
  1044. if (ctxt == NULL) return;
  1045. sax = ctxt->sax;
  1046. #ifdef LIBXML_SAX1_ENABLED
  1047. if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
  1048. ((sax->startElementNs != NULL) ||
  1049. (sax->endElementNs != NULL) ||
  1050. ((sax->startElement == NULL) && (sax->endElement == NULL))))
  1051. ctxt->sax2 = 1;
  1052. #else
  1053. ctxt->sax2 = 1;
  1054. #endif /* LIBXML_SAX1_ENABLED */
  1055. ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
  1056. ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
  1057. ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
  1058. if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
  1059. (ctxt->str_xml_ns == NULL)) {
  1060. xmlErrMemory(ctxt, NULL);
  1061. }
  1062. }
  1063. typedef struct _xmlDefAttrs xmlDefAttrs;
  1064. typedef xmlDefAttrs *xmlDefAttrsPtr;
  1065. struct _xmlDefAttrs {
  1066. int nbAttrs; /* number of defaulted attributes on that element */
  1067. int maxAttrs; /* the size of the array */
  1068. #if __STDC_VERSION__ >= 199901L
  1069. /* Using a C99 flexible array member avoids UBSan errors. */
  1070. const xmlChar *values[]; /* array of localname/prefix/values/external */
  1071. #else
  1072. const xmlChar *values[5];
  1073. #endif
  1074. };
  1075. /**
  1076. * xmlAttrNormalizeSpace:
  1077. * @src: the source string
  1078. * @dst: the target string
  1079. *
  1080. * Normalize the space in non CDATA attribute values:
  1081. * If the attribute type is not CDATA, then the XML processor MUST further
  1082. * process the normalized attribute value by discarding any leading and
  1083. * trailing space (#x20) characters, and by replacing sequences of space
  1084. * (#x20) characters by a single space (#x20) character.
  1085. * Note that the size of dst need to be at least src, and if one doesn't need
  1086. * to preserve dst (and it doesn't come from a dictionary or read-only) then
  1087. * passing src as dst is just fine.
  1088. *
  1089. * Returns a pointer to the normalized value (dst) or NULL if no conversion
  1090. * is needed.
  1091. */
  1092. static xmlChar *
  1093. xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
  1094. {
  1095. if ((src == NULL) || (dst == NULL))
  1096. return(NULL);
  1097. while (*src == 0x20) src++;
  1098. while (*src != 0) {
  1099. if (*src == 0x20) {
  1100. while (*src == 0x20) src++;
  1101. if (*src != 0)
  1102. *dst++ = 0x20;
  1103. } else {
  1104. *dst++ = *src++;
  1105. }
  1106. }
  1107. *dst = 0;
  1108. if (dst == src)
  1109. return(NULL);
  1110. return(dst);
  1111. }
  1112. /**
  1113. * xmlAttrNormalizeSpace2:
  1114. * @src: the source string
  1115. *
  1116. * Normalize the space in non CDATA attribute values, a slightly more complex
  1117. * front end to avoid allocation problems when running on attribute values
  1118. * coming from the input.
  1119. *
  1120. * Returns a pointer to the normalized value (dst) or NULL if no conversion
  1121. * is needed.
  1122. */
  1123. static const xmlChar *
  1124. xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
  1125. {
  1126. int i;
  1127. int remove_head = 0;
  1128. int need_realloc = 0;
  1129. const xmlChar *cur;
  1130. if ((ctxt == NULL) || (src == NULL) || (len == NULL))
  1131. return(NULL);
  1132. i = *len;
  1133. if (i <= 0)
  1134. return(NULL);
  1135. cur = src;
  1136. while (*cur == 0x20) {
  1137. cur++;
  1138. remove_head++;
  1139. }
  1140. while (*cur != 0) {
  1141. if (*cur == 0x20) {
  1142. cur++;
  1143. if ((*cur == 0x20) || (*cur == 0)) {
  1144. need_realloc = 1;
  1145. break;
  1146. }
  1147. } else
  1148. cur++;
  1149. }
  1150. if (need_realloc) {
  1151. xmlChar *ret;
  1152. ret = xmlStrndup(src + remove_head, i - remove_head + 1);
  1153. if (ret == NULL) {
  1154. xmlErrMemory(ctxt, NULL);
  1155. return(NULL);
  1156. }
  1157. xmlAttrNormalizeSpace(ret, ret);
  1158. *len = (int) strlen((const char *)ret);
  1159. return(ret);
  1160. } else if (remove_head) {
  1161. *len -= remove_head;
  1162. memmove(src, src + remove_head, 1 + *len);
  1163. return(src);
  1164. }
  1165. return(NULL);
  1166. }
  1167. /**
  1168. * xmlAddDefAttrs:
  1169. * @ctxt: an XML parser context
  1170. * @fullname: the element fullname
  1171. * @fullattr: the attribute fullname
  1172. * @value: the attribute value
  1173. *
  1174. * Add a defaulted attribute for an element
  1175. */
  1176. static void
  1177. xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
  1178. const xmlChar *fullname,
  1179. const xmlChar *fullattr,
  1180. const xmlChar *value) {
  1181. xmlDefAttrsPtr defaults;
  1182. int len;
  1183. const xmlChar *name;
  1184. const xmlChar *prefix;
  1185. /*
  1186. * Allows to detect attribute redefinitions
  1187. */
  1188. if (ctxt->attsSpecial != NULL) {
  1189. if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
  1190. return;
  1191. }
  1192. if (ctxt->attsDefault == NULL) {
  1193. ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
  1194. if (ctxt->attsDefault == NULL)
  1195. goto mem_error;
  1196. }
  1197. /*
  1198. * split the element name into prefix:localname , the string found
  1199. * are within the DTD and then not associated to namespace names.
  1200. */
  1201. name = xmlSplitQName3(fullname, &len);
  1202. if (name == NULL) {
  1203. name = xmlDictLookup(ctxt->dict, fullname, -1);
  1204. prefix = NULL;
  1205. } else {
  1206. name = xmlDictLookup(ctxt->dict, name, -1);
  1207. prefix = xmlDictLookup(ctxt->dict, fullname, len);
  1208. }
  1209. /*
  1210. * make sure there is some storage
  1211. */
  1212. defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
  1213. if (defaults == NULL) {
  1214. defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
  1215. (4 * 5) * sizeof(const xmlChar *));
  1216. if (defaults == NULL)
  1217. goto mem_error;
  1218. defaults->nbAttrs = 0;
  1219. defaults->maxAttrs = 4;
  1220. if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
  1221. defaults, NULL) < 0) {
  1222. xmlFree(defaults);
  1223. goto mem_error;
  1224. }
  1225. } else if (defaults->nbAttrs >= defaults->maxAttrs) {
  1226. xmlDefAttrsPtr temp;
  1227. temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
  1228. (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
  1229. if (temp == NULL)
  1230. goto mem_error;
  1231. defaults = temp;
  1232. defaults->maxAttrs *= 2;
  1233. if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
  1234. defaults, NULL) < 0) {
  1235. xmlFree(defaults);
  1236. goto mem_error;
  1237. }
  1238. }
  1239. /*
  1240. * Split the element name into prefix:localname , the string found
  1241. * are within the DTD and hen not associated to namespace names.
  1242. */
  1243. name = xmlSplitQName3(fullattr, &len);
  1244. if (name == NULL) {
  1245. name = xmlDictLookup(ctxt->dict, fullattr, -1);
  1246. prefix = NULL;
  1247. } else {
  1248. name = xmlDictLookup(ctxt->dict, name, -1);
  1249. prefix = xmlDictLookup(ctxt->dict, fullattr, len);
  1250. }
  1251. defaults->values[5 * defaults->nbAttrs] = name;
  1252. defaults->values[5 * defaults->nbAttrs + 1] = prefix;
  1253. /* intern the string and precompute the end */
  1254. len = xmlStrlen(value);
  1255. value = xmlDictLookup(ctxt->dict, value, len);
  1256. defaults->values[5 * defaults->nbAttrs + 2] = value;
  1257. defaults->values[5 * defaults->nbAttrs + 3] = value + len;
  1258. if (ctxt->external)
  1259. defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
  1260. else
  1261. defaults->values[5 * defaults->nbAttrs + 4] = NULL;
  1262. defaults->nbAttrs++;
  1263. return;
  1264. mem_error:
  1265. xmlErrMemory(ctxt, NULL);
  1266. return;
  1267. }
  1268. /**
  1269. * xmlAddSpecialAttr:
  1270. * @ctxt: an XML parser context
  1271. * @fullname: the element fullname
  1272. * @fullattr: the attribute fullname
  1273. * @type: the attribute type
  1274. *
  1275. * Register this attribute type
  1276. */
  1277. static void
  1278. xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
  1279. const xmlChar *fullname,
  1280. const xmlChar *fullattr,
  1281. int type)
  1282. {
  1283. if (ctxt->attsSpecial == NULL) {
  1284. ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
  1285. if (ctxt->attsSpecial == NULL)
  1286. goto mem_error;
  1287. }
  1288. if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
  1289. return;
  1290. xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
  1291. (void *) (ptrdiff_t) type);
  1292. return;
  1293. mem_error:
  1294. xmlErrMemory(ctxt, NULL);
  1295. return;
  1296. }
  1297. /**
  1298. * xmlCleanSpecialAttrCallback:
  1299. *
  1300. * Removes CDATA attributes from the special attribute table
  1301. */
  1302. static void
  1303. xmlCleanSpecialAttrCallback(void *payload, void *data,
  1304. const xmlChar *fullname, const xmlChar *fullattr,
  1305. const xmlChar *unused ATTRIBUTE_UNUSED) {
  1306. xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
  1307. if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
  1308. xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
  1309. }
  1310. }
  1311. /**
  1312. * xmlCleanSpecialAttr:
  1313. * @ctxt: an XML parser context
  1314. *
  1315. * Trim the list of attributes defined to remove all those of type
  1316. * CDATA as they are not special. This call should be done when finishing
  1317. * to parse the DTD and before starting to parse the document root.
  1318. */
  1319. static void
  1320. xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
  1321. {
  1322. if (ctxt->attsSpecial == NULL)
  1323. return;
  1324. xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
  1325. if (xmlHashSize(ctxt->attsSpecial) == 0) {
  1326. xmlHashFree(ctxt->attsSpecial, NULL);
  1327. ctxt->attsSpecial = NULL;
  1328. }
  1329. return;
  1330. }
  1331. /**
  1332. * xmlCheckLanguageID:
  1333. * @lang: pointer to the string value
  1334. *
  1335. * Checks that the value conforms to the LanguageID production:
  1336. *
  1337. * NOTE: this is somewhat deprecated, those productions were removed from
  1338. * the XML Second edition.
  1339. *
  1340. * [33] LanguageID ::= Langcode ('-' Subcode)*
  1341. * [34] Langcode ::= ISO639Code | IanaCode | UserCode
  1342. * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
  1343. * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
  1344. * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
  1345. * [38] Subcode ::= ([a-z] | [A-Z])+
  1346. *
  1347. * The current REC reference the successors of RFC 1766, currently 5646
  1348. *
  1349. * http://www.rfc-editor.org/rfc/rfc5646.txt
  1350. * langtag = language
  1351. * ["-" script]
  1352. * ["-" region]
  1353. * *("-" variant)
  1354. * *("-" extension)
  1355. * ["-" privateuse]
  1356. * language = 2*3ALPHA ; shortest ISO 639 code
  1357. * ["-" extlang] ; sometimes followed by
  1358. * ; extended language subtags
  1359. * / 4ALPHA ; or reserved for future use
  1360. * / 5*8ALPHA ; or registered language subtag
  1361. *
  1362. * extlang = 3ALPHA ; selected ISO 639 codes
  1363. * *2("-" 3ALPHA) ; permanently reserved
  1364. *
  1365. * script = 4ALPHA ; ISO 15924 code
  1366. *
  1367. * region = 2ALPHA ; ISO 3166-1 code
  1368. * / 3DIGIT ; UN M.49 code
  1369. *
  1370. * variant = 5*8alphanum ; registered variants
  1371. * / (DIGIT 3alphanum)
  1372. *
  1373. * extension = singleton 1*("-" (2*8alphanum))
  1374. *
  1375. * ; Single alphanumerics
  1376. * ; "x" reserved for private use
  1377. * singleton = DIGIT ; 0 - 9
  1378. * / %x41-57 ; A - W
  1379. * / %x59-5A ; Y - Z
  1380. * / %x61-77 ; a - w
  1381. * / %x79-7A ; y - z
  1382. *
  1383. * it sounds right to still allow Irregular i-xxx IANA and user codes too
  1384. * The parser below doesn't try to cope with extension or privateuse
  1385. * that could be added but that's not interoperable anyway
  1386. *
  1387. * Returns 1 if correct 0 otherwise
  1388. **/
  1389. int
  1390. xmlCheckLanguageID(const xmlChar * lang)
  1391. {
  1392. const xmlChar *cur = lang, *nxt;
  1393. if (cur == NULL)
  1394. return (0);
  1395. if (((cur[0] == 'i') && (cur[1] == '-')) ||
  1396. ((cur[0] == 'I') && (cur[1] == '-')) ||
  1397. ((cur[0] == 'x') && (cur[1] == '-')) ||
  1398. ((cur[0] == 'X') && (cur[1] == '-'))) {
  1399. /*
  1400. * Still allow IANA code and user code which were coming
  1401. * from the previous version of the XML-1.0 specification
  1402. * it's deprecated but we should not fail
  1403. */
  1404. cur += 2;
  1405. while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
  1406. ((cur[0] >= 'a') && (cur[0] <= 'z')))
  1407. cur++;
  1408. return(cur[0] == 0);
  1409. }
  1410. nxt = cur;
  1411. while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
  1412. ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
  1413. nxt++;
  1414. if (nxt - cur >= 4) {
  1415. /*
  1416. * Reserved
  1417. */
  1418. if ((nxt - cur > 8) || (nxt[0] != 0))
  1419. return(0);
  1420. return(1);
  1421. }
  1422. if (nxt - cur < 2)
  1423. return(0);
  1424. /* we got an ISO 639 code */
  1425. if (nxt[0] == 0)
  1426. return(1);
  1427. if (nxt[0] != '-')
  1428. return(0);
  1429. nxt++;
  1430. cur = nxt;
  1431. /* now we can have extlang or script or region or variant */
  1432. if ((nxt[0] >= '0') && (nxt[0] <= '9'))
  1433. goto region_m49;
  1434. while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
  1435. ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
  1436. nxt++;
  1437. if (nxt - cur == 4)
  1438. goto script;
  1439. if (nxt - cur == 2)
  1440. goto region;
  1441. if ((nxt - cur >= 5) && (nxt - cur <= 8))
  1442. goto variant;
  1443. if (nxt - cur != 3)
  1444. return(0);
  1445. /* we parsed an extlang */
  1446. if (nxt[0] == 0)
  1447. return(1);
  1448. if (nxt[0] != '-')
  1449. return(0);
  1450. nxt++;
  1451. cur = nxt;
  1452. /* now we can have script or region or variant */
  1453. if ((nxt[0] >= '0') && (nxt[0] <= '9'))
  1454. goto region_m49;
  1455. while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
  1456. ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
  1457. nxt++;
  1458. if (nxt - cur == 2)
  1459. goto region;
  1460. if ((nxt - cur >= 5) && (nxt - cur <= 8))
  1461. goto variant;
  1462. if (nxt - cur != 4)
  1463. return(0);
  1464. /* we parsed a script */
  1465. script:
  1466. if (nxt[0] == 0)
  1467. return(1);
  1468. if (nxt[0] != '-')
  1469. return(0);
  1470. nxt++;
  1471. cur = nxt;
  1472. /* now we can have region or variant */
  1473. if ((nxt[0] >= '0') && (nxt[0] <= '9'))
  1474. goto region_m49;
  1475. while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
  1476. ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
  1477. nxt++;
  1478. if ((nxt - cur >= 5) && (nxt - cur <= 8))
  1479. goto variant;
  1480. if (nxt - cur != 2)
  1481. return(0);
  1482. /* we parsed a region */
  1483. region:
  1484. if (nxt[0] == 0)
  1485. return(1);
  1486. if (nxt[0] != '-')
  1487. return(0);
  1488. nxt++;
  1489. cur = nxt;
  1490. /* now we can just have a variant */
  1491. while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
  1492. ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
  1493. nxt++;
  1494. if ((nxt - cur < 5) || (nxt - cur > 8))
  1495. return(0);
  1496. /* we parsed a variant */
  1497. variant:
  1498. if (nxt[0] == 0)
  1499. return(1);
  1500. if (nxt[0] != '-')
  1501. return(0);
  1502. /* extensions and private use subtags not checked */
  1503. return (1);
  1504. region_m49:
  1505. if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
  1506. ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
  1507. nxt += 3;
  1508. goto region;
  1509. }
  1510. return(0);
  1511. }
  1512. /************************************************************************
  1513. * *
  1514. * Parser stacks related functions and macros *
  1515. * *
  1516. ************************************************************************/
  1517. static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
  1518. const xmlChar ** str);
  1519. #ifdef SAX2
  1520. /**
  1521. * nsPush:
  1522. * @ctxt: an XML parser context
  1523. * @prefix: the namespace prefix or NULL
  1524. * @URL: the namespace name
  1525. *
  1526. * Pushes a new parser namespace on top of the ns stack
  1527. *
  1528. * Returns -1 in case of error, -2 if the namespace should be discarded
  1529. * and the index in the stack otherwise.
  1530. */
  1531. static int
  1532. nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
  1533. {
  1534. if (ctxt->options & XML_PARSE_NSCLEAN) {
  1535. int i;
  1536. for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
  1537. if (ctxt->nsTab[i] == prefix) {
  1538. /* in scope */
  1539. if (ctxt->nsTab[i + 1] == URL)
  1540. return(-2);
  1541. /* out of scope keep it */
  1542. break;
  1543. }
  1544. }
  1545. }
  1546. if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
  1547. ctxt->nsMax = 10;
  1548. ctxt->nsNr = 0;
  1549. ctxt->nsTab = (const xmlChar **)
  1550. xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
  1551. if (ctxt->nsTab == NULL) {
  1552. xmlErrMemory(ctxt, NULL);
  1553. ctxt->nsMax = 0;
  1554. return (-1);
  1555. }
  1556. } else if (ctxt->nsNr >= ctxt->nsMax) {
  1557. const xmlChar ** tmp;
  1558. ctxt->nsMax *= 2;
  1559. tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
  1560. ctxt->nsMax * sizeof(ctxt->nsTab[0]));
  1561. if (tmp == NULL) {
  1562. xmlErrMemory(ctxt, NULL);
  1563. ctxt->nsMax /= 2;
  1564. return (-1);
  1565. }
  1566. ctxt->nsTab = tmp;
  1567. }
  1568. ctxt->nsTab[ctxt->nsNr++] = prefix;
  1569. ctxt->nsTab[ctxt->nsNr++] = URL;
  1570. return (ctxt->nsNr);
  1571. }
  1572. /**
  1573. * nsPop:
  1574. * @ctxt: an XML parser context
  1575. * @nr: the number to pop
  1576. *
  1577. * Pops the top @nr parser prefix/namespace from the ns stack
  1578. *
  1579. * Returns the number of namespaces removed
  1580. */
  1581. static int
  1582. nsPop(xmlParserCtxtPtr ctxt, int nr)
  1583. {
  1584. int i;
  1585. if (ctxt->nsTab == NULL) return(0);
  1586. if (ctxt->nsNr < nr) {
  1587. xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
  1588. nr = ctxt->nsNr;
  1589. }
  1590. if (ctxt->nsNr <= 0)
  1591. return (0);
  1592. for (i = 0;i < nr;i++) {
  1593. ctxt->nsNr--;
  1594. ctxt->nsTab[ctxt->nsNr] = NULL;
  1595. }
  1596. return(nr);
  1597. }
  1598. #endif
  1599. static int
  1600. xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
  1601. const xmlChar **atts;
  1602. int *attallocs;
  1603. int maxatts;
  1604. if (ctxt->atts == NULL) {
  1605. maxatts = 55; /* allow for 10 attrs by default */
  1606. atts = (const xmlChar **)
  1607. xmlMalloc(maxatts * sizeof(xmlChar *));
  1608. if (atts == NULL) goto mem_error;
  1609. ctxt->atts = atts;
  1610. attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
  1611. if (attallocs == NULL) goto mem_error;
  1612. ctxt->attallocs = attallocs;
  1613. ctxt->maxatts = maxatts;
  1614. } else if (nr + 5 > ctxt->maxatts) {
  1615. maxatts = (nr + 5) * 2;
  1616. atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
  1617. maxatts * sizeof(const xmlChar *));
  1618. if (atts == NULL) goto mem_error;
  1619. ctxt->atts = atts;
  1620. attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
  1621. (maxatts / 5) * sizeof(int));
  1622. if (attallocs == NULL) goto mem_error;
  1623. ctxt->attallocs = attallocs;
  1624. ctxt->maxatts = maxatts;
  1625. }
  1626. return(ctxt->maxatts);
  1627. mem_error:
  1628. xmlErrMemory(ctxt, NULL);
  1629. return(-1);
  1630. }
  1631. /**
  1632. * inputPush:
  1633. * @ctxt: an XML parser context
  1634. * @value: the parser input
  1635. *
  1636. * Pushes a new parser input on top of the input stack
  1637. *
  1638. * Returns -1 in case of error, the index in the stack otherwise
  1639. */
  1640. int
  1641. inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
  1642. {
  1643. if ((ctxt == NULL) || (value == NULL))
  1644. return(-1);
  1645. if (ctxt->inputNr >= ctxt->inputMax) {
  1646. ctxt->inputMax *= 2;
  1647. ctxt->inputTab =
  1648. (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
  1649. ctxt->inputMax *
  1650. sizeof(ctxt->inputTab[0]));
  1651. if (ctxt->inputTab == NULL) {
  1652. xmlErrMemory(ctxt, NULL);
  1653. xmlFreeInputStream(value);
  1654. ctxt->inputMax /= 2;
  1655. value = NULL;
  1656. return (-1);
  1657. }
  1658. }
  1659. ctxt->inputTab[ctxt->inputNr] = value;
  1660. ctxt->input = value;
  1661. return (ctxt->inputNr++);
  1662. }
  1663. /**
  1664. * inputPop:
  1665. * @ctxt: an XML parser context
  1666. *
  1667. * Pops the top parser input from the input stack
  1668. *
  1669. * Returns the input just removed
  1670. */
  1671. xmlParserInputPtr
  1672. inputPop(xmlParserCtxtPtr ctxt)
  1673. {
  1674. xmlParserInputPtr ret;
  1675. if (ctxt == NULL)
  1676. return(NULL);
  1677. if (ctxt->inputNr <= 0)
  1678. return (NULL);
  1679. ctxt->inputNr--;
  1680. if (ctxt->inputNr > 0)
  1681. ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
  1682. else
  1683. ctxt->input = NULL;
  1684. ret = ctxt->inputTab[ctxt->inputNr];
  1685. ctxt->inputTab[ctxt->inputNr] = NULL;
  1686. return (ret);
  1687. }
  1688. /**
  1689. * nodePush:
  1690. * @ctxt: an XML parser context
  1691. * @value: the element node
  1692. *
  1693. * Pushes a new element node on top of the node stack
  1694. *
  1695. * Returns -1 in case of error, the index in the stack otherwise
  1696. */
  1697. int
  1698. nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
  1699. {
  1700. if (ctxt == NULL) return(0);
  1701. if (ctxt->nodeNr >= ctxt->nodeMax) {
  1702. xmlNodePtr *tmp;
  1703. tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
  1704. ctxt->nodeMax * 2 *
  1705. sizeof(ctxt->nodeTab[0]));
  1706. if (tmp == NULL) {
  1707. xmlErrMemory(ctxt, NULL);
  1708. return (-1);
  1709. }
  1710. ctxt->nodeTab = tmp;
  1711. ctxt->nodeMax *= 2;
  1712. }
  1713. if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
  1714. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  1715. xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
  1716. "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
  1717. xmlParserMaxDepth);
  1718. xmlHaltParser(ctxt);
  1719. return(-1);
  1720. }
  1721. ctxt->nodeTab[ctxt->nodeNr] = value;
  1722. ctxt->node = value;
  1723. return (ctxt->nodeNr++);
  1724. }
  1725. /**
  1726. * nodePop:
  1727. * @ctxt: an XML parser context
  1728. *
  1729. * Pops the top element node from the node stack
  1730. *
  1731. * Returns the node just removed
  1732. */
  1733. xmlNodePtr
  1734. nodePop(xmlParserCtxtPtr ctxt)
  1735. {
  1736. xmlNodePtr ret;
  1737. if (ctxt == NULL) return(NULL);
  1738. if (ctxt->nodeNr <= 0)
  1739. return (NULL);
  1740. ctxt->nodeNr--;
  1741. if (ctxt->nodeNr > 0)
  1742. ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
  1743. else
  1744. ctxt->node = NULL;
  1745. ret = ctxt->nodeTab[ctxt->nodeNr];
  1746. ctxt->nodeTab[ctxt->nodeNr] = NULL;
  1747. return (ret);
  1748. }
  1749. /**
  1750. * nameNsPush:
  1751. * @ctxt: an XML parser context
  1752. * @value: the element name
  1753. * @prefix: the element prefix
  1754. * @URI: the element namespace name
  1755. * @line: the current line number for error messages
  1756. * @nsNr: the number of namespaces pushed on the namespace table
  1757. *
  1758. * Pushes a new element name/prefix/URL on top of the name stack
  1759. *
  1760. * Returns -1 in case of error, the index in the stack otherwise
  1761. */
  1762. static int
  1763. nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
  1764. const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
  1765. {
  1766. xmlStartTag *tag;
  1767. if (ctxt->nameNr >= ctxt->nameMax) {
  1768. const xmlChar * *tmp;
  1769. xmlStartTag *tmp2;
  1770. ctxt->nameMax *= 2;
  1771. tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
  1772. ctxt->nameMax *
  1773. sizeof(ctxt->nameTab[0]));
  1774. if (tmp == NULL) {
  1775. ctxt->nameMax /= 2;
  1776. goto mem_error;
  1777. }
  1778. ctxt->nameTab = tmp;
  1779. tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
  1780. ctxt->nameMax *
  1781. sizeof(ctxt->pushTab[0]));
  1782. if (tmp2 == NULL) {
  1783. ctxt->nameMax /= 2;
  1784. goto mem_error;
  1785. }
  1786. ctxt->pushTab = tmp2;
  1787. } else if (ctxt->pushTab == NULL) {
  1788. ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
  1789. sizeof(ctxt->pushTab[0]));
  1790. if (ctxt->pushTab == NULL)
  1791. goto mem_error;
  1792. }
  1793. ctxt->nameTab[ctxt->nameNr] = value;
  1794. ctxt->name = value;
  1795. tag = &ctxt->pushTab[ctxt->nameNr];
  1796. tag->prefix = prefix;
  1797. tag->URI = URI;
  1798. tag->line = line;
  1799. tag->nsNr = nsNr;
  1800. return (ctxt->nameNr++);
  1801. mem_error:
  1802. xmlErrMemory(ctxt, NULL);
  1803. return (-1);
  1804. }
  1805. #ifdef LIBXML_PUSH_ENABLED
  1806. /**
  1807. * nameNsPop:
  1808. * @ctxt: an XML parser context
  1809. *
  1810. * Pops the top element/prefix/URI name from the name stack
  1811. *
  1812. * Returns the name just removed
  1813. */
  1814. static const xmlChar *
  1815. nameNsPop(xmlParserCtxtPtr ctxt)
  1816. {
  1817. const xmlChar *ret;
  1818. if (ctxt->nameNr <= 0)
  1819. return (NULL);
  1820. ctxt->nameNr--;
  1821. if (ctxt->nameNr > 0)
  1822. ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
  1823. else
  1824. ctxt->name = NULL;
  1825. ret = ctxt->nameTab[ctxt->nameNr];
  1826. ctxt->nameTab[ctxt->nameNr] = NULL;
  1827. return (ret);
  1828. }
  1829. #endif /* LIBXML_PUSH_ENABLED */
  1830. /**
  1831. * namePush:
  1832. * @ctxt: an XML parser context
  1833. * @value: the element name
  1834. *
  1835. * Pushes a new element name on top of the name stack
  1836. *
  1837. * Returns -1 in case of error, the index in the stack otherwise
  1838. */
  1839. int
  1840. namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
  1841. {
  1842. if (ctxt == NULL) return (-1);
  1843. if (ctxt->nameNr >= ctxt->nameMax) {
  1844. const xmlChar * *tmp;
  1845. tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
  1846. ctxt->nameMax * 2 *
  1847. sizeof(ctxt->nameTab[0]));
  1848. if (tmp == NULL) {
  1849. goto mem_error;
  1850. }
  1851. ctxt->nameTab = tmp;
  1852. ctxt->nameMax *= 2;
  1853. }
  1854. ctxt->nameTab[ctxt->nameNr] = value;
  1855. ctxt->name = value;
  1856. return (ctxt->nameNr++);
  1857. mem_error:
  1858. xmlErrMemory(ctxt, NULL);
  1859. return (-1);
  1860. }
  1861. /**
  1862. * namePop:
  1863. * @ctxt: an XML parser context
  1864. *
  1865. * Pops the top element name from the name stack
  1866. *
  1867. * Returns the name just removed
  1868. */
  1869. const xmlChar *
  1870. namePop(xmlParserCtxtPtr ctxt)
  1871. {
  1872. const xmlChar *ret;
  1873. if ((ctxt == NULL) || (ctxt->nameNr <= 0))
  1874. return (NULL);
  1875. ctxt->nameNr--;
  1876. if (ctxt->nameNr > 0)
  1877. ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
  1878. else
  1879. ctxt->name = NULL;
  1880. ret = ctxt->nameTab[ctxt->nameNr];
  1881. ctxt->nameTab[ctxt->nameNr] = NULL;
  1882. return (ret);
  1883. }
  1884. static int spacePush(xmlParserCtxtPtr ctxt, int val) {
  1885. if (ctxt->spaceNr >= ctxt->spaceMax) {
  1886. int *tmp;
  1887. ctxt->spaceMax *= 2;
  1888. tmp = (int *) xmlRealloc(ctxt->spaceTab,
  1889. ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
  1890. if (tmp == NULL) {
  1891. xmlErrMemory(ctxt, NULL);
  1892. ctxt->spaceMax /=2;
  1893. return(-1);
  1894. }
  1895. ctxt->spaceTab = tmp;
  1896. }
  1897. ctxt->spaceTab[ctxt->spaceNr] = val;
  1898. ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
  1899. return(ctxt->spaceNr++);
  1900. }
  1901. static int spacePop(xmlParserCtxtPtr ctxt) {
  1902. int ret;
  1903. if (ctxt->spaceNr <= 0) return(0);
  1904. ctxt->spaceNr--;
  1905. if (ctxt->spaceNr > 0)
  1906. ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
  1907. else
  1908. ctxt->space = &ctxt->spaceTab[0];
  1909. ret = ctxt->spaceTab[ctxt->spaceNr];
  1910. ctxt->spaceTab[ctxt->spaceNr] = -1;
  1911. return(ret);
  1912. }
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
  1947. #define RAW (*ctxt->input->cur)
  1948. #define CUR (*ctxt->input->cur)
  1949. #define NXT(val) ctxt->input->cur[(val)]
  1950. #define CUR_PTR ctxt->input->cur
  1951. #define BASE_PTR ctxt->input->base
  1952. #define CMP4( s, c1, c2, c3, c4 ) \
  1953. ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
  1954. ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
  1955. #define CMP5( s, c1, c2, c3, c4, c5 ) \
  1956. ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
  1957. #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  1958. ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
  1959. #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  1960. ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
  1961. #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  1962. ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
  1963. #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  1964. ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
  1965. ((unsigned char *) s)[ 8 ] == c9 )
  1966. #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  1967. ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
  1968. ((unsigned char *) s)[ 9 ] == c10 )
  1969. #define SKIP(val) do { \
  1970. ctxt->input->cur += (val),ctxt->input->col+=(val); \
  1971. if (*ctxt->input->cur == 0) \
  1972. xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  1973. } while (0)
  1974. #define SKIPL(val) do { \
  1975. int skipl; \
  1976. for(skipl=0; skipl<val; skipl++) { \
  1977. if (*(ctxt->input->cur) == '\n') { \
  1978. ctxt->input->line++; ctxt->input->col = 1; \
  1979. } else ctxt->input->col++; \
  1980. ctxt->input->cur++; \
  1981. } \
  1982. if (*ctxt->input->cur == 0) \
  1983. xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  1984. } while (0)
  1985. #define SHRINK if ((ctxt->progressive == 0) && \
  1986. (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
  1987. (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
  1988. xmlSHRINK (ctxt);
  1989. static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
  1990. xmlParserInputShrink(ctxt->input);
  1991. if (*ctxt->input->cur == 0)
  1992. xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  1993. }
  1994. #define GROW if ((ctxt->progressive == 0) && \
  1995. (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
  1996. xmlGROW (ctxt);
  1997. static void xmlGROW (xmlParserCtxtPtr ctxt) {
  1998. ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
  1999. ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
  2000. if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
  2001. (curBase > XML_MAX_LOOKUP_LIMIT)) &&
  2002. ((ctxt->input->buf) &&
  2003. (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
  2004. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  2005. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
  2006. xmlHaltParser(ctxt);
  2007. return;
  2008. }
  2009. xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  2010. if ((ctxt->input->cur > ctxt->input->end) ||
  2011. (ctxt->input->cur < ctxt->input->base)) {
  2012. xmlHaltParser(ctxt);
  2013. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
  2014. return;
  2015. }
  2016. if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
  2017. xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  2018. }
  2019. #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
  2020. #define NEXT xmlNextChar(ctxt)
  2021. #define NEXT1 { \
  2022. ctxt->input->col++; \
  2023. ctxt->input->cur++; \
  2024. if (*ctxt->input->cur == 0) \
  2025. xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  2026. }
  2027. #define NEXTL(l) do { \
  2028. if (*(ctxt->input->cur) == '\n') { \
  2029. ctxt->input->line++; ctxt->input->col = 1; \
  2030. } else ctxt->input->col++; \
  2031. ctxt->input->cur += l; \
  2032. } while (0)
  2033. #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
  2034. #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
  2035. #define COPY_BUF(l,b,i,v) \
  2036. if (l == 1) b[i++] = (xmlChar) v; \
  2037. else i += xmlCopyCharMultiByte(&b[i],v)
  2038. /**
  2039. * xmlSkipBlankChars:
  2040. * @ctxt: the XML parser context
  2041. *
  2042. * skip all blanks character found at that point in the input streams.
  2043. * It pops up finished entities in the process if allowable at that point.
  2044. *
  2045. * Returns the number of space chars skipped
  2046. */
  2047. int
  2048. xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
  2049. int res = 0;
  2050. /*
  2051. * It's Okay to use CUR/NEXT here since all the blanks are on
  2052. * the ASCII range.
  2053. */
  2054. if (ctxt->instate != XML_PARSER_DTD) {
  2055. const xmlChar *cur;
  2056. /*
  2057. * if we are in the document content, go really fast
  2058. */
  2059. cur = ctxt->input->cur;
  2060. while (IS_BLANK_CH(*cur)) {
  2061. if (*cur == '\n') {
  2062. ctxt->input->line++; ctxt->input->col = 1;
  2063. } else {
  2064. ctxt->input->col++;
  2065. }
  2066. cur++;
  2067. res++;
  2068. if (*cur == 0) {
  2069. ctxt->input->cur = cur;
  2070. xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  2071. cur = ctxt->input->cur;
  2072. }
  2073. }
  2074. ctxt->input->cur = cur;
  2075. } else {
  2076. int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
  2077. while (1) {
  2078. if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
  2079. NEXT;
  2080. } else if (CUR == '%') {
  2081. /*
  2082. * Need to handle support of entities branching here
  2083. */
  2084. if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
  2085. break;
  2086. xmlParsePEReference(ctxt);
  2087. } else if (CUR == 0) {
  2088. if (ctxt->inputNr <= 1)
  2089. break;
  2090. xmlPopInput(ctxt);
  2091. } else {
  2092. break;
  2093. }
  2094. /*
  2095. * Also increase the counter when entering or exiting a PERef.
  2096. * The spec says: "When a parameter-entity reference is recognized
  2097. * in the DTD and included, its replacement text MUST be enlarged
  2098. * by the attachment of one leading and one following space (#x20)
  2099. * character."
  2100. */
  2101. res++;
  2102. }
  2103. }
  2104. return(res);
  2105. }
  2106. /************************************************************************
  2107. * *
  2108. * Commodity functions to handle entities *
  2109. * *
  2110. ************************************************************************/
  2111. /**
  2112. * xmlPopInput:
  2113. * @ctxt: an XML parser context
  2114. *
  2115. * xmlPopInput: the current input pointed by ctxt->input came to an end
  2116. * pop it and return the next char.
  2117. *
  2118. * Returns the current xmlChar in the parser context
  2119. */
  2120. xmlChar
  2121. xmlPopInput(xmlParserCtxtPtr ctxt) {
  2122. if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
  2123. if (xmlParserDebugEntities)
  2124. xmlGenericError(xmlGenericErrorContext,
  2125. "Popping input %d\n", ctxt->inputNr);
  2126. if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
  2127. (ctxt->instate != XML_PARSER_EOF))
  2128. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  2129. "Unfinished entity outside the DTD");
  2130. xmlFreeInputStream(inputPop(ctxt));
  2131. if (*ctxt->input->cur == 0)
  2132. xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  2133. return(CUR);
  2134. }
  2135. /**
  2136. * xmlPushInput:
  2137. * @ctxt: an XML parser context
  2138. * @input: an XML parser input fragment (entity, XML fragment ...).
  2139. *
  2140. * xmlPushInput: switch to a new input stream which is stacked on top
  2141. * of the previous one(s).
  2142. * Returns -1 in case of error or the index in the input stack
  2143. */
  2144. int
  2145. xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
  2146. int ret;
  2147. if (input == NULL) return(-1);
  2148. if (xmlParserDebugEntities) {
  2149. if ((ctxt->input != NULL) && (ctxt->input->filename))
  2150. xmlGenericError(xmlGenericErrorContext,
  2151. "%s(%d): ", ctxt->input->filename,
  2152. ctxt->input->line);
  2153. xmlGenericError(xmlGenericErrorContext,
  2154. "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
  2155. }
  2156. if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
  2157. (ctxt->inputNr > 1024)) {
  2158. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  2159. while (ctxt->inputNr > 1)
  2160. xmlFreeInputStream(inputPop(ctxt));
  2161. return(-1);
  2162. }
  2163. ret = inputPush(ctxt, input);
  2164. if (ctxt->instate == XML_PARSER_EOF)
  2165. return(-1);
  2166. GROW;
  2167. return(ret);
  2168. }
  2169. /**
  2170. * xmlParseCharRef:
  2171. * @ctxt: an XML parser context
  2172. *
  2173. * parse Reference declarations
  2174. *
  2175. * [66] CharRef ::= '&#' [0-9]+ ';' |
  2176. * '&#x' [0-9a-fA-F]+ ';'
  2177. *
  2178. * [ WFC: Legal Character ]
  2179. * Characters referred to using character references must match the
  2180. * production for Char.
  2181. *
  2182. * Returns the value parsed (as an int), 0 in case of error
  2183. */
  2184. int
  2185. xmlParseCharRef(xmlParserCtxtPtr ctxt) {
  2186. int val = 0;
  2187. int count = 0;
  2188. /*
  2189. * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
  2190. */
  2191. if ((RAW == '&') && (NXT(1) == '#') &&
  2192. (NXT(2) == 'x')) {
  2193. SKIP(3);
  2194. GROW;
  2195. while (RAW != ';') { /* loop blocked by count */
  2196. if (count++ > 20) {
  2197. count = 0;
  2198. GROW;
  2199. if (ctxt->instate == XML_PARSER_EOF)
  2200. return(0);
  2201. }
  2202. if ((RAW >= '0') && (RAW <= '9'))
  2203. val = val * 16 + (CUR - '0');
  2204. else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
  2205. val = val * 16 + (CUR - 'a') + 10;
  2206. else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
  2207. val = val * 16 + (CUR - 'A') + 10;
  2208. else {
  2209. xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
  2210. val = 0;
  2211. break;
  2212. }
  2213. if (val > 0x110000)
  2214. val = 0x110000;
  2215. NEXT;
  2216. count++;
  2217. }
  2218. if (RAW == ';') {
  2219. /* on purpose to avoid reentrancy problems with NEXT and SKIP */
  2220. ctxt->input->col++;
  2221. ctxt->input->cur++;
  2222. }
  2223. } else if ((RAW == '&') && (NXT(1) == '#')) {
  2224. SKIP(2);
  2225. GROW;
  2226. while (RAW != ';') { /* loop blocked by count */
  2227. if (count++ > 20) {
  2228. count = 0;
  2229. GROW;
  2230. if (ctxt->instate == XML_PARSER_EOF)
  2231. return(0);
  2232. }
  2233. if ((RAW >= '0') && (RAW <= '9'))
  2234. val = val * 10 + (CUR - '0');
  2235. else {
  2236. xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
  2237. val = 0;
  2238. break;
  2239. }
  2240. if (val > 0x110000)
  2241. val = 0x110000;
  2242. NEXT;
  2243. count++;
  2244. }
  2245. if (RAW == ';') {
  2246. /* on purpose to avoid reentrancy problems with NEXT and SKIP */
  2247. ctxt->input->col++;
  2248. ctxt->input->cur++;
  2249. }
  2250. } else {
  2251. xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
  2252. }
  2253. /*
  2254. * [ WFC: Legal Character ]
  2255. * Characters referred to using character references must match the
  2256. * production for Char.
  2257. */
  2258. if (val >= 0x110000) {
  2259. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  2260. "xmlParseCharRef: character reference out of bounds\n",
  2261. val);
  2262. } else if (IS_CHAR(val)) {
  2263. return(val);
  2264. } else {
  2265. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  2266. "xmlParseCharRef: invalid xmlChar value %d\n",
  2267. val);
  2268. }
  2269. return(0);
  2270. }
  2271. /**
  2272. * xmlParseStringCharRef:
  2273. * @ctxt: an XML parser context
  2274. * @str: a pointer to an index in the string
  2275. *
  2276. * parse Reference declarations, variant parsing from a string rather
  2277. * than an an input flow.
  2278. *
  2279. * [66] CharRef ::= '&#' [0-9]+ ';' |
  2280. * '&#x' [0-9a-fA-F]+ ';'
  2281. *
  2282. * [ WFC: Legal Character ]
  2283. * Characters referred to using character references must match the
  2284. * production for Char.
  2285. *
  2286. * Returns the value parsed (as an int), 0 in case of error, str will be
  2287. * updated to the current value of the index
  2288. */
  2289. static int
  2290. xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
  2291. const xmlChar *ptr;
  2292. xmlChar cur;
  2293. int val = 0;
  2294. if ((str == NULL) || (*str == NULL)) return(0);
  2295. ptr = *str;
  2296. cur = *ptr;
  2297. if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
  2298. ptr += 3;
  2299. cur = *ptr;
  2300. while (cur != ';') { /* Non input consuming loop */
  2301. if ((cur >= '0') && (cur <= '9'))
  2302. val = val * 16 + (cur - '0');
  2303. else if ((cur >= 'a') && (cur <= 'f'))
  2304. val = val * 16 + (cur - 'a') + 10;
  2305. else if ((cur >= 'A') && (cur <= 'F'))
  2306. val = val * 16 + (cur - 'A') + 10;
  2307. else {
  2308. xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
  2309. val = 0;
  2310. break;
  2311. }
  2312. if (val > 0x110000)
  2313. val = 0x110000;
  2314. ptr++;
  2315. cur = *ptr;
  2316. }
  2317. if (cur == ';')
  2318. ptr++;
  2319. } else if ((cur == '&') && (ptr[1] == '#')){
  2320. ptr += 2;
  2321. cur = *ptr;
  2322. while (cur != ';') { /* Non input consuming loops */
  2323. if ((cur >= '0') && (cur <= '9'))
  2324. val = val * 10 + (cur - '0');
  2325. else {
  2326. xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
  2327. val = 0;
  2328. break;
  2329. }
  2330. if (val > 0x110000)
  2331. val = 0x110000;
  2332. ptr++;
  2333. cur = *ptr;
  2334. }
  2335. if (cur == ';')
  2336. ptr++;
  2337. } else {
  2338. xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
  2339. return(0);
  2340. }
  2341. *str = ptr;
  2342. /*
  2343. * [ WFC: Legal Character ]
  2344. * Characters referred to using character references must match the
  2345. * production for Char.
  2346. */
  2347. if (val >= 0x110000) {
  2348. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  2349. "xmlParseStringCharRef: character reference out of bounds\n",
  2350. val);
  2351. } else if (IS_CHAR(val)) {
  2352. return(val);
  2353. } else {
  2354. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  2355. "xmlParseStringCharRef: invalid xmlChar value %d\n",
  2356. val);
  2357. }
  2358. return(0);
  2359. }
  2360. /**
  2361. * xmlParserHandlePEReference:
  2362. * @ctxt: the parser context
  2363. *
  2364. * [69] PEReference ::= '%' Name ';'
  2365. *
  2366. * [ WFC: No Recursion ]
  2367. * A parsed entity must not contain a recursive
  2368. * reference to itself, either directly or indirectly.
  2369. *
  2370. * [ WFC: Entity Declared ]
  2371. * In a document without any DTD, a document with only an internal DTD
  2372. * subset which contains no parameter entity references, or a document
  2373. * with "standalone='yes'", ... ... The declaration of a parameter
  2374. * entity must precede any reference to it...
  2375. *
  2376. * [ VC: Entity Declared ]
  2377. * In a document with an external subset or external parameter entities
  2378. * with "standalone='no'", ... ... The declaration of a parameter entity
  2379. * must precede any reference to it...
  2380. *
  2381. * [ WFC: In DTD ]
  2382. * Parameter-entity references may only appear in the DTD.
  2383. * NOTE: misleading but this is handled.
  2384. *
  2385. * A PEReference may have been detected in the current input stream
  2386. * the handling is done accordingly to
  2387. * http://www.w3.org/TR/REC-xml#entproc
  2388. * i.e.
  2389. * - Included in literal in entity values
  2390. * - Included as Parameter Entity reference within DTDs
  2391. */
  2392. void
  2393. xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
  2394. switch(ctxt->instate) {
  2395. case XML_PARSER_CDATA_SECTION:
  2396. return;
  2397. case XML_PARSER_COMMENT:
  2398. return;
  2399. case XML_PARSER_START_TAG:
  2400. return;
  2401. case XML_PARSER_END_TAG:
  2402. return;
  2403. case XML_PARSER_EOF:
  2404. xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
  2405. return;
  2406. case XML_PARSER_PROLOG:
  2407. case XML_PARSER_START:
  2408. case XML_PARSER_MISC:
  2409. xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
  2410. return;
  2411. case XML_PARSER_ENTITY_DECL:
  2412. case XML_PARSER_CONTENT:
  2413. case XML_PARSER_ATTRIBUTE_VALUE:
  2414. case XML_PARSER_PI:
  2415. case XML_PARSER_SYSTEM_LITERAL:
  2416. case XML_PARSER_PUBLIC_LITERAL:
  2417. /* we just ignore it there */
  2418. return;
  2419. case XML_PARSER_EPILOG:
  2420. xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
  2421. return;
  2422. case XML_PARSER_ENTITY_VALUE:
  2423. /*
  2424. * NOTE: in the case of entity values, we don't do the
  2425. * substitution here since we need the literal
  2426. * entity value to be able to save the internal
  2427. * subset of the document.
  2428. * This will be handled by xmlStringDecodeEntities
  2429. */
  2430. return;
  2431. case XML_PARSER_DTD:
  2432. /*
  2433. * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
  2434. * In the internal DTD subset, parameter-entity references
  2435. * can occur only where markup declarations can occur, not
  2436. * within markup declarations.
  2437. * In that case this is handled in xmlParseMarkupDecl
  2438. */
  2439. if ((ctxt->external == 0) && (ctxt->inputNr == 1))
  2440. return;
  2441. if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
  2442. return;
  2443. break;
  2444. case XML_PARSER_IGNORE:
  2445. return;
  2446. }
  2447. xmlParsePEReference(ctxt);
  2448. }
  2449. /*
  2450. * Macro used to grow the current buffer.
  2451. * buffer##_size is expected to be a size_t
  2452. * mem_error: is expected to handle memory allocation failures
  2453. */
  2454. #define growBuffer(buffer, n) { \
  2455. xmlChar *tmp; \
  2456. size_t new_size = buffer##_size * 2 + n; \
  2457. if (new_size < buffer##_size) goto mem_error; \
  2458. tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
  2459. if (tmp == NULL) goto mem_error; \
  2460. buffer = tmp; \
  2461. buffer##_size = new_size; \
  2462. }
/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions: character references are always decoded, general entity
 * references are expanded when XML_SUBSTITUTE_REF is set in @what, and
 * parameter-entity references when XML_SUBSTITUTE_PEREF is set. Entity
 * contents are decoded recursively; the recursion is refused once
 * ctxt->depth exceeds 40 (1024 with XML_PARSE_HUGE).
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned on invalid arguments, on
 *      memory errors and on decoding errors.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                           int what, xmlChar end, xmlChar end2, xmlChar end3) {
    xmlChar *buffer = NULL;      /* output buffer, returned to the caller */
    size_t buffer_size = 0;      /* allocated size of buffer */
    size_t nbchars = 0;          /* number of bytes written to buffer */

    xmlChar *current = NULL;     /* read cursor inside rep */
    xmlChar *rep = NULL;         /* decoded entity content, owned here */
    const xmlChar *last;         /* one past the last byte of the input */
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /* Guard against runaway recursion through nested entities. */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3) &&
           (ctxt->instate != XML_PARSER_EOF)) {

        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* Character reference: decoded regardless of @what. */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val == 0)
                goto int_error;
            /* A length of 0 selects the multi-byte copy in COPY_BUF. */
            COPY_BUF(0,buffer,nbchars,val);
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            /* General entity reference. */
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* Predefined entity: copy the first unit of its content. */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                    goto int_error;
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* Entity with content: decode it one level deeper. */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL) {
                    /* Decoding failed: empty the entity content in place. */
                    ent->content[0] = 0;
                    goto int_error;
                }

                /* Append the decoded text, growing the buffer as needed. */
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            } else if (ent != NULL) {
                /* Entity without content: re-emit "&name;" verbatim. */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            /* Parameter-entity reference. */
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if (ent != NULL) {
                if (ent->content == NULL) {
                    /*
                     * Note: external parsed entities will not be loaded,
                     * it is not required for a non-validating parser to
                     * complete external PEReferences coming from the
                     * internal subset
                     */
                    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
                        ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
                        (ctxt->validate != 0)) {
                        xmlLoadEntityContent(ctxt, ent);
                    } else {
                        xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
                    "not validating will not read content for PE entity %s\n",
                                      ent->name, NULL);
                    }
                }
                /*
                 * If the content is still NULL here, the call below
                 * returns NULL (see the str == NULL check in
                 * xmlStringDecodeEntities) and the int_error path is taken.
                 */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL) {
                    /* Decoding failed: empty the entity content in place. */
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }
                /* Append the decoded text, growing the buffer as needed. */
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            }
        } else {
            /* Ordinary character: copy it and step over its l bytes. */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        /* Fetch the next character, or stop at the end of the input. */
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

    /* mem_error reports the failure, then shares the cleanup below. */
mem_error:
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
  2650. /**
  2651. * xmlStringDecodeEntities:
  2652. * @ctxt: the parser context
  2653. * @str: the input string
  2654. * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
  2655. * @end: an end marker xmlChar, 0 if none
  2656. * @end2: an end marker xmlChar, 0 if none
  2657. * @end3: an end marker xmlChar, 0 if none
  2658. *
  2659. * Takes a entity string content and process to do the adequate substitutions.
  2660. *
  2661. * [67] Reference ::= EntityRef | CharRef
  2662. *
  2663. * [69] PEReference ::= '%' Name ';'
  2664. *
  2665. * Returns A newly allocated string with the substitution done. The caller
  2666. * must deallocate it !
  2667. */
  2668. xmlChar *
  2669. xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
  2670. xmlChar end, xmlChar end2, xmlChar end3) {
  2671. if ((ctxt == NULL) || (str == NULL)) return(NULL);
  2672. return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
  2673. end, end2, end3));
  2674. }
  2675. /************************************************************************
  2676. * *
  2677. * Commodity functions, cleanup needed ? *
  2678. * *
  2679. ************************************************************************/
  2680. /**
  2681. * areBlanks:
  2682. * @ctxt: an XML parser context
  2683. * @str: a xmlChar *
  2684. * @len: the size of @str
  2685. * @blank_chars: we know the chars are blanks
  2686. *
  2687. * Is this a sequence of blank chars that one can ignore ?
  2688. *
  2689. * Returns 1 if ignorable 0 otherwise.
  2690. */
  2691. static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
  2692. int blank_chars) {
  2693. int i, ret;
  2694. xmlNodePtr lastChild;
  2695. /*
  2696. * Don't spend time trying to differentiate them, the same callback is
  2697. * used !
  2698. */
  2699. if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
  2700. return(0);
  2701. /*
  2702. * Check for xml:space value.
  2703. */
  2704. if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
  2705. (*(ctxt->space) == -2))
  2706. return(0);
  2707. /*
  2708. * Check that the string is made of blanks
  2709. */
  2710. if (blank_chars == 0) {
  2711. for (i = 0;i < len;i++)
  2712. if (!(IS_BLANK_CH(str[i]))) return(0);
  2713. }
  2714. /*
  2715. * Look if the element is mixed content in the DTD if available
  2716. */
  2717. if (ctxt->node == NULL) return(0);
  2718. if (ctxt->myDoc != NULL) {
  2719. ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
  2720. if (ret == 0) return(1);
  2721. if (ret == 1) return(0);
  2722. }
  2723. /*
  2724. * Otherwise, heuristic :-\
  2725. */
  2726. if ((RAW != '<') && (RAW != 0xD)) return(0);
  2727. if ((ctxt->node->children == NULL) &&
  2728. (RAW == '<') && (NXT(1) == '/')) return(0);
  2729. lastChild = xmlGetLastChild(ctxt->node);
  2730. if (lastChild == NULL) {
  2731. if ((ctxt->node->type != XML_ELEMENT_NODE) &&
  2732. (ctxt->node->content != NULL)) return(0);
  2733. } else if (xmlNodeIsText(lastChild))
  2734. return(0);
  2735. else if ((ctxt->node->children != NULL) &&
  2736. (xmlNodeIsText(ctxt->node->children)))
  2737. return(0);
  2738. return(1);
  2739. }
  2740. /************************************************************************
  2741. * *
  2742. * Extra stuff for namespace support *
  2743. * Relates to http://www.w3.org/TR/WD-xml-names *
  2744. * *
  2745. ************************************************************************/
  2746. /**
  2747. * xmlSplitQName:
  2748. * @ctxt: an XML parser context
  2749. * @name: an XML parser context
  2750. * @prefix: a xmlChar **
  2751. *
  2752. * parse an UTF8 encoded XML qualified name string
  2753. *
  2754. * [NS 5] QName ::= (Prefix ':')? LocalPart
  2755. *
  2756. * [NS 6] Prefix ::= NCName
  2757. *
  2758. * [NS 7] LocalPart ::= NCName
  2759. *
  2760. * Returns the local part, and prefix is updated
  2761. * to get the Prefix if any.
  2762. */
  2763. xmlChar *
  2764. xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
  2765. xmlChar buf[XML_MAX_NAMELEN + 5];
  2766. xmlChar *buffer = NULL;
  2767. int len = 0;
  2768. int max = XML_MAX_NAMELEN;
  2769. xmlChar *ret = NULL;
  2770. const xmlChar *cur = name;
  2771. int c;
  2772. if (prefix == NULL) return(NULL);
  2773. *prefix = NULL;
  2774. if (cur == NULL) return(NULL);
  2775. #ifndef XML_XML_NAMESPACE
  2776. /* xml: prefix is not really a namespace */
  2777. if ((cur[0] == 'x') && (cur[1] == 'm') &&
  2778. (cur[2] == 'l') && (cur[3] == ':'))
  2779. return(xmlStrdup(name));
  2780. #endif
  2781. /* nasty but well=formed */
  2782. if (cur[0] == ':')
  2783. return(xmlStrdup(name));
  2784. c = *cur++;
  2785. while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
  2786. buf[len++] = c;
  2787. c = *cur++;
  2788. }
  2789. if (len >= max) {
  2790. /*
  2791. * Okay someone managed to make a huge name, so he's ready to pay
  2792. * for the processing speed.
  2793. */
  2794. max = len * 2;
  2795. buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
  2796. if (buffer == NULL) {
  2797. xmlErrMemory(ctxt, NULL);
  2798. return(NULL);
  2799. }
  2800. memcpy(buffer, buf, len);
  2801. while ((c != 0) && (c != ':')) { /* tested bigname.xml */
  2802. if (len + 10 > max) {
  2803. xmlChar *tmp;
  2804. max *= 2;
  2805. tmp = (xmlChar *) xmlRealloc(buffer,
  2806. max * sizeof(xmlChar));
  2807. if (tmp == NULL) {
  2808. xmlFree(buffer);
  2809. xmlErrMemory(ctxt, NULL);
  2810. return(NULL);
  2811. }
  2812. buffer = tmp;
  2813. }
  2814. buffer[len++] = c;
  2815. c = *cur++;
  2816. }
  2817. buffer[len] = 0;
  2818. }
  2819. if ((c == ':') && (*cur == 0)) {
  2820. if (buffer != NULL)
  2821. xmlFree(buffer);
  2822. *prefix = NULL;
  2823. return(xmlStrdup(name));
  2824. }
  2825. if (buffer == NULL)
  2826. ret = xmlStrndup(buf, len);
  2827. else {
  2828. ret = buffer;
  2829. buffer = NULL;
  2830. max = XML_MAX_NAMELEN;
  2831. }
  2832. if (c == ':') {
  2833. c = *cur;
  2834. *prefix = ret;
  2835. if (c == 0) {
  2836. return(xmlStrndup(BAD_CAST "", 0));
  2837. }
  2838. len = 0;
  2839. /*
  2840. * Check that the first character is proper to start
  2841. * a new name
  2842. */
  2843. if (!(((c >= 0x61) && (c <= 0x7A)) ||
  2844. ((c >= 0x41) && (c <= 0x5A)) ||
  2845. (c == '_') || (c == ':'))) {
  2846. int l;
  2847. int first = CUR_SCHAR(cur, l);
  2848. if (!IS_LETTER(first) && (first != '_')) {
  2849. xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
  2850. "Name %s is not XML Namespace compliant\n",
  2851. name);
  2852. }
  2853. }
  2854. cur++;
  2855. while ((c != 0) && (len < max)) { /* tested bigname2.xml */
  2856. buf[len++] = c;
  2857. c = *cur++;
  2858. }
  2859. if (len >= max) {
  2860. /*
  2861. * Okay someone managed to make a huge name, so he's ready to pay
  2862. * for the processing speed.
  2863. */
  2864. max = len * 2;
  2865. buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
  2866. if (buffer == NULL) {
  2867. xmlErrMemory(ctxt, NULL);
  2868. return(NULL);
  2869. }
  2870. memcpy(buffer, buf, len);
  2871. while (c != 0) { /* tested bigname2.xml */
  2872. if (len + 10 > max) {
  2873. xmlChar *tmp;
  2874. max *= 2;
  2875. tmp = (xmlChar *) xmlRealloc(buffer,
  2876. max * sizeof(xmlChar));
  2877. if (tmp == NULL) {
  2878. xmlErrMemory(ctxt, NULL);
  2879. xmlFree(buffer);
  2880. return(NULL);
  2881. }
  2882. buffer = tmp;
  2883. }
  2884. buffer[len++] = c;
  2885. c = *cur++;
  2886. }
  2887. buffer[len] = 0;
  2888. }
  2889. if (buffer == NULL)
  2890. ret = xmlStrndup(buf, len);
  2891. else {
  2892. ret = buffer;
  2893. }
  2894. }
  2895. return(ret);
  2896. }
  2897. /************************************************************************
  2898. * *
  2899. * The parser itself *
  2900. * Relates to http://www.w3.org/TR/REC-xml *
  2901. * *
  2902. ************************************************************************/
  2903. /************************************************************************
  2904. * *
  2905. * Routines to parse Name, NCName and NmToken *
  2906. * *
  2907. ************************************************************************/
  #ifdef DEBUG
  /*
   * Debug-only call counters: each one is incremented on entry to the
   * name-parsing routine it is named after. Objects with static storage
   * start at zero.
   */
  static unsigned long nbParseName;
  static unsigned long nbParseNmToken;
  static unsigned long nbParseNCName;
  static unsigned long nbParseNCNameComplex;
  static unsigned long nbParseNameComplex;
  static unsigned long nbParseStringName;
  #endif
  2916. /*
  2917. * The two following functions are related to the change of accepted
  2918. * characters for Name and NmToken in the Revision 5 of XML-1.0
  2919. * They correspond to the modified production [4] and the new production [4a]
  2920. * changes in that revision. Also note that the macros used for the
  2921. * productions Letter, Digit, CombiningChar and Extender are not needed
  2922. * anymore.
  2923. * We still keep compatibility to pre-revision5 parsing semantic if the
  2924. * new XML_PARSE_OLD10 option is given to the parser.
  2925. */
  2926. static int
  2927. xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
  2928. if ((ctxt->options & XML_PARSE_OLD10) == 0) {
  2929. /*
  2930. * Use the new checks of production [4] [4a] amd [5] of the
  2931. * Update 5 of XML-1.0
  2932. */
  2933. if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
  2934. (((c >= 'a') && (c <= 'z')) ||
  2935. ((c >= 'A') && (c <= 'Z')) ||
  2936. (c == '_') || (c == ':') ||
  2937. ((c >= 0xC0) && (c <= 0xD6)) ||
  2938. ((c >= 0xD8) && (c <= 0xF6)) ||
  2939. ((c >= 0xF8) && (c <= 0x2FF)) ||
  2940. ((c >= 0x370) && (c <= 0x37D)) ||
  2941. ((c >= 0x37F) && (c <= 0x1FFF)) ||
  2942. ((c >= 0x200C) && (c <= 0x200D)) ||
  2943. ((c >= 0x2070) && (c <= 0x218F)) ||
  2944. ((c >= 0x2C00) && (c <= 0x2FEF)) ||
  2945. ((c >= 0x3001) && (c <= 0xD7FF)) ||
  2946. ((c >= 0xF900) && (c <= 0xFDCF)) ||
  2947. ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
  2948. ((c >= 0x10000) && (c <= 0xEFFFF))))
  2949. return(1);
  2950. } else {
  2951. if (IS_LETTER(c) || (c == '_') || (c == ':'))
  2952. return(1);
  2953. }
  2954. return(0);
  2955. }
  2956. static int
  2957. xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
  2958. if ((ctxt->options & XML_PARSE_OLD10) == 0) {
  2959. /*
  2960. * Use the new checks of production [4] [4a] amd [5] of the
  2961. * Update 5 of XML-1.0
  2962. */
  2963. if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
  2964. (((c >= 'a') && (c <= 'z')) ||
  2965. ((c >= 'A') && (c <= 'Z')) ||
  2966. ((c >= '0') && (c <= '9')) || /* !start */
  2967. (c == '_') || (c == ':') ||
  2968. (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
  2969. ((c >= 0xC0) && (c <= 0xD6)) ||
  2970. ((c >= 0xD8) && (c <= 0xF6)) ||
  2971. ((c >= 0xF8) && (c <= 0x2FF)) ||
  2972. ((c >= 0x300) && (c <= 0x36F)) || /* !start */
  2973. ((c >= 0x370) && (c <= 0x37D)) ||
  2974. ((c >= 0x37F) && (c <= 0x1FFF)) ||
  2975. ((c >= 0x200C) && (c <= 0x200D)) ||
  2976. ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
  2977. ((c >= 0x2070) && (c <= 0x218F)) ||
  2978. ((c >= 0x2C00) && (c <= 0x2FEF)) ||
  2979. ((c >= 0x3001) && (c <= 0xD7FF)) ||
  2980. ((c >= 0xF900) && (c <= 0xFDCF)) ||
  2981. ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
  2982. ((c >= 0x10000) && (c <= 0xEFFFF))))
  2983. return(1);
  2984. } else {
  2985. if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
  2986. (c == '.') || (c == '-') ||
  2987. (c == '_') || (c == ':') ||
  2988. (IS_COMBINING(c)) ||
  2989. (IS_EXTENDER(c)))
  2990. return(1);
  2991. }
  2992. return(0);
  2993. }
  2994. static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
  2995. int *len, int *alloc, int normalize);
  2996. static const xmlChar *
  2997. xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
  2998. int len = 0, l;
  2999. int c;
  3000. int count = 0;
  3001. #ifdef DEBUG
  3002. nbParseNameComplex++;
  3003. #endif
  3004. /*
  3005. * Handler for more complex cases
  3006. */
  3007. GROW;
  3008. if (ctxt->instate == XML_PARSER_EOF)
  3009. return(NULL);
  3010. c = CUR_CHAR(l);
  3011. if ((ctxt->options & XML_PARSE_OLD10) == 0) {
  3012. /*
  3013. * Use the new checks of production [4] [4a] amd [5] of the
  3014. * Update 5 of XML-1.0
  3015. */
  3016. if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
  3017. (!(((c >= 'a') && (c <= 'z')) ||
  3018. ((c >= 'A') && (c <= 'Z')) ||
  3019. (c == '_') || (c == ':') ||
  3020. ((c >= 0xC0) && (c <= 0xD6)) ||
  3021. ((c >= 0xD8) && (c <= 0xF6)) ||
  3022. ((c >= 0xF8) && (c <= 0x2FF)) ||
  3023. ((c >= 0x370) && (c <= 0x37D)) ||
  3024. ((c >= 0x37F) && (c <= 0x1FFF)) ||
  3025. ((c >= 0x200C) && (c <= 0x200D)) ||
  3026. ((c >= 0x2070) && (c <= 0x218F)) ||
  3027. ((c >= 0x2C00) && (c <= 0x2FEF)) ||
  3028. ((c >= 0x3001) && (c <= 0xD7FF)) ||
  3029. ((c >= 0xF900) && (c <= 0xFDCF)) ||
  3030. ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
  3031. ((c >= 0x10000) && (c <= 0xEFFFF))))) {
  3032. return(NULL);
  3033. }
  3034. len += l;
  3035. NEXTL(l);
  3036. c = CUR_CHAR(l);
  3037. while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
  3038. (((c >= 'a') && (c <= 'z')) ||
  3039. ((c >= 'A') && (c <= 'Z')) ||
  3040. ((c >= '0') && (c <= '9')) || /* !start */
  3041. (c == '_') || (c == ':') ||
  3042. (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
  3043. ((c >= 0xC0) && (c <= 0xD6)) ||
  3044. ((c >= 0xD8) && (c <= 0xF6)) ||
  3045. ((c >= 0xF8) && (c <= 0x2FF)) ||
  3046. ((c >= 0x300) && (c <= 0x36F)) || /* !start */
  3047. ((c >= 0x370) && (c <= 0x37D)) ||
  3048. ((c >= 0x37F) && (c <= 0x1FFF)) ||
  3049. ((c >= 0x200C) && (c <= 0x200D)) ||
  3050. ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
  3051. ((c >= 0x2070) && (c <= 0x218F)) ||
  3052. ((c >= 0x2C00) && (c <= 0x2FEF)) ||
  3053. ((c >= 0x3001) && (c <= 0xD7FF)) ||
  3054. ((c >= 0xF900) && (c <= 0xFDCF)) ||
  3055. ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
  3056. ((c >= 0x10000) && (c <= 0xEFFFF))
  3057. )) {
  3058. if (count++ > XML_PARSER_CHUNK_SIZE) {
  3059. count = 0;
  3060. GROW;
  3061. if (ctxt->instate == XML_PARSER_EOF)
  3062. return(NULL);
  3063. }
  3064. len += l;
  3065. NEXTL(l);
  3066. c = CUR_CHAR(l);
  3067. }
  3068. } else {
  3069. if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
  3070. (!IS_LETTER(c) && (c != '_') &&
  3071. (c != ':'))) {
  3072. return(NULL);
  3073. }
  3074. len += l;
  3075. NEXTL(l);
  3076. c = CUR_CHAR(l);
  3077. while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
  3078. ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
  3079. (c == '.') || (c == '-') ||
  3080. (c == '_') || (c == ':') ||
  3081. (IS_COMBINING(c)) ||
  3082. (IS_EXTENDER(c)))) {
  3083. if (count++ > XML_PARSER_CHUNK_SIZE) {
  3084. count = 0;
  3085. GROW;
  3086. if (ctxt->instate == XML_PARSER_EOF)
  3087. return(NULL);
  3088. }
  3089. len += l;
  3090. NEXTL(l);
  3091. c = CUR_CHAR(l);
  3092. }
  3093. }
  3094. if ((len > XML_MAX_NAME_LENGTH) &&
  3095. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3096. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
  3097. return(NULL);
  3098. }
  3099. if (ctxt->input->cur - ctxt->input->base < len) {
  3100. /*
  3101. * There were a couple of bugs where PERefs lead to to a change
  3102. * of the buffer. Check the buffer size to avoid passing an invalid
  3103. * pointer to xmlDictLookup.
  3104. */
  3105. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  3106. "unexpected change of input buffer");
  3107. return (NULL);
  3108. }
  3109. if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
  3110. return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
  3111. return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
  3112. }
  3113. /**
  3114. * xmlParseName:
  3115. * @ctxt: an XML parser context
  3116. *
  3117. * parse an XML name.
  3118. *
  3119. * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
  3120. * CombiningChar | Extender
  3121. *
  3122. * [5] Name ::= (Letter | '_' | ':') (NameChar)*
  3123. *
  3124. * [6] Names ::= Name (#x20 Name)*
  3125. *
  3126. * Returns the Name parsed or NULL
  3127. */
  3128. const xmlChar *
  3129. xmlParseName(xmlParserCtxtPtr ctxt) {
  3130. const xmlChar *in;
  3131. const xmlChar *ret;
  3132. int count = 0;
  3133. GROW;
  3134. #ifdef DEBUG
  3135. nbParseName++;
  3136. #endif
  3137. /*
  3138. * Accelerator for simple ASCII names
  3139. */
  3140. in = ctxt->input->cur;
  3141. if (((*in >= 0x61) && (*in <= 0x7A)) ||
  3142. ((*in >= 0x41) && (*in <= 0x5A)) ||
  3143. (*in == '_') || (*in == ':')) {
  3144. in++;
  3145. while (((*in >= 0x61) && (*in <= 0x7A)) ||
  3146. ((*in >= 0x41) && (*in <= 0x5A)) ||
  3147. ((*in >= 0x30) && (*in <= 0x39)) ||
  3148. (*in == '_') || (*in == '-') ||
  3149. (*in == ':') || (*in == '.'))
  3150. in++;
  3151. if ((*in > 0) && (*in < 0x80)) {
  3152. count = in - ctxt->input->cur;
  3153. if ((count > XML_MAX_NAME_LENGTH) &&
  3154. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3155. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
  3156. return(NULL);
  3157. }
  3158. ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
  3159. ctxt->input->cur = in;
  3160. ctxt->input->col += count;
  3161. if (ret == NULL)
  3162. xmlErrMemory(ctxt, NULL);
  3163. return(ret);
  3164. }
  3165. }
  3166. /* accelerator for special cases */
  3167. return(xmlParseNameComplex(ctxt));
  3168. }
  3169. static const xmlChar *
  3170. xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
  3171. int len = 0, l;
  3172. int c;
  3173. int count = 0;
  3174. size_t startPosition = 0;
  3175. #ifdef DEBUG
  3176. nbParseNCNameComplex++;
  3177. #endif
  3178. /*
  3179. * Handler for more complex cases
  3180. */
  3181. GROW;
  3182. startPosition = CUR_PTR - BASE_PTR;
  3183. c = CUR_CHAR(l);
  3184. if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
  3185. (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
  3186. return(NULL);
  3187. }
  3188. while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
  3189. (xmlIsNameChar(ctxt, c) && (c != ':'))) {
  3190. if (count++ > XML_PARSER_CHUNK_SIZE) {
  3191. if ((len > XML_MAX_NAME_LENGTH) &&
  3192. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3193. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
  3194. return(NULL);
  3195. }
  3196. count = 0;
  3197. GROW;
  3198. if (ctxt->instate == XML_PARSER_EOF)
  3199. return(NULL);
  3200. }
  3201. len += l;
  3202. NEXTL(l);
  3203. c = CUR_CHAR(l);
  3204. if (c == 0) {
  3205. count = 0;
  3206. /*
  3207. * when shrinking to extend the buffer we really need to preserve
  3208. * the part of the name we already parsed. Hence rolling back
  3209. * by current length.
  3210. */
  3211. ctxt->input->cur -= l;
  3212. GROW;
  3213. if (ctxt->instate == XML_PARSER_EOF)
  3214. return(NULL);
  3215. ctxt->input->cur += l;
  3216. c = CUR_CHAR(l);
  3217. }
  3218. }
  3219. if ((len > XML_MAX_NAME_LENGTH) &&
  3220. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3221. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
  3222. return(NULL);
  3223. }
  3224. return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
  3225. }
  3226. /**
  3227. * xmlParseNCName:
  3228. * @ctxt: an XML parser context
  3229. * @len: length of the string parsed
  3230. *
  3231. * parse an XML name.
  3232. *
  3233. * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
  3234. * CombiningChar | Extender
  3235. *
  3236. * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
  3237. *
  3238. * Returns the Name parsed or NULL
  3239. */
  3240. static const xmlChar *
  3241. xmlParseNCName(xmlParserCtxtPtr ctxt) {
  3242. const xmlChar *in, *e;
  3243. const xmlChar *ret;
  3244. int count = 0;
  3245. #ifdef DEBUG
  3246. nbParseNCName++;
  3247. #endif
  3248. /*
  3249. * Accelerator for simple ASCII names
  3250. */
  3251. in = ctxt->input->cur;
  3252. e = ctxt->input->end;
  3253. if ((((*in >= 0x61) && (*in <= 0x7A)) ||
  3254. ((*in >= 0x41) && (*in <= 0x5A)) ||
  3255. (*in == '_')) && (in < e)) {
  3256. in++;
  3257. while ((((*in >= 0x61) && (*in <= 0x7A)) ||
  3258. ((*in >= 0x41) && (*in <= 0x5A)) ||
  3259. ((*in >= 0x30) && (*in <= 0x39)) ||
  3260. (*in == '_') || (*in == '-') ||
  3261. (*in == '.')) && (in < e))
  3262. in++;
  3263. if (in >= e)
  3264. goto complex;
  3265. if ((*in > 0) && (*in < 0x80)) {
  3266. count = in - ctxt->input->cur;
  3267. if ((count > XML_MAX_NAME_LENGTH) &&
  3268. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3269. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
  3270. return(NULL);
  3271. }
  3272. ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
  3273. ctxt->input->cur = in;
  3274. ctxt->input->col += count;
  3275. if (ret == NULL) {
  3276. xmlErrMemory(ctxt, NULL);
  3277. }
  3278. return(ret);
  3279. }
  3280. }
  3281. complex:
  3282. return(xmlParseNCNameComplex(ctxt));
  3283. }
  3284. /**
  3285. * xmlParseNameAndCompare:
  3286. * @ctxt: an XML parser context
  3287. *
  3288. * parse an XML name and compares for match
  3289. * (specialized for endtag parsing)
  3290. *
  3291. * Returns NULL for an illegal name, (xmlChar*) 1 for success
  3292. * and the name for mismatch
  3293. */
  3294. static const xmlChar *
  3295. xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
  3296. register const xmlChar *cmp = other;
  3297. register const xmlChar *in;
  3298. const xmlChar *ret;
  3299. GROW;
  3300. if (ctxt->instate == XML_PARSER_EOF)
  3301. return(NULL);
  3302. in = ctxt->input->cur;
  3303. while (*in != 0 && *in == *cmp) {
  3304. ++in;
  3305. ++cmp;
  3306. }
  3307. if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
  3308. /* success */
  3309. ctxt->input->col += in - ctxt->input->cur;
  3310. ctxt->input->cur = in;
  3311. return (const xmlChar*) 1;
  3312. }
  3313. /* failure (or end of input buffer), check with full function */
  3314. ret = xmlParseName (ctxt);
  3315. /* strings coming from the dictionary direct compare possible */
  3316. if (ret == other) {
  3317. return (const xmlChar*) 1;
  3318. }
  3319. return ret;
  3320. }
  3321. /**
  3322. * xmlParseStringName:
  3323. * @ctxt: an XML parser context
  3324. * @str: a pointer to the string pointer (IN/OUT)
  3325. *
  3326. * parse an XML name.
  3327. *
  3328. * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
  3329. * CombiningChar | Extender
  3330. *
  3331. * [5] Name ::= (Letter | '_' | ':') (NameChar)*
  3332. *
  3333. * [6] Names ::= Name (#x20 Name)*
  3334. *
  3335. * Returns the Name parsed or NULL. The @str pointer
  3336. * is updated to the current location in the string.
  3337. */
  3338. static xmlChar *
  3339. xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
  3340. xmlChar buf[XML_MAX_NAMELEN + 5];
  3341. const xmlChar *cur = *str;
  3342. int len = 0, l;
  3343. int c;
  3344. #ifdef DEBUG
  3345. nbParseStringName++;
  3346. #endif
  3347. c = CUR_SCHAR(cur, l);
  3348. if (!xmlIsNameStartChar(ctxt, c)) {
  3349. return(NULL);
  3350. }
  3351. COPY_BUF(l,buf,len,c);
  3352. cur += l;
  3353. c = CUR_SCHAR(cur, l);
  3354. while (xmlIsNameChar(ctxt, c)) {
  3355. COPY_BUF(l,buf,len,c);
  3356. cur += l;
  3357. c = CUR_SCHAR(cur, l);
  3358. if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
  3359. /*
  3360. * Okay someone managed to make a huge name, so he's ready to pay
  3361. * for the processing speed.
  3362. */
  3363. xmlChar *buffer;
  3364. int max = len * 2;
  3365. buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
  3366. if (buffer == NULL) {
  3367. xmlErrMemory(ctxt, NULL);
  3368. return(NULL);
  3369. }
  3370. memcpy(buffer, buf, len);
  3371. while (xmlIsNameChar(ctxt, c)) {
  3372. if (len + 10 > max) {
  3373. xmlChar *tmp;
  3374. if ((len > XML_MAX_NAME_LENGTH) &&
  3375. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3376. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
  3377. xmlFree(buffer);
  3378. return(NULL);
  3379. }
  3380. max *= 2;
  3381. tmp = (xmlChar *) xmlRealloc(buffer,
  3382. max * sizeof(xmlChar));
  3383. if (tmp == NULL) {
  3384. xmlErrMemory(ctxt, NULL);
  3385. xmlFree(buffer);
  3386. return(NULL);
  3387. }
  3388. buffer = tmp;
  3389. }
  3390. COPY_BUF(l,buffer,len,c);
  3391. cur += l;
  3392. c = CUR_SCHAR(cur, l);
  3393. }
  3394. buffer[len] = 0;
  3395. *str = cur;
  3396. return(buffer);
  3397. }
  3398. }
  3399. if ((len > XML_MAX_NAME_LENGTH) &&
  3400. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3401. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
  3402. return(NULL);
  3403. }
  3404. *str = cur;
  3405. return(xmlStrndup(buf, len));
  3406. }
  3407. /**
  3408. * xmlParseNmtoken:
  3409. * @ctxt: an XML parser context
  3410. *
  3411. * parse an XML Nmtoken.
  3412. *
  3413. * [7] Nmtoken ::= (NameChar)+
  3414. *
  3415. * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
  3416. *
  3417. * Returns the Nmtoken parsed or NULL
  3418. */
  3419. xmlChar *
  3420. xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
  3421. xmlChar buf[XML_MAX_NAMELEN + 5];
  3422. int len = 0, l;
  3423. int c;
  3424. int count = 0;
  3425. #ifdef DEBUG
  3426. nbParseNmToken++;
  3427. #endif
  3428. GROW;
  3429. if (ctxt->instate == XML_PARSER_EOF)
  3430. return(NULL);
  3431. c = CUR_CHAR(l);
  3432. while (xmlIsNameChar(ctxt, c)) {
  3433. if (count++ > XML_PARSER_CHUNK_SIZE) {
  3434. count = 0;
  3435. GROW;
  3436. }
  3437. COPY_BUF(l,buf,len,c);
  3438. NEXTL(l);
  3439. c = CUR_CHAR(l);
  3440. if (c == 0) {
  3441. count = 0;
  3442. GROW;
  3443. if (ctxt->instate == XML_PARSER_EOF)
  3444. return(NULL);
  3445. c = CUR_CHAR(l);
  3446. }
  3447. if (len >= XML_MAX_NAMELEN) {
  3448. /*
  3449. * Okay someone managed to make a huge token, so he's ready to pay
  3450. * for the processing speed.
  3451. */
  3452. xmlChar *buffer;
  3453. int max = len * 2;
  3454. buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
  3455. if (buffer == NULL) {
  3456. xmlErrMemory(ctxt, NULL);
  3457. return(NULL);
  3458. }
  3459. memcpy(buffer, buf, len);
  3460. while (xmlIsNameChar(ctxt, c)) {
  3461. if (count++ > XML_PARSER_CHUNK_SIZE) {
  3462. count = 0;
  3463. GROW;
  3464. if (ctxt->instate == XML_PARSER_EOF) {
  3465. xmlFree(buffer);
  3466. return(NULL);
  3467. }
  3468. }
  3469. if (len + 10 > max) {
  3470. xmlChar *tmp;
  3471. if ((max > XML_MAX_NAME_LENGTH) &&
  3472. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3473. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
  3474. xmlFree(buffer);
  3475. return(NULL);
  3476. }
  3477. max *= 2;
  3478. tmp = (xmlChar *) xmlRealloc(buffer,
  3479. max * sizeof(xmlChar));
  3480. if (tmp == NULL) {
  3481. xmlErrMemory(ctxt, NULL);
  3482. xmlFree(buffer);
  3483. return(NULL);
  3484. }
  3485. buffer = tmp;
  3486. }
  3487. COPY_BUF(l,buffer,len,c);
  3488. NEXTL(l);
  3489. c = CUR_CHAR(l);
  3490. }
  3491. buffer[len] = 0;
  3492. return(buffer);
  3493. }
  3494. }
  3495. if (len == 0)
  3496. return(NULL);
  3497. if ((len > XML_MAX_NAME_LENGTH) &&
  3498. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3499. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
  3500. return(NULL);
  3501. }
  3502. return(xmlStrndup(buf, len));
  3503. }
  3504. /**
  3505. * xmlParseEntityValue:
  3506. * @ctxt: an XML parser context
  3507. * @orig: if non-NULL store a copy of the original entity value
  3508. *
  3509. * parse a value for ENTITY declarations
  3510. *
  3511. * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
  3512. * "'" ([^%&'] | PEReference | Reference)* "'"
  3513. *
  3514. * Returns the EntityValue parsed with reference substituted or NULL
  3515. */
  3516. xmlChar *
  3517. xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
  3518. xmlChar *buf = NULL;
  3519. int len = 0;
  3520. int size = XML_PARSER_BUFFER_SIZE;
  3521. int c, l;
  3522. xmlChar stop;
  3523. xmlChar *ret = NULL;
  3524. const xmlChar *cur = NULL;
  3525. xmlParserInputPtr input;
  3526. if (RAW == '"') stop = '"';
  3527. else if (RAW == '\'') stop = '\'';
  3528. else {
  3529. xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
  3530. return(NULL);
  3531. }
  3532. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  3533. if (buf == NULL) {
  3534. xmlErrMemory(ctxt, NULL);
  3535. return(NULL);
  3536. }
  3537. /*
  3538. * The content of the entity definition is copied in a buffer.
  3539. */
  3540. ctxt->instate = XML_PARSER_ENTITY_VALUE;
  3541. input = ctxt->input;
  3542. GROW;
  3543. if (ctxt->instate == XML_PARSER_EOF)
  3544. goto error;
  3545. NEXT;
  3546. c = CUR_CHAR(l);
  3547. /*
  3548. * NOTE: 4.4.5 Included in Literal
  3549. * When a parameter entity reference appears in a literal entity
  3550. * value, ... a single or double quote character in the replacement
  3551. * text is always treated as a normal data character and will not
  3552. * terminate the literal.
  3553. * In practice it means we stop the loop only when back at parsing
  3554. * the initial entity and the quote is found
  3555. */
  3556. while (((IS_CHAR(c)) && ((c != stop) || /* checked */
  3557. (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
  3558. if (len + 5 >= size) {
  3559. xmlChar *tmp;
  3560. size *= 2;
  3561. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  3562. if (tmp == NULL) {
  3563. xmlErrMemory(ctxt, NULL);
  3564. goto error;
  3565. }
  3566. buf = tmp;
  3567. }
  3568. COPY_BUF(l,buf,len,c);
  3569. NEXTL(l);
  3570. GROW;
  3571. c = CUR_CHAR(l);
  3572. if (c == 0) {
  3573. GROW;
  3574. c = CUR_CHAR(l);
  3575. }
  3576. }
  3577. buf[len] = 0;
  3578. if (ctxt->instate == XML_PARSER_EOF)
  3579. goto error;
  3580. if (c != stop) {
  3581. xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
  3582. goto error;
  3583. }
  3584. NEXT;
  3585. /*
  3586. * Raise problem w.r.t. '&' and '%' being used in non-entities
  3587. * reference constructs. Note Charref will be handled in
  3588. * xmlStringDecodeEntities()
  3589. */
  3590. cur = buf;
  3591. while (*cur != 0) { /* non input consuming */
  3592. if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
  3593. xmlChar *name;
  3594. xmlChar tmp = *cur;
  3595. int nameOk = 0;
  3596. cur++;
  3597. name = xmlParseStringName(ctxt, &cur);
  3598. if (name != NULL) {
  3599. nameOk = 1;
  3600. xmlFree(name);
  3601. }
  3602. if ((nameOk == 0) || (*cur != ';')) {
  3603. xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
  3604. "EntityValue: '%c' forbidden except for entities references\n",
  3605. tmp);
  3606. goto error;
  3607. }
  3608. if ((tmp == '%') && (ctxt->inSubset == 1) &&
  3609. (ctxt->inputNr == 1)) {
  3610. xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
  3611. goto error;
  3612. }
  3613. if (*cur == 0)
  3614. break;
  3615. }
  3616. cur++;
  3617. }
  3618. /*
  3619. * Then PEReference entities are substituted.
  3620. *
  3621. * NOTE: 4.4.7 Bypassed
  3622. * When a general entity reference appears in the EntityValue in
  3623. * an entity declaration, it is bypassed and left as is.
  3624. * so XML_SUBSTITUTE_REF is not set here.
  3625. */
  3626. ++ctxt->depth;
  3627. ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
  3628. 0, 0, 0);
  3629. --ctxt->depth;
  3630. if (orig != NULL) {
  3631. *orig = buf;
  3632. buf = NULL;
  3633. }
  3634. error:
  3635. if (buf != NULL)
  3636. xmlFree(buf);
  3637. return(ret);
  3638. }
  3639. /**
  3640. * xmlParseAttValueComplex:
  3641. * @ctxt: an XML parser context
  3642. * @len: the resulting attribute len
  3643. * @normalize: whether to apply the inner normalization
  3644. *
  3645. * parse a value for an attribute, this is the fallback function
  3646. * of xmlParseAttValue() when the attribute parsing requires handling
  3647. * of non-ASCII characters, or normalization compaction.
  3648. *
  3649. * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
  3650. */
  3651. static xmlChar *
  3652. xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
  3653. xmlChar limit = 0;
  3654. xmlChar *buf = NULL;
  3655. xmlChar *rep = NULL;
  3656. size_t len = 0;
  3657. size_t buf_size = 0;
  3658. int c, l, in_space = 0;
  3659. xmlChar *current = NULL;
  3660. xmlEntityPtr ent;
  3661. if (NXT(0) == '"') {
  3662. ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
  3663. limit = '"';
  3664. NEXT;
  3665. } else if (NXT(0) == '\'') {
  3666. limit = '\'';
  3667. ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
  3668. NEXT;
  3669. } else {
  3670. xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
  3671. return(NULL);
  3672. }
  3673. /*
  3674. * allocate a translation buffer.
  3675. */
  3676. buf_size = XML_PARSER_BUFFER_SIZE;
  3677. buf = (xmlChar *) xmlMallocAtomic(buf_size);
  3678. if (buf == NULL) goto mem_error;
  3679. /*
  3680. * OK loop until we reach one of the ending char or a size limit.
  3681. */
  3682. c = CUR_CHAR(l);
  3683. while (((NXT(0) != limit) && /* checked */
  3684. (IS_CHAR(c)) && (c != '<')) &&
  3685. (ctxt->instate != XML_PARSER_EOF)) {
  3686. /*
  3687. * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
  3688. * special option is given
  3689. */
  3690. if ((len > XML_MAX_TEXT_LENGTH) &&
  3691. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3692. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  3693. "AttValue length too long\n");
  3694. goto mem_error;
  3695. }
  3696. if (c == '&') {
  3697. in_space = 0;
  3698. if (NXT(1) == '#') {
  3699. int val = xmlParseCharRef(ctxt);
  3700. if (val == '&') {
  3701. if (ctxt->replaceEntities) {
  3702. if (len + 10 > buf_size) {
  3703. growBuffer(buf, 10);
  3704. }
  3705. buf[len++] = '&';
  3706. } else {
  3707. /*
  3708. * The reparsing will be done in xmlStringGetNodeList()
  3709. * called by the attribute() function in SAX.c
  3710. */
  3711. if (len + 10 > buf_size) {
  3712. growBuffer(buf, 10);
  3713. }
  3714. buf[len++] = '&';
  3715. buf[len++] = '#';
  3716. buf[len++] = '3';
  3717. buf[len++] = '8';
  3718. buf[len++] = ';';
  3719. }
  3720. } else if (val != 0) {
  3721. if (len + 10 > buf_size) {
  3722. growBuffer(buf, 10);
  3723. }
  3724. len += xmlCopyChar(0, &buf[len], val);
  3725. }
  3726. } else {
  3727. ent = xmlParseEntityRef(ctxt);
  3728. ctxt->nbentities++;
  3729. if (ent != NULL)
  3730. ctxt->nbentities += ent->owner;
  3731. if ((ent != NULL) &&
  3732. (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
  3733. if (len + 10 > buf_size) {
  3734. growBuffer(buf, 10);
  3735. }
  3736. if ((ctxt->replaceEntities == 0) &&
  3737. (ent->content[0] == '&')) {
  3738. buf[len++] = '&';
  3739. buf[len++] = '#';
  3740. buf[len++] = '3';
  3741. buf[len++] = '8';
  3742. buf[len++] = ';';
  3743. } else {
  3744. buf[len++] = ent->content[0];
  3745. }
  3746. } else if ((ent != NULL) &&
  3747. (ctxt->replaceEntities != 0)) {
  3748. if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
  3749. ++ctxt->depth;
  3750. rep = xmlStringDecodeEntities(ctxt, ent->content,
  3751. XML_SUBSTITUTE_REF,
  3752. 0, 0, 0);
  3753. --ctxt->depth;
  3754. if (rep != NULL) {
  3755. current = rep;
  3756. while (*current != 0) { /* non input consuming */
  3757. if ((*current == 0xD) || (*current == 0xA) ||
  3758. (*current == 0x9)) {
  3759. buf[len++] = 0x20;
  3760. current++;
  3761. } else
  3762. buf[len++] = *current++;
  3763. if (len + 10 > buf_size) {
  3764. growBuffer(buf, 10);
  3765. }
  3766. }
  3767. xmlFree(rep);
  3768. rep = NULL;
  3769. }
  3770. } else {
  3771. if (len + 10 > buf_size) {
  3772. growBuffer(buf, 10);
  3773. }
  3774. if (ent->content != NULL)
  3775. buf[len++] = ent->content[0];
  3776. }
  3777. } else if (ent != NULL) {
  3778. int i = xmlStrlen(ent->name);
  3779. const xmlChar *cur = ent->name;
  3780. /*
  3781. * This may look absurd but is needed to detect
  3782. * entities problems
  3783. */
  3784. if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
  3785. (ent->content != NULL) && (ent->checked == 0)) {
  3786. unsigned long oldnbent = ctxt->nbentities, diff;
  3787. ++ctxt->depth;
  3788. rep = xmlStringDecodeEntities(ctxt, ent->content,
  3789. XML_SUBSTITUTE_REF, 0, 0, 0);
  3790. --ctxt->depth;
  3791. diff = ctxt->nbentities - oldnbent + 1;
  3792. if (diff > INT_MAX / 2)
  3793. diff = INT_MAX / 2;
  3794. ent->checked = diff * 2;
  3795. if (rep != NULL) {
  3796. if (xmlStrchr(rep, '<'))
  3797. ent->checked |= 1;
  3798. xmlFree(rep);
  3799. rep = NULL;
  3800. } else {
  3801. ent->content[0] = 0;
  3802. }
  3803. }
  3804. /*
  3805. * Just output the reference
  3806. */
  3807. buf[len++] = '&';
  3808. while (len + i + 10 > buf_size) {
  3809. growBuffer(buf, i + 10);
  3810. }
  3811. for (;i > 0;i--)
  3812. buf[len++] = *cur++;
  3813. buf[len++] = ';';
  3814. }
  3815. }
  3816. } else {
  3817. if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
  3818. if ((len != 0) || (!normalize)) {
  3819. if ((!normalize) || (!in_space)) {
  3820. COPY_BUF(l,buf,len,0x20);
  3821. while (len + 10 > buf_size) {
  3822. growBuffer(buf, 10);
  3823. }
  3824. }
  3825. in_space = 1;
  3826. }
  3827. } else {
  3828. in_space = 0;
  3829. COPY_BUF(l,buf,len,c);
  3830. if (len + 10 > buf_size) {
  3831. growBuffer(buf, 10);
  3832. }
  3833. }
  3834. NEXTL(l);
  3835. }
  3836. GROW;
  3837. c = CUR_CHAR(l);
  3838. }
  3839. if (ctxt->instate == XML_PARSER_EOF)
  3840. goto error;
  3841. if ((in_space) && (normalize)) {
  3842. while ((len > 0) && (buf[len - 1] == 0x20)) len--;
  3843. }
  3844. buf[len] = 0;
  3845. if (RAW == '<') {
  3846. xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
  3847. } else if (RAW != limit) {
  3848. if ((c != 0) && (!IS_CHAR(c))) {
  3849. xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
  3850. "invalid character in attribute value\n");
  3851. } else {
  3852. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  3853. "AttValue: ' expected\n");
  3854. }
  3855. } else
  3856. NEXT;
  3857. /*
  3858. * There we potentially risk an overflow, don't allow attribute value of
  3859. * length more than INT_MAX it is a very reasonable assumption !
  3860. */
  3861. if (len >= INT_MAX) {
  3862. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  3863. "AttValue length too long\n");
  3864. goto mem_error;
  3865. }
  3866. if (attlen != NULL) *attlen = (int) len;
  3867. return(buf);
  3868. mem_error:
  3869. xmlErrMemory(ctxt, NULL);
  3870. error:
  3871. if (buf != NULL)
  3872. xmlFree(buf);
  3873. if (rep != NULL)
  3874. xmlFree(rep);
  3875. return(NULL);
  3876. }
  3877. /**
  3878. * xmlParseAttValue:
  3879. * @ctxt: an XML parser context
  3880. *
  3881. * parse a value for an attribute
  3882. * Note: the parser won't do substitution of entities here, this
  3883. * will be handled later in xmlStringGetNodeList
  3884. *
  3885. * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
  3886. * "'" ([^<&'] | Reference)* "'"
  3887. *
  3888. * 3.3.3 Attribute-Value Normalization:
  3889. * Before the value of an attribute is passed to the application or
  3890. * checked for validity, the XML processor must normalize it as follows:
  3891. * - a character reference is processed by appending the referenced
  3892. * character to the attribute value
  3893. * - an entity reference is processed by recursively processing the
  3894. * replacement text of the entity
  3895. * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
  3896. * appending #x20 to the normalized value, except that only a single
  3897. * #x20 is appended for a "#xD#xA" sequence that is part of an external
  3898. * parsed entity or the literal entity value of an internal parsed entity
  3899. * - other characters are processed by appending them to the normalized value
  3900. * If the declared value is not CDATA, then the XML processor must further
  3901. * process the normalized attribute value by discarding any leading and
  3902. * trailing space (#x20) characters, and by replacing sequences of space
  3903. * (#x20) characters by a single space (#x20) character.
  3904. * All attributes for which no declaration has been read should be treated
  3905. * by a non-validating parser as if declared CDATA.
  3906. *
  3907. * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
  3908. */
  3909. xmlChar *
  3910. xmlParseAttValue(xmlParserCtxtPtr ctxt) {
  3911. if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
  3912. return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
  3913. }
  3914. /**
  3915. * xmlParseSystemLiteral:
  3916. * @ctxt: an XML parser context
  3917. *
  3918. * parse an XML Literal
  3919. *
  3920. * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
  3921. *
  3922. * Returns the SystemLiteral parsed or NULL
  3923. */
  3924. xmlChar *
  3925. xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
  3926. xmlChar *buf = NULL;
  3927. int len = 0;
  3928. int size = XML_PARSER_BUFFER_SIZE;
  3929. int cur, l;
  3930. xmlChar stop;
  3931. int state = ctxt->instate;
  3932. int count = 0;
  3933. SHRINK;
  3934. if (RAW == '"') {
  3935. NEXT;
  3936. stop = '"';
  3937. } else if (RAW == '\'') {
  3938. NEXT;
  3939. stop = '\'';
  3940. } else {
  3941. xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
  3942. return(NULL);
  3943. }
  3944. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  3945. if (buf == NULL) {
  3946. xmlErrMemory(ctxt, NULL);
  3947. return(NULL);
  3948. }
  3949. ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
  3950. cur = CUR_CHAR(l);
  3951. while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
  3952. if (len + 5 >= size) {
  3953. xmlChar *tmp;
  3954. if ((size > XML_MAX_NAME_LENGTH) &&
  3955. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  3956. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
  3957. xmlFree(buf);
  3958. ctxt->instate = (xmlParserInputState) state;
  3959. return(NULL);
  3960. }
  3961. size *= 2;
  3962. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  3963. if (tmp == NULL) {
  3964. xmlFree(buf);
  3965. xmlErrMemory(ctxt, NULL);
  3966. ctxt->instate = (xmlParserInputState) state;
  3967. return(NULL);
  3968. }
  3969. buf = tmp;
  3970. }
  3971. count++;
  3972. if (count > 50) {
  3973. SHRINK;
  3974. GROW;
  3975. count = 0;
  3976. if (ctxt->instate == XML_PARSER_EOF) {
  3977. xmlFree(buf);
  3978. return(NULL);
  3979. }
  3980. }
  3981. COPY_BUF(l,buf,len,cur);
  3982. NEXTL(l);
  3983. cur = CUR_CHAR(l);
  3984. if (cur == 0) {
  3985. GROW;
  3986. SHRINK;
  3987. cur = CUR_CHAR(l);
  3988. }
  3989. }
  3990. buf[len] = 0;
  3991. ctxt->instate = (xmlParserInputState) state;
  3992. if (!IS_CHAR(cur)) {
  3993. xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
  3994. } else {
  3995. NEXT;
  3996. }
  3997. return(buf);
  3998. }
  3999. /**
  4000. * xmlParsePubidLiteral:
  4001. * @ctxt: an XML parser context
  4002. *
  4003. * parse an XML public literal
  4004. *
  4005. * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
  4006. *
  4007. * Returns the PubidLiteral parsed or NULL.
  4008. */
  4009. xmlChar *
  4010. xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
  4011. xmlChar *buf = NULL;
  4012. int len = 0;
  4013. int size = XML_PARSER_BUFFER_SIZE;
  4014. xmlChar cur;
  4015. xmlChar stop;
  4016. int count = 0;
  4017. xmlParserInputState oldstate = ctxt->instate;
  4018. SHRINK;
  4019. if (RAW == '"') {
  4020. NEXT;
  4021. stop = '"';
  4022. } else if (RAW == '\'') {
  4023. NEXT;
  4024. stop = '\'';
  4025. } else {
  4026. xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
  4027. return(NULL);
  4028. }
  4029. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  4030. if (buf == NULL) {
  4031. xmlErrMemory(ctxt, NULL);
  4032. return(NULL);
  4033. }
  4034. ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
  4035. cur = CUR;
  4036. while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
  4037. if (len + 1 >= size) {
  4038. xmlChar *tmp;
  4039. if ((size > XML_MAX_NAME_LENGTH) &&
  4040. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  4041. xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
  4042. xmlFree(buf);
  4043. return(NULL);
  4044. }
  4045. size *= 2;
  4046. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  4047. if (tmp == NULL) {
  4048. xmlErrMemory(ctxt, NULL);
  4049. xmlFree(buf);
  4050. return(NULL);
  4051. }
  4052. buf = tmp;
  4053. }
  4054. buf[len++] = cur;
  4055. count++;
  4056. if (count > 50) {
  4057. SHRINK;
  4058. GROW;
  4059. count = 0;
  4060. if (ctxt->instate == XML_PARSER_EOF) {
  4061. xmlFree(buf);
  4062. return(NULL);
  4063. }
  4064. }
  4065. NEXT;
  4066. cur = CUR;
  4067. if (cur == 0) {
  4068. GROW;
  4069. SHRINK;
  4070. cur = CUR;
  4071. }
  4072. }
  4073. buf[len] = 0;
  4074. if (cur != stop) {
  4075. xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
  4076. } else {
  4077. NEXT;
  4078. }
  4079. ctxt->instate = oldstate;
  4080. return(buf);
  4081. }
  4082. static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
  /*
   * Lookup table used for the test in the inner loop of the char data
   * scanner. Entry i holds i itself (non-zero) when byte i may be skipped
   * by that loop, and 0 when the loop must stop on it. The stopping bytes
   * are: all control characters except TAB (so CR and LF are handled
   * separately), '&' (0x26), '<' (0x3C), ']' (0x5D) and every non-ASCII
   * byte (0x80-0xFF).
   */
  static const unsigned char test_char_data[256] = {
      /* 0x00 - 0x0F: only TAB (0x09) is accepted */
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0x10 - 0x1F */
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      /* 0x20 - 0x2F: '&' (0x26) stops the scan */
      0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
      0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
      /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
      0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
      0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
      /* 0x40 - 0x4F */
      0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
      0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
      /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
      0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
      0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
      /* 0x60 - 0x6F */
      0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
      0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
      /* 0x70 - 0x7F */
      0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
      0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
      /* 0x80 - 0xFF: non-ascii bytes always stop the scan */
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
  };
  4120. /**
  4121. * xmlParseCharData:
  4122. * @ctxt: an XML parser context
  4123. * @cdata: int indicating whether we are within a CDATA section
  4124. *
  4125. * parse a CharData section.
  4126. * if we are within a CDATA section ']]>' marks an end of section.
  4127. *
  4128. * The right angle bracket (>) may be represented using the string "&gt;",
  4129. * and must, for compatibility, be escaped using "&gt;" or a character
  4130. * reference when it appears in the string "]]>" in content, when that
  4131. * string is not marking the end of a CDATA section.
  4132. *
  4133. * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
  4134. */
  4135. void
  4136. xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
  4137. const xmlChar *in;
  4138. int nbchar = 0;
  4139. int line = ctxt->input->line;
  4140. int col = ctxt->input->col;
  4141. int ccol;
  4142. SHRINK;
  4143. GROW;
  4144. /*
  4145. * Accelerated common case where input don't need to be
  4146. * modified before passing it to the handler.
  4147. */
  4148. if (!cdata) {
  4149. in = ctxt->input->cur;
  4150. do {
  4151. get_more_space:
  4152. while (*in == 0x20) { in++; ctxt->input->col++; }
  4153. if (*in == 0xA) {
  4154. do {
  4155. ctxt->input->line++; ctxt->input->col = 1;
  4156. in++;
  4157. } while (*in == 0xA);
  4158. goto get_more_space;
  4159. }
  4160. if (*in == '<') {
  4161. nbchar = in - ctxt->input->cur;
  4162. if (nbchar > 0) {
  4163. const xmlChar *tmp = ctxt->input->cur;
  4164. ctxt->input->cur = in;
  4165. if ((ctxt->sax != NULL) &&
  4166. (ctxt->sax->ignorableWhitespace !=
  4167. ctxt->sax->characters)) {
  4168. if (areBlanks(ctxt, tmp, nbchar, 1)) {
  4169. if (ctxt->sax->ignorableWhitespace != NULL)
  4170. ctxt->sax->ignorableWhitespace(ctxt->userData,
  4171. tmp, nbchar);
  4172. } else {
  4173. if (ctxt->sax->characters != NULL)
  4174. ctxt->sax->characters(ctxt->userData,
  4175. tmp, nbchar);
  4176. if (*ctxt->space == -1)
  4177. *ctxt->space = -2;
  4178. }
  4179. } else if ((ctxt->sax != NULL) &&
  4180. (ctxt->sax->characters != NULL)) {
  4181. ctxt->sax->characters(ctxt->userData,
  4182. tmp, nbchar);
  4183. }
  4184. }
  4185. return;
  4186. }
  4187. get_more:
  4188. ccol = ctxt->input->col;
  4189. while (test_char_data[*in]) {
  4190. in++;
  4191. ccol++;
  4192. }
  4193. ctxt->input->col = ccol;
  4194. if (*in == 0xA) {
  4195. do {
  4196. ctxt->input->line++; ctxt->input->col = 1;
  4197. in++;
  4198. } while (*in == 0xA);
  4199. goto get_more;
  4200. }
  4201. if (*in == ']') {
  4202. if ((in[1] == ']') && (in[2] == '>')) {
  4203. xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
  4204. ctxt->input->cur = in + 1;
  4205. return;
  4206. }
  4207. in++;
  4208. ctxt->input->col++;
  4209. goto get_more;
  4210. }
  4211. nbchar = in - ctxt->input->cur;
  4212. if (nbchar > 0) {
  4213. if ((ctxt->sax != NULL) &&
  4214. (ctxt->sax->ignorableWhitespace !=
  4215. ctxt->sax->characters) &&
  4216. (IS_BLANK_CH(*ctxt->input->cur))) {
  4217. const xmlChar *tmp = ctxt->input->cur;
  4218. ctxt->input->cur = in;
  4219. if (areBlanks(ctxt, tmp, nbchar, 0)) {
  4220. if (ctxt->sax->ignorableWhitespace != NULL)
  4221. ctxt->sax->ignorableWhitespace(ctxt->userData,
  4222. tmp, nbchar);
  4223. } else {
  4224. if (ctxt->sax->characters != NULL)
  4225. ctxt->sax->characters(ctxt->userData,
  4226. tmp, nbchar);
  4227. if (*ctxt->space == -1)
  4228. *ctxt->space = -2;
  4229. }
  4230. line = ctxt->input->line;
  4231. col = ctxt->input->col;
  4232. } else if (ctxt->sax != NULL) {
  4233. if (ctxt->sax->characters != NULL)
  4234. ctxt->sax->characters(ctxt->userData,
  4235. ctxt->input->cur, nbchar);
  4236. line = ctxt->input->line;
  4237. col = ctxt->input->col;
  4238. }
  4239. /* something really bad happened in the SAX callback */
  4240. if (ctxt->instate != XML_PARSER_CONTENT)
  4241. return;
  4242. }
  4243. ctxt->input->cur = in;
  4244. if (*in == 0xD) {
  4245. in++;
  4246. if (*in == 0xA) {
  4247. ctxt->input->cur = in;
  4248. in++;
  4249. ctxt->input->line++; ctxt->input->col = 1;
  4250. continue; /* while */
  4251. }
  4252. in--;
  4253. }
  4254. if (*in == '<') {
  4255. return;
  4256. }
  4257. if (*in == '&') {
  4258. return;
  4259. }
  4260. SHRINK;
  4261. GROW;
  4262. if (ctxt->instate == XML_PARSER_EOF)
  4263. return;
  4264. in = ctxt->input->cur;
  4265. } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
  4266. nbchar = 0;
  4267. }
  4268. ctxt->input->line = line;
  4269. ctxt->input->col = col;
  4270. xmlParseCharDataComplex(ctxt, cdata);
  4271. }
  4272. /**
  4273. * xmlParseCharDataComplex:
  4274. * @ctxt: an XML parser context
  4275. * @cdata: int indicating whether we are within a CDATA section
  4276. *
  4277. * parse a CharData section.this is the fallback function
  4278. * of xmlParseCharData() when the parsing requires handling
  4279. * of non-ASCII characters.
  4280. */
  4281. static void
  4282. xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
  4283. xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
  4284. int nbchar = 0;
  4285. int cur, l;
  4286. int count = 0;
  4287. SHRINK;
  4288. GROW;
  4289. cur = CUR_CHAR(l);
  4290. while ((cur != '<') && /* checked */
  4291. (cur != '&') &&
  4292. (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
  4293. if ((cur == ']') && (NXT(1) == ']') &&
  4294. (NXT(2) == '>')) {
  4295. if (cdata) break;
  4296. else {
  4297. xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
  4298. }
  4299. }
  4300. COPY_BUF(l,buf,nbchar,cur);
  4301. if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
  4302. buf[nbchar] = 0;
  4303. /*
  4304. * OK the segment is to be consumed as chars.
  4305. */
  4306. if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
  4307. if (areBlanks(ctxt, buf, nbchar, 0)) {
  4308. if (ctxt->sax->ignorableWhitespace != NULL)
  4309. ctxt->sax->ignorableWhitespace(ctxt->userData,
  4310. buf, nbchar);
  4311. } else {
  4312. if (ctxt->sax->characters != NULL)
  4313. ctxt->sax->characters(ctxt->userData, buf, nbchar);
  4314. if ((ctxt->sax->characters !=
  4315. ctxt->sax->ignorableWhitespace) &&
  4316. (*ctxt->space == -1))
  4317. *ctxt->space = -2;
  4318. }
  4319. }
  4320. nbchar = 0;
  4321. /* something really bad happened in the SAX callback */
  4322. if (ctxt->instate != XML_PARSER_CONTENT)
  4323. return;
  4324. }
  4325. count++;
  4326. if (count > 50) {
  4327. SHRINK;
  4328. GROW;
  4329. count = 0;
  4330. if (ctxt->instate == XML_PARSER_EOF)
  4331. return;
  4332. }
  4333. NEXTL(l);
  4334. cur = CUR_CHAR(l);
  4335. }
  4336. if (nbchar != 0) {
  4337. buf[nbchar] = 0;
  4338. /*
  4339. * OK the segment is to be consumed as chars.
  4340. */
  4341. if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
  4342. if (areBlanks(ctxt, buf, nbchar, 0)) {
  4343. if (ctxt->sax->ignorableWhitespace != NULL)
  4344. ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
  4345. } else {
  4346. if (ctxt->sax->characters != NULL)
  4347. ctxt->sax->characters(ctxt->userData, buf, nbchar);
  4348. if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
  4349. (*ctxt->space == -1))
  4350. *ctxt->space = -2;
  4351. }
  4352. }
  4353. }
  4354. if ((cur != 0) && (!IS_CHAR(cur))) {
  4355. /* Generate the error and skip the offending character */
  4356. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  4357. "PCDATA invalid Char value %d\n",
  4358. cur);
  4359. NEXTL(l);
  4360. }
  4361. }
  4362. /**
  4363. * xmlParseExternalID:
  4364. * @ctxt: an XML parser context
  4365. * @publicID: a xmlChar** receiving PubidLiteral
  4366. * @strict: indicate whether we should restrict parsing to only
  4367. * production [75], see NOTE below
  4368. *
  4369. * Parse an External ID or a Public ID
  4370. *
  4371. * NOTE: Productions [75] and [83] interact badly since [75] can generate
  4372. * 'PUBLIC' S PubidLiteral S SystemLiteral
  4373. *
  4374. * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
  4375. * | 'PUBLIC' S PubidLiteral S SystemLiteral
  4376. *
  4377. * [83] PublicID ::= 'PUBLIC' S PubidLiteral
  4378. *
  4379. * Returns the function returns SystemLiteral and in the second
  4380. * case publicID receives PubidLiteral, is strict is off
  4381. * it is possible to return NULL and have publicID set.
  4382. */
  4383. xmlChar *
  4384. xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
  4385. xmlChar *URI = NULL;
  4386. SHRINK;
  4387. *publicID = NULL;
  4388. if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
  4389. SKIP(6);
  4390. if (SKIP_BLANKS == 0) {
  4391. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4392. "Space required after 'SYSTEM'\n");
  4393. }
  4394. URI = xmlParseSystemLiteral(ctxt);
  4395. if (URI == NULL) {
  4396. xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
  4397. }
  4398. } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
  4399. SKIP(6);
  4400. if (SKIP_BLANKS == 0) {
  4401. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4402. "Space required after 'PUBLIC'\n");
  4403. }
  4404. *publicID = xmlParsePubidLiteral(ctxt);
  4405. if (*publicID == NULL) {
  4406. xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
  4407. }
  4408. if (strict) {
  4409. /*
  4410. * We don't handle [83] so "S SystemLiteral" is required.
  4411. */
  4412. if (SKIP_BLANKS == 0) {
  4413. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4414. "Space required after the Public Identifier\n");
  4415. }
  4416. } else {
  4417. /*
  4418. * We handle [83] so we return immediately, if
  4419. * "S SystemLiteral" is not detected. We skip blanks if no
  4420. * system literal was found, but this is harmless since we must
  4421. * be at the end of a NotationDecl.
  4422. */
  4423. if (SKIP_BLANKS == 0) return(NULL);
  4424. if ((CUR != '\'') && (CUR != '"')) return(NULL);
  4425. }
  4426. URI = xmlParseSystemLiteral(ctxt);
  4427. if (URI == NULL) {
  4428. xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
  4429. }
  4430. }
  4431. return(URI);
  4432. }
  4433. /**
  4434. * xmlParseCommentComplex:
  4435. * @ctxt: an XML parser context
  4436. * @buf: the already parsed part of the buffer
  4437. * @len: number of bytes in the buffer
  4438. * @size: allocated size of the buffer
  4439. *
  4440. * Skip an XML (SGML) comment <!-- .... -->
  4441. * The spec says that "For compatibility, the string "--" (double-hyphen)
  4442. * must not occur within comments. "
  4443. * This is the slow routine in case the accelerator for ascii didn't work
  4444. *
  4445. * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
  4446. */
  4447. static void
  4448. xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
  4449. size_t len, size_t size) {
  4450. int q, ql;
  4451. int r, rl;
  4452. int cur, l;
  4453. size_t count = 0;
  4454. int inputid;
  4455. inputid = ctxt->input->id;
  4456. if (buf == NULL) {
  4457. len = 0;
  4458. size = XML_PARSER_BUFFER_SIZE;
  4459. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  4460. if (buf == NULL) {
  4461. xmlErrMemory(ctxt, NULL);
  4462. return;
  4463. }
  4464. }
  4465. GROW; /* Assure there's enough input data */
  4466. q = CUR_CHAR(ql);
  4467. if (q == 0)
  4468. goto not_terminated;
  4469. if (!IS_CHAR(q)) {
  4470. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  4471. "xmlParseComment: invalid xmlChar value %d\n",
  4472. q);
  4473. xmlFree (buf);
  4474. return;
  4475. }
  4476. NEXTL(ql);
  4477. r = CUR_CHAR(rl);
  4478. if (r == 0)
  4479. goto not_terminated;
  4480. if (!IS_CHAR(r)) {
  4481. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  4482. "xmlParseComment: invalid xmlChar value %d\n",
  4483. q);
  4484. xmlFree (buf);
  4485. return;
  4486. }
  4487. NEXTL(rl);
  4488. cur = CUR_CHAR(l);
  4489. if (cur == 0)
  4490. goto not_terminated;
  4491. while (IS_CHAR(cur) && /* checked */
  4492. ((cur != '>') ||
  4493. (r != '-') || (q != '-'))) {
  4494. if ((r == '-') && (q == '-')) {
  4495. xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
  4496. }
  4497. if ((len > XML_MAX_TEXT_LENGTH) &&
  4498. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  4499. xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
  4500. "Comment too big found", NULL);
  4501. xmlFree (buf);
  4502. return;
  4503. }
  4504. if (len + 5 >= size) {
  4505. xmlChar *new_buf;
  4506. size_t new_size;
  4507. new_size = size * 2;
  4508. new_buf = (xmlChar *) xmlRealloc(buf, new_size);
  4509. if (new_buf == NULL) {
  4510. xmlFree (buf);
  4511. xmlErrMemory(ctxt, NULL);
  4512. return;
  4513. }
  4514. buf = new_buf;
  4515. size = new_size;
  4516. }
  4517. COPY_BUF(ql,buf,len,q);
  4518. q = r;
  4519. ql = rl;
  4520. r = cur;
  4521. rl = l;
  4522. count++;
  4523. if (count > 50) {
  4524. SHRINK;
  4525. GROW;
  4526. count = 0;
  4527. if (ctxt->instate == XML_PARSER_EOF) {
  4528. xmlFree(buf);
  4529. return;
  4530. }
  4531. }
  4532. NEXTL(l);
  4533. cur = CUR_CHAR(l);
  4534. if (cur == 0) {
  4535. SHRINK;
  4536. GROW;
  4537. cur = CUR_CHAR(l);
  4538. }
  4539. }
  4540. buf[len] = 0;
  4541. if (cur == 0) {
  4542. xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
  4543. "Comment not terminated \n<!--%.50s\n", buf);
  4544. } else if (!IS_CHAR(cur)) {
  4545. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  4546. "xmlParseComment: invalid xmlChar value %d\n",
  4547. cur);
  4548. } else {
  4549. if (inputid != ctxt->input->id) {
  4550. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  4551. "Comment doesn't start and stop in the same"
  4552. " entity\n");
  4553. }
  4554. NEXT;
  4555. if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
  4556. (!ctxt->disableSAX))
  4557. ctxt->sax->comment(ctxt->userData, buf);
  4558. }
  4559. xmlFree(buf);
  4560. return;
  4561. not_terminated:
  4562. xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
  4563. "Comment not terminated\n", NULL);
  4564. xmlFree(buf);
  4565. return;
  4566. }
  4567. /**
  4568. * xmlParseComment:
  4569. * @ctxt: an XML parser context
  4570. *
  4571. * Skip an XML (SGML) comment <!-- .... -->
  4572. * The spec says that "For compatibility, the string "--" (double-hyphen)
  4573. * must not occur within comments. "
  4574. *
  4575. * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
  4576. */
  4577. void
  4578. xmlParseComment(xmlParserCtxtPtr ctxt) {
  4579. xmlChar *buf = NULL;
  4580. size_t size = XML_PARSER_BUFFER_SIZE;
  4581. size_t len = 0;
  4582. xmlParserInputState state;
  4583. const xmlChar *in;
  4584. size_t nbchar = 0;
  4585. int ccol;
  4586. int inputid;
  4587. /*
  4588. * Check that there is a comment right here.
  4589. */
  4590. if ((RAW != '<') || (NXT(1) != '!') ||
  4591. (NXT(2) != '-') || (NXT(3) != '-')) return;
  4592. state = ctxt->instate;
  4593. ctxt->instate = XML_PARSER_COMMENT;
  4594. inputid = ctxt->input->id;
  4595. SKIP(4);
  4596. SHRINK;
  4597. GROW;
  4598. /*
  4599. * Accelerated common case where input don't need to be
  4600. * modified before passing it to the handler.
  4601. */
  4602. in = ctxt->input->cur;
  4603. do {
  4604. if (*in == 0xA) {
  4605. do {
  4606. ctxt->input->line++; ctxt->input->col = 1;
  4607. in++;
  4608. } while (*in == 0xA);
  4609. }
  4610. get_more:
  4611. ccol = ctxt->input->col;
  4612. while (((*in > '-') && (*in <= 0x7F)) ||
  4613. ((*in >= 0x20) && (*in < '-')) ||
  4614. (*in == 0x09)) {
  4615. in++;
  4616. ccol++;
  4617. }
  4618. ctxt->input->col = ccol;
  4619. if (*in == 0xA) {
  4620. do {
  4621. ctxt->input->line++; ctxt->input->col = 1;
  4622. in++;
  4623. } while (*in == 0xA);
  4624. goto get_more;
  4625. }
  4626. nbchar = in - ctxt->input->cur;
  4627. /*
  4628. * save current set of data
  4629. */
  4630. if (nbchar > 0) {
  4631. if ((ctxt->sax != NULL) &&
  4632. (ctxt->sax->comment != NULL)) {
  4633. if (buf == NULL) {
  4634. if ((*in == '-') && (in[1] == '-'))
  4635. size = nbchar + 1;
  4636. else
  4637. size = XML_PARSER_BUFFER_SIZE + nbchar;
  4638. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  4639. if (buf == NULL) {
  4640. xmlErrMemory(ctxt, NULL);
  4641. ctxt->instate = state;
  4642. return;
  4643. }
  4644. len = 0;
  4645. } else if (len + nbchar + 1 >= size) {
  4646. xmlChar *new_buf;
  4647. size += len + nbchar + XML_PARSER_BUFFER_SIZE;
  4648. new_buf = (xmlChar *) xmlRealloc(buf,
  4649. size * sizeof(xmlChar));
  4650. if (new_buf == NULL) {
  4651. xmlFree (buf);
  4652. xmlErrMemory(ctxt, NULL);
  4653. ctxt->instate = state;
  4654. return;
  4655. }
  4656. buf = new_buf;
  4657. }
  4658. memcpy(&buf[len], ctxt->input->cur, nbchar);
  4659. len += nbchar;
  4660. buf[len] = 0;
  4661. }
  4662. }
  4663. if ((len > XML_MAX_TEXT_LENGTH) &&
  4664. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  4665. xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
  4666. "Comment too big found", NULL);
  4667. xmlFree (buf);
  4668. return;
  4669. }
  4670. ctxt->input->cur = in;
  4671. if (*in == 0xA) {
  4672. in++;
  4673. ctxt->input->line++; ctxt->input->col = 1;
  4674. }
  4675. if (*in == 0xD) {
  4676. in++;
  4677. if (*in == 0xA) {
  4678. ctxt->input->cur = in;
  4679. in++;
  4680. ctxt->input->line++; ctxt->input->col = 1;
  4681. continue; /* while */
  4682. }
  4683. in--;
  4684. }
  4685. SHRINK;
  4686. GROW;
  4687. if (ctxt->instate == XML_PARSER_EOF) {
  4688. xmlFree(buf);
  4689. return;
  4690. }
  4691. in = ctxt->input->cur;
  4692. if (*in == '-') {
  4693. if (in[1] == '-') {
  4694. if (in[2] == '>') {
  4695. if (ctxt->input->id != inputid) {
  4696. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  4697. "comment doesn't start and stop in the"
  4698. " same entity\n");
  4699. }
  4700. SKIP(3);
  4701. if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
  4702. (!ctxt->disableSAX)) {
  4703. if (buf != NULL)
  4704. ctxt->sax->comment(ctxt->userData, buf);
  4705. else
  4706. ctxt->sax->comment(ctxt->userData, BAD_CAST "");
  4707. }
  4708. if (buf != NULL)
  4709. xmlFree(buf);
  4710. if (ctxt->instate != XML_PARSER_EOF)
  4711. ctxt->instate = state;
  4712. return;
  4713. }
  4714. if (buf != NULL) {
  4715. xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
  4716. "Double hyphen within comment: "
  4717. "<!--%.50s\n",
  4718. buf);
  4719. } else
  4720. xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
  4721. "Double hyphen within comment\n", NULL);
  4722. if (ctxt->instate == XML_PARSER_EOF) {
  4723. xmlFree(buf);
  4724. return;
  4725. }
  4726. in++;
  4727. ctxt->input->col++;
  4728. }
  4729. in++;
  4730. ctxt->input->col++;
  4731. goto get_more;
  4732. }
  4733. } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
  4734. xmlParseCommentComplex(ctxt, buf, len, size);
  4735. ctxt->instate = state;
  4736. return;
  4737. }
  4738. /**
  4739. * xmlParsePITarget:
  4740. * @ctxt: an XML parser context
  4741. *
  4742. * parse the name of a PI
  4743. *
  4744. * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
  4745. *
  4746. * Returns the PITarget name or NULL
  4747. */
  4748. const xmlChar *
  4749. xmlParsePITarget(xmlParserCtxtPtr ctxt) {
  4750. const xmlChar *name;
  4751. name = xmlParseName(ctxt);
  4752. if ((name != NULL) &&
  4753. ((name[0] == 'x') || (name[0] == 'X')) &&
  4754. ((name[1] == 'm') || (name[1] == 'M')) &&
  4755. ((name[2] == 'l') || (name[2] == 'L'))) {
  4756. int i;
  4757. if ((name[0] == 'x') && (name[1] == 'm') &&
  4758. (name[2] == 'l') && (name[3] == 0)) {
  4759. xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
  4760. "XML declaration allowed only at the start of the document\n");
  4761. return(name);
  4762. } else if (name[3] == 0) {
  4763. xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
  4764. return(name);
  4765. }
  4766. for (i = 0;;i++) {
  4767. if (xmlW3CPIs[i] == NULL) break;
  4768. if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
  4769. return(name);
  4770. }
  4771. xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
  4772. "xmlParsePITarget: invalid name prefix 'xml'\n",
  4773. NULL, NULL);
  4774. }
  4775. if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
  4776. xmlNsErr(ctxt, XML_NS_ERR_COLON,
  4777. "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
  4778. }
  4779. return(name);
  4780. }
  4781. #ifdef LIBXML_CATALOG_ENABLED
  4782. /**
  4783. * xmlParseCatalogPI:
  4784. * @ctxt: an XML parser context
  4785. * @catalog: the PI value string
  4786. *
  4787. * parse an XML Catalog Processing Instruction.
  4788. *
  4789. * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
  4790. *
  4791. * Occurs only if allowed by the user and if happening in the Misc
  4792. * part of the document before any doctype information
  4793. * This will add the given catalog to the parsing context in order
  4794. * to be used if there is a resolution need further down in the document
  4795. */
  4796. static void
  4797. xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
  4798. xmlChar *URL = NULL;
  4799. const xmlChar *tmp, *base;
  4800. xmlChar marker;
  4801. tmp = catalog;
  4802. while (IS_BLANK_CH(*tmp)) tmp++;
  4803. if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
  4804. goto error;
  4805. tmp += 7;
  4806. while (IS_BLANK_CH(*tmp)) tmp++;
  4807. if (*tmp != '=') {
  4808. return;
  4809. }
  4810. tmp++;
  4811. while (IS_BLANK_CH(*tmp)) tmp++;
  4812. marker = *tmp;
  4813. if ((marker != '\'') && (marker != '"'))
  4814. goto error;
  4815. tmp++;
  4816. base = tmp;
  4817. while ((*tmp != 0) && (*tmp != marker)) tmp++;
  4818. if (*tmp == 0)
  4819. goto error;
  4820. URL = xmlStrndup(base, tmp - base);
  4821. tmp++;
  4822. while (IS_BLANK_CH(*tmp)) tmp++;
  4823. if (*tmp != 0)
  4824. goto error;
  4825. if (URL != NULL) {
  4826. ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
  4827. xmlFree(URL);
  4828. }
  4829. return;
  4830. error:
  4831. xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
  4832. "Catalog PI syntax error: %s\n",
  4833. catalog, NULL);
  4834. if (URL != NULL)
  4835. xmlFree(URL);
  4836. }
  4837. #endif
  4838. /**
  4839. * xmlParsePI:
  4840. * @ctxt: an XML parser context
  4841. *
  4842. * parse an XML Processing Instruction.
  4843. *
  4844. * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
  4845. *
  4846. * The processing is transferred to SAX once parsed.
  4847. */
  4848. void
  4849. xmlParsePI(xmlParserCtxtPtr ctxt) {
  4850. xmlChar *buf = NULL;
  4851. size_t len = 0;
  4852. size_t size = XML_PARSER_BUFFER_SIZE;
  4853. int cur, l;
  4854. const xmlChar *target;
  4855. xmlParserInputState state;
  4856. int count = 0;
  4857. if ((RAW == '<') && (NXT(1) == '?')) {
  4858. int inputid = ctxt->input->id;
  4859. state = ctxt->instate;
  4860. ctxt->instate = XML_PARSER_PI;
  4861. /*
  4862. * this is a Processing Instruction.
  4863. */
  4864. SKIP(2);
  4865. SHRINK;
  4866. /*
  4867. * Parse the target name and check for special support like
  4868. * namespace.
  4869. */
  4870. target = xmlParsePITarget(ctxt);
  4871. if (target != NULL) {
  4872. if ((RAW == '?') && (NXT(1) == '>')) {
  4873. if (inputid != ctxt->input->id) {
  4874. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  4875. "PI declaration doesn't start and stop in"
  4876. " the same entity\n");
  4877. }
  4878. SKIP(2);
  4879. /*
  4880. * SAX: PI detected.
  4881. */
  4882. if ((ctxt->sax) && (!ctxt->disableSAX) &&
  4883. (ctxt->sax->processingInstruction != NULL))
  4884. ctxt->sax->processingInstruction(ctxt->userData,
  4885. target, NULL);
  4886. if (ctxt->instate != XML_PARSER_EOF)
  4887. ctxt->instate = state;
  4888. return;
  4889. }
  4890. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  4891. if (buf == NULL) {
  4892. xmlErrMemory(ctxt, NULL);
  4893. ctxt->instate = state;
  4894. return;
  4895. }
  4896. if (SKIP_BLANKS == 0) {
  4897. xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
  4898. "ParsePI: PI %s space expected\n", target);
  4899. }
  4900. cur = CUR_CHAR(l);
  4901. while (IS_CHAR(cur) && /* checked */
  4902. ((cur != '?') || (NXT(1) != '>'))) {
  4903. if (len + 5 >= size) {
  4904. xmlChar *tmp;
  4905. size_t new_size = size * 2;
  4906. tmp = (xmlChar *) xmlRealloc(buf, new_size);
  4907. if (tmp == NULL) {
  4908. xmlErrMemory(ctxt, NULL);
  4909. xmlFree(buf);
  4910. ctxt->instate = state;
  4911. return;
  4912. }
  4913. buf = tmp;
  4914. size = new_size;
  4915. }
  4916. count++;
  4917. if (count > 50) {
  4918. SHRINK;
  4919. GROW;
  4920. if (ctxt->instate == XML_PARSER_EOF) {
  4921. xmlFree(buf);
  4922. return;
  4923. }
  4924. count = 0;
  4925. if ((len > XML_MAX_TEXT_LENGTH) &&
  4926. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  4927. xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
  4928. "PI %s too big found", target);
  4929. xmlFree(buf);
  4930. ctxt->instate = state;
  4931. return;
  4932. }
  4933. }
  4934. COPY_BUF(l,buf,len,cur);
  4935. NEXTL(l);
  4936. cur = CUR_CHAR(l);
  4937. if (cur == 0) {
  4938. SHRINK;
  4939. GROW;
  4940. cur = CUR_CHAR(l);
  4941. }
  4942. }
  4943. if ((len > XML_MAX_TEXT_LENGTH) &&
  4944. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  4945. xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
  4946. "PI %s too big found", target);
  4947. xmlFree(buf);
  4948. ctxt->instate = state;
  4949. return;
  4950. }
  4951. buf[len] = 0;
  4952. if (cur != '?') {
  4953. xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
  4954. "ParsePI: PI %s never end ...\n", target);
  4955. } else {
  4956. if (inputid != ctxt->input->id) {
  4957. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  4958. "PI declaration doesn't start and stop in"
  4959. " the same entity\n");
  4960. }
  4961. SKIP(2);
  4962. #ifdef LIBXML_CATALOG_ENABLED
  4963. if (((state == XML_PARSER_MISC) ||
  4964. (state == XML_PARSER_START)) &&
  4965. (xmlStrEqual(target, XML_CATALOG_PI))) {
  4966. xmlCatalogAllow allow = xmlCatalogGetDefaults();
  4967. if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
  4968. (allow == XML_CATA_ALLOW_ALL))
  4969. xmlParseCatalogPI(ctxt, buf);
  4970. }
  4971. #endif
  4972. /*
  4973. * SAX: PI detected.
  4974. */
  4975. if ((ctxt->sax) && (!ctxt->disableSAX) &&
  4976. (ctxt->sax->processingInstruction != NULL))
  4977. ctxt->sax->processingInstruction(ctxt->userData,
  4978. target, buf);
  4979. }
  4980. xmlFree(buf);
  4981. } else {
  4982. xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
  4983. }
  4984. if (ctxt->instate != XML_PARSER_EOF)
  4985. ctxt->instate = state;
  4986. }
  4987. }
  4988. /**
  4989. * xmlParseNotationDecl:
  4990. * @ctxt: an XML parser context
  4991. *
  4992. * parse a notation declaration
  4993. *
  4994. * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
  4995. *
  4996. * Hence there is actually 3 choices:
  4997. * 'PUBLIC' S PubidLiteral
  4998. * 'PUBLIC' S PubidLiteral S SystemLiteral
  4999. * and 'SYSTEM' S SystemLiteral
  5000. *
  5001. * See the NOTE on xmlParseExternalID().
  5002. */
  5003. void
  5004. xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
  5005. const xmlChar *name;
  5006. xmlChar *Pubid;
  5007. xmlChar *Systemid;
  5008. if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
  5009. int inputid = ctxt->input->id;
  5010. SHRINK;
  5011. SKIP(10);
  5012. if (SKIP_BLANKS == 0) {
  5013. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5014. "Space required after '<!NOTATION'\n");
  5015. return;
  5016. }
  5017. name = xmlParseName(ctxt);
  5018. if (name == NULL) {
  5019. xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
  5020. return;
  5021. }
  5022. if (xmlStrchr(name, ':') != NULL) {
  5023. xmlNsErr(ctxt, XML_NS_ERR_COLON,
  5024. "colons are forbidden from notation names '%s'\n",
  5025. name, NULL, NULL);
  5026. }
  5027. if (SKIP_BLANKS == 0) {
  5028. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5029. "Space required after the NOTATION name'\n");
  5030. return;
  5031. }
  5032. /*
  5033. * Parse the IDs.
  5034. */
  5035. Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
  5036. SKIP_BLANKS;
  5037. if (RAW == '>') {
  5038. if (inputid != ctxt->input->id) {
  5039. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5040. "Notation declaration doesn't start and stop"
  5041. " in the same entity\n");
  5042. }
  5043. NEXT;
  5044. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  5045. (ctxt->sax->notationDecl != NULL))
  5046. ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
  5047. } else {
  5048. xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
  5049. }
  5050. if (Systemid != NULL) xmlFree(Systemid);
  5051. if (Pubid != NULL) xmlFree(Pubid);
  5052. }
  5053. }
  5054. /**
  5055. * xmlParseEntityDecl:
  5056. * @ctxt: an XML parser context
  5057. *
  5058. * parse <!ENTITY declarations
  5059. *
  5060. * [70] EntityDecl ::= GEDecl | PEDecl
  5061. *
  5062. * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
  5063. *
  5064. * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
  5065. *
  5066. * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
  5067. *
  5068. * [74] PEDef ::= EntityValue | ExternalID
  5069. *
  5070. * [76] NDataDecl ::= S 'NDATA' S Name
  5071. *
  5072. * [ VC: Notation Declared ]
  5073. * The Name must match the declared name of a notation.
  5074. */
  5075. void
  5076. xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
  5077. const xmlChar *name = NULL;
  5078. xmlChar *value = NULL;
  5079. xmlChar *URI = NULL, *literal = NULL;
  5080. const xmlChar *ndata = NULL;
  5081. int isParameter = 0;
  5082. xmlChar *orig = NULL;
  5083. /* GROW; done in the caller */
  5084. if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
  5085. int inputid = ctxt->input->id;
  5086. SHRINK;
  5087. SKIP(8);
  5088. if (SKIP_BLANKS == 0) {
  5089. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5090. "Space required after '<!ENTITY'\n");
  5091. }
  5092. if (RAW == '%') {
  5093. NEXT;
  5094. if (SKIP_BLANKS == 0) {
  5095. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5096. "Space required after '%%'\n");
  5097. }
  5098. isParameter = 1;
  5099. }
  5100. name = xmlParseName(ctxt);
  5101. if (name == NULL) {
  5102. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5103. "xmlParseEntityDecl: no name\n");
  5104. return;
  5105. }
  5106. if (xmlStrchr(name, ':') != NULL) {
  5107. xmlNsErr(ctxt, XML_NS_ERR_COLON,
  5108. "colons are forbidden from entities names '%s'\n",
  5109. name, NULL, NULL);
  5110. }
  5111. if (SKIP_BLANKS == 0) {
  5112. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5113. "Space required after the entity name\n");
  5114. }
  5115. ctxt->instate = XML_PARSER_ENTITY_DECL;
  5116. /*
  5117. * handle the various case of definitions...
  5118. */
  5119. if (isParameter) {
  5120. if ((RAW == '"') || (RAW == '\'')) {
  5121. value = xmlParseEntityValue(ctxt, &orig);
  5122. if (value) {
  5123. if ((ctxt->sax != NULL) &&
  5124. (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
  5125. ctxt->sax->entityDecl(ctxt->userData, name,
  5126. XML_INTERNAL_PARAMETER_ENTITY,
  5127. NULL, NULL, value);
  5128. }
  5129. } else {
  5130. URI = xmlParseExternalID(ctxt, &literal, 1);
  5131. if ((URI == NULL) && (literal == NULL)) {
  5132. xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
  5133. }
  5134. if (URI) {
  5135. xmlURIPtr uri;
  5136. uri = xmlParseURI((const char *) URI);
  5137. if (uri == NULL) {
  5138. xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
  5139. "Invalid URI: %s\n", URI);
  5140. /*
  5141. * This really ought to be a well formedness error
  5142. * but the XML Core WG decided otherwise c.f. issue
  5143. * E26 of the XML erratas.
  5144. */
  5145. } else {
  5146. if (uri->fragment != NULL) {
  5147. /*
  5148. * Okay this is foolish to block those but not
  5149. * invalid URIs.
  5150. */
  5151. xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
  5152. } else {
  5153. if ((ctxt->sax != NULL) &&
  5154. (!ctxt->disableSAX) &&
  5155. (ctxt->sax->entityDecl != NULL))
  5156. ctxt->sax->entityDecl(ctxt->userData, name,
  5157. XML_EXTERNAL_PARAMETER_ENTITY,
  5158. literal, URI, NULL);
  5159. }
  5160. xmlFreeURI(uri);
  5161. }
  5162. }
  5163. }
  5164. } else {
  5165. if ((RAW == '"') || (RAW == '\'')) {
  5166. value = xmlParseEntityValue(ctxt, &orig);
  5167. if ((ctxt->sax != NULL) &&
  5168. (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
  5169. ctxt->sax->entityDecl(ctxt->userData, name,
  5170. XML_INTERNAL_GENERAL_ENTITY,
  5171. NULL, NULL, value);
  5172. /*
  5173. * For expat compatibility in SAX mode.
  5174. */
  5175. if ((ctxt->myDoc == NULL) ||
  5176. (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
  5177. if (ctxt->myDoc == NULL) {
  5178. ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
  5179. if (ctxt->myDoc == NULL) {
  5180. xmlErrMemory(ctxt, "New Doc failed");
  5181. return;
  5182. }
  5183. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  5184. }
  5185. if (ctxt->myDoc->intSubset == NULL)
  5186. ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
  5187. BAD_CAST "fake", NULL, NULL);
  5188. xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
  5189. NULL, NULL, value);
  5190. }
  5191. } else {
  5192. URI = xmlParseExternalID(ctxt, &literal, 1);
  5193. if ((URI == NULL) && (literal == NULL)) {
  5194. xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
  5195. }
  5196. if (URI) {
  5197. xmlURIPtr uri;
  5198. uri = xmlParseURI((const char *)URI);
  5199. if (uri == NULL) {
  5200. xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
  5201. "Invalid URI: %s\n", URI);
  5202. /*
  5203. * This really ought to be a well formedness error
  5204. * but the XML Core WG decided otherwise c.f. issue
  5205. * E26 of the XML erratas.
  5206. */
  5207. } else {
  5208. if (uri->fragment != NULL) {
  5209. /*
  5210. * Okay this is foolish to block those but not
  5211. * invalid URIs.
  5212. */
  5213. xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
  5214. }
  5215. xmlFreeURI(uri);
  5216. }
  5217. }
  5218. if ((RAW != '>') && (SKIP_BLANKS == 0)) {
  5219. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5220. "Space required before 'NDATA'\n");
  5221. }
  5222. if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
  5223. SKIP(5);
  5224. if (SKIP_BLANKS == 0) {
  5225. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5226. "Space required after 'NDATA'\n");
  5227. }
  5228. ndata = xmlParseName(ctxt);
  5229. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  5230. (ctxt->sax->unparsedEntityDecl != NULL))
  5231. ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
  5232. literal, URI, ndata);
  5233. } else {
  5234. if ((ctxt->sax != NULL) &&
  5235. (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
  5236. ctxt->sax->entityDecl(ctxt->userData, name,
  5237. XML_EXTERNAL_GENERAL_PARSED_ENTITY,
  5238. literal, URI, NULL);
  5239. /*
  5240. * For expat compatibility in SAX mode.
  5241. * assuming the entity replacement was asked for
  5242. */
  5243. if ((ctxt->replaceEntities != 0) &&
  5244. ((ctxt->myDoc == NULL) ||
  5245. (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
  5246. if (ctxt->myDoc == NULL) {
  5247. ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
  5248. if (ctxt->myDoc == NULL) {
  5249. xmlErrMemory(ctxt, "New Doc failed");
  5250. return;
  5251. }
  5252. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  5253. }
  5254. if (ctxt->myDoc->intSubset == NULL)
  5255. ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
  5256. BAD_CAST "fake", NULL, NULL);
  5257. xmlSAX2EntityDecl(ctxt, name,
  5258. XML_EXTERNAL_GENERAL_PARSED_ENTITY,
  5259. literal, URI, NULL);
  5260. }
  5261. }
  5262. }
  5263. }
  5264. if (ctxt->instate == XML_PARSER_EOF)
  5265. goto done;
  5266. SKIP_BLANKS;
  5267. if (RAW != '>') {
  5268. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
  5269. "xmlParseEntityDecl: entity %s not terminated\n", name);
  5270. xmlHaltParser(ctxt);
  5271. } else {
  5272. if (inputid != ctxt->input->id) {
  5273. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5274. "Entity declaration doesn't start and stop in"
  5275. " the same entity\n");
  5276. }
  5277. NEXT;
  5278. }
  5279. if (orig != NULL) {
  5280. /*
  5281. * Ugly mechanism to save the raw entity value.
  5282. */
  5283. xmlEntityPtr cur = NULL;
  5284. if (isParameter) {
  5285. if ((ctxt->sax != NULL) &&
  5286. (ctxt->sax->getParameterEntity != NULL))
  5287. cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
  5288. } else {
  5289. if ((ctxt->sax != NULL) &&
  5290. (ctxt->sax->getEntity != NULL))
  5291. cur = ctxt->sax->getEntity(ctxt->userData, name);
  5292. if ((cur == NULL) && (ctxt->userData==ctxt)) {
  5293. cur = xmlSAX2GetEntity(ctxt, name);
  5294. }
  5295. }
  5296. if ((cur != NULL) && (cur->orig == NULL)) {
  5297. cur->orig = orig;
  5298. orig = NULL;
  5299. }
  5300. }
  5301. done:
  5302. if (value != NULL) xmlFree(value);
  5303. if (URI != NULL) xmlFree(URI);
  5304. if (literal != NULL) xmlFree(literal);
  5305. if (orig != NULL) xmlFree(orig);
  5306. }
  5307. }
  5308. /**
  5309. * xmlParseDefaultDecl:
  5310. * @ctxt: an XML parser context
  5311. * @value: Receive a possible fixed default value for the attribute
  5312. *
  5313. * Parse an attribute default declaration
  5314. *
  5315. * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
  5316. *
  5317. * [ VC: Required Attribute ]
  5318. * if the default declaration is the keyword #REQUIRED, then the
  5319. * attribute must be specified for all elements of the type in the
  5320. * attribute-list declaration.
  5321. *
  5322. * [ VC: Attribute Default Legal ]
  5323. * The declared default value must meet the lexical constraints of
  5324. * the declared attribute type c.f. xmlValidateAttributeDecl()
  5325. *
  5326. * [ VC: Fixed Attribute Default ]
  5327. * if an attribute has a default value declared with the #FIXED
  5328. * keyword, instances of that attribute must match the default value.
  5329. *
  5330. * [ WFC: No < in Attribute Values ]
  5331. * handled in xmlParseAttValue()
  5332. *
  5333. * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
  5334. * or XML_ATTRIBUTE_FIXED.
  5335. */
  5336. int
  5337. xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
  5338. int val;
  5339. xmlChar *ret;
  5340. *value = NULL;
  5341. if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
  5342. SKIP(9);
  5343. return(XML_ATTRIBUTE_REQUIRED);
  5344. }
  5345. if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
  5346. SKIP(8);
  5347. return(XML_ATTRIBUTE_IMPLIED);
  5348. }
  5349. val = XML_ATTRIBUTE_NONE;
  5350. if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
  5351. SKIP(6);
  5352. val = XML_ATTRIBUTE_FIXED;
  5353. if (SKIP_BLANKS == 0) {
  5354. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5355. "Space required after '#FIXED'\n");
  5356. }
  5357. }
  5358. ret = xmlParseAttValue(ctxt);
  5359. ctxt->instate = XML_PARSER_DTD;
  5360. if (ret == NULL) {
  5361. xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
  5362. "Attribute default value declaration error\n");
  5363. } else
  5364. *value = ret;
  5365. return(val);
  5366. }
  5367. /**
  5368. * xmlParseNotationType:
  5369. * @ctxt: an XML parser context
  5370. *
  5371. * parse an Notation attribute type.
  5372. *
  5373. * Note: the leading 'NOTATION' S part has already being parsed...
  5374. *
  5375. * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  5376. *
  5377. * [ VC: Notation Attributes ]
  5378. * Values of this type must match one of the notation names included
  5379. * in the declaration; all notation names in the declaration must be declared.
  5380. *
  5381. * Returns: the notation attribute tree built while parsing
  5382. */
  5383. xmlEnumerationPtr
  5384. xmlParseNotationType(xmlParserCtxtPtr ctxt) {
  5385. const xmlChar *name;
  5386. xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
  5387. if (RAW != '(') {
  5388. xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
  5389. return(NULL);
  5390. }
  5391. SHRINK;
  5392. do {
  5393. NEXT;
  5394. SKIP_BLANKS;
  5395. name = xmlParseName(ctxt);
  5396. if (name == NULL) {
  5397. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5398. "Name expected in NOTATION declaration\n");
  5399. xmlFreeEnumeration(ret);
  5400. return(NULL);
  5401. }
  5402. tmp = ret;
  5403. while (tmp != NULL) {
  5404. if (xmlStrEqual(name, tmp->name)) {
  5405. xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
  5406. "standalone: attribute notation value token %s duplicated\n",
  5407. name, NULL);
  5408. if (!xmlDictOwns(ctxt->dict, name))
  5409. xmlFree((xmlChar *) name);
  5410. break;
  5411. }
  5412. tmp = tmp->next;
  5413. }
  5414. if (tmp == NULL) {
  5415. cur = xmlCreateEnumeration(name);
  5416. if (cur == NULL) {
  5417. xmlFreeEnumeration(ret);
  5418. return(NULL);
  5419. }
  5420. if (last == NULL) ret = last = cur;
  5421. else {
  5422. last->next = cur;
  5423. last = cur;
  5424. }
  5425. }
  5426. SKIP_BLANKS;
  5427. } while (RAW == '|');
  5428. if (RAW != ')') {
  5429. xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
  5430. xmlFreeEnumeration(ret);
  5431. return(NULL);
  5432. }
  5433. NEXT;
  5434. return(ret);
  5435. }
  5436. /**
  5437. * xmlParseEnumerationType:
  5438. * @ctxt: an XML parser context
  5439. *
  5440. * parse an Enumeration attribute type.
  5441. *
  5442. * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
  5443. *
  5444. * [ VC: Enumeration ]
  5445. * Values of this type must match one of the Nmtoken tokens in
  5446. * the declaration
  5447. *
  5448. * Returns: the enumeration attribute tree built while parsing
  5449. */
  5450. xmlEnumerationPtr
  5451. xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
  5452. xmlChar *name;
  5453. xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
  5454. if (RAW != '(') {
  5455. xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
  5456. return(NULL);
  5457. }
  5458. SHRINK;
  5459. do {
  5460. NEXT;
  5461. SKIP_BLANKS;
  5462. name = xmlParseNmtoken(ctxt);
  5463. if (name == NULL) {
  5464. xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
  5465. return(ret);
  5466. }
  5467. tmp = ret;
  5468. while (tmp != NULL) {
  5469. if (xmlStrEqual(name, tmp->name)) {
  5470. xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
  5471. "standalone: attribute enumeration value token %s duplicated\n",
  5472. name, NULL);
  5473. if (!xmlDictOwns(ctxt->dict, name))
  5474. xmlFree(name);
  5475. break;
  5476. }
  5477. tmp = tmp->next;
  5478. }
  5479. if (tmp == NULL) {
  5480. cur = xmlCreateEnumeration(name);
  5481. if (!xmlDictOwns(ctxt->dict, name))
  5482. xmlFree(name);
  5483. if (cur == NULL) {
  5484. xmlFreeEnumeration(ret);
  5485. return(NULL);
  5486. }
  5487. if (last == NULL) ret = last = cur;
  5488. else {
  5489. last->next = cur;
  5490. last = cur;
  5491. }
  5492. }
  5493. SKIP_BLANKS;
  5494. } while (RAW == '|');
  5495. if (RAW != ')') {
  5496. xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
  5497. return(ret);
  5498. }
  5499. NEXT;
  5500. return(ret);
  5501. }
  5502. /**
  5503. * xmlParseEnumeratedType:
  5504. * @ctxt: an XML parser context
  5505. * @tree: the enumeration tree built while parsing
  5506. *
  5507. * parse an Enumerated attribute type.
  5508. *
  5509. * [57] EnumeratedType ::= NotationType | Enumeration
  5510. *
  5511. * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  5512. *
  5513. *
  5514. * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
  5515. */
  5516. int
  5517. xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
  5518. if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
  5519. SKIP(8);
  5520. if (SKIP_BLANKS == 0) {
  5521. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5522. "Space required after 'NOTATION'\n");
  5523. return(0);
  5524. }
  5525. *tree = xmlParseNotationType(ctxt);
  5526. if (*tree == NULL) return(0);
  5527. return(XML_ATTRIBUTE_NOTATION);
  5528. }
  5529. *tree = xmlParseEnumerationType(ctxt);
  5530. if (*tree == NULL) return(0);
  5531. return(XML_ATTRIBUTE_ENUMERATION);
  5532. }
  5533. /**
  5534. * xmlParseAttributeType:
  5535. * @ctxt: an XML parser context
  5536. * @tree: the enumeration tree built while parsing
  5537. *
  5538. * parse the Attribute list def for an element
  5539. *
  5540. * [54] AttType ::= StringType | TokenizedType | EnumeratedType
  5541. *
  5542. * [55] StringType ::= 'CDATA'
  5543. *
  5544. * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
  5545. * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
  5546. *
  5547. * Validity constraints for attribute values syntax are checked in
  5548. * xmlValidateAttributeValue()
  5549. *
  5550. * [ VC: ID ]
  5551. * Values of type ID must match the Name production. A name must not
  5552. * appear more than once in an XML document as a value of this type;
  5553. * i.e., ID values must uniquely identify the elements which bear them.
  5554. *
  5555. * [ VC: One ID per Element Type ]
  5556. * No element type may have more than one ID attribute specified.
  5557. *
  5558. * [ VC: ID Attribute Default ]
  5559. * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
  5560. *
  5561. * [ VC: IDREF ]
  5562. * Values of type IDREF must match the Name production, and values
  5563. * of type IDREFS must match Names; each IDREF Name must match the value
  5564. * of an ID attribute on some element in the XML document; i.e. IDREF
  5565. * values must match the value of some ID attribute.
  5566. *
  5567. * [ VC: Entity Name ]
  5568. * Values of type ENTITY must match the Name production, values
  5569. * of type ENTITIES must match Names; each Entity Name must match the
  5570. * name of an unparsed entity declared in the DTD.
  5571. *
  5572. * [ VC: Name Token ]
  5573. * Values of type NMTOKEN must match the Nmtoken production; values
  5574. * of type NMTOKENS must match Nmtokens.
  5575. *
  5576. * Returns the attribute type
  5577. */
  5578. int
  5579. xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
  5580. SHRINK;
  5581. if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
  5582. SKIP(5);
  5583. return(XML_ATTRIBUTE_CDATA);
  5584. } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
  5585. SKIP(6);
  5586. return(XML_ATTRIBUTE_IDREFS);
  5587. } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
  5588. SKIP(5);
  5589. return(XML_ATTRIBUTE_IDREF);
  5590. } else if ((RAW == 'I') && (NXT(1) == 'D')) {
  5591. SKIP(2);
  5592. return(XML_ATTRIBUTE_ID);
  5593. } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
  5594. SKIP(6);
  5595. return(XML_ATTRIBUTE_ENTITY);
  5596. } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
  5597. SKIP(8);
  5598. return(XML_ATTRIBUTE_ENTITIES);
  5599. } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
  5600. SKIP(8);
  5601. return(XML_ATTRIBUTE_NMTOKENS);
  5602. } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
  5603. SKIP(7);
  5604. return(XML_ATTRIBUTE_NMTOKEN);
  5605. }
  5606. return(xmlParseEnumeratedType(ctxt, tree));
  5607. }
  5608. /**
  5609. * xmlParseAttributeListDecl:
  5610. * @ctxt: an XML parser context
  5611. *
  5612. * : parse the Attribute list def for an element
  5613. *
  5614. * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  5615. *
  5616. * [53] AttDef ::= S Name S AttType S DefaultDecl
  5617. *
  5618. */
  5619. void
  5620. xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
  5621. const xmlChar *elemName;
  5622. const xmlChar *attrName;
  5623. xmlEnumerationPtr tree;
  5624. if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
  5625. int inputid = ctxt->input->id;
  5626. SKIP(9);
  5627. if (SKIP_BLANKS == 0) {
  5628. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5629. "Space required after '<!ATTLIST'\n");
  5630. }
  5631. elemName = xmlParseName(ctxt);
  5632. if (elemName == NULL) {
  5633. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5634. "ATTLIST: no name for Element\n");
  5635. return;
  5636. }
  5637. SKIP_BLANKS;
  5638. GROW;
  5639. while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
  5640. int type;
  5641. int def;
  5642. xmlChar *defaultValue = NULL;
  5643. GROW;
  5644. tree = NULL;
  5645. attrName = xmlParseName(ctxt);
  5646. if (attrName == NULL) {
  5647. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5648. "ATTLIST: no name for Attribute\n");
  5649. break;
  5650. }
  5651. GROW;
  5652. if (SKIP_BLANKS == 0) {
  5653. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5654. "Space required after the attribute name\n");
  5655. break;
  5656. }
  5657. type = xmlParseAttributeType(ctxt, &tree);
  5658. if (type <= 0) {
  5659. break;
  5660. }
  5661. GROW;
  5662. if (SKIP_BLANKS == 0) {
  5663. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5664. "Space required after the attribute type\n");
  5665. if (tree != NULL)
  5666. xmlFreeEnumeration(tree);
  5667. break;
  5668. }
  5669. def = xmlParseDefaultDecl(ctxt, &defaultValue);
  5670. if (def <= 0) {
  5671. if (defaultValue != NULL)
  5672. xmlFree(defaultValue);
  5673. if (tree != NULL)
  5674. xmlFreeEnumeration(tree);
  5675. break;
  5676. }
  5677. if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
  5678. xmlAttrNormalizeSpace(defaultValue, defaultValue);
  5679. GROW;
  5680. if (RAW != '>') {
  5681. if (SKIP_BLANKS == 0) {
  5682. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5683. "Space required after the attribute default value\n");
  5684. if (defaultValue != NULL)
  5685. xmlFree(defaultValue);
  5686. if (tree != NULL)
  5687. xmlFreeEnumeration(tree);
  5688. break;
  5689. }
  5690. }
  5691. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  5692. (ctxt->sax->attributeDecl != NULL))
  5693. ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
  5694. type, def, defaultValue, tree);
  5695. else if (tree != NULL)
  5696. xmlFreeEnumeration(tree);
  5697. if ((ctxt->sax2) && (defaultValue != NULL) &&
  5698. (def != XML_ATTRIBUTE_IMPLIED) &&
  5699. (def != XML_ATTRIBUTE_REQUIRED)) {
  5700. xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
  5701. }
  5702. if (ctxt->sax2) {
  5703. xmlAddSpecialAttr(ctxt, elemName, attrName, type);
  5704. }
  5705. if (defaultValue != NULL)
  5706. xmlFree(defaultValue);
  5707. GROW;
  5708. }
  5709. if (RAW == '>') {
  5710. if (inputid != ctxt->input->id) {
  5711. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5712. "Attribute list declaration doesn't start and"
  5713. " stop in the same entity\n");
  5714. }
  5715. NEXT;
  5716. }
  5717. }
  5718. }
  5719. /**
  5720. * xmlParseElementMixedContentDecl:
  5721. * @ctxt: an XML parser context
  5722. * @inputchk: the input used for the current entity, needed for boundary checks
  5723. *
  5724. * parse the declaration for a Mixed Element content
  5725. * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
  5726. *
  5727. * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
  5728. * '(' S? '#PCDATA' S? ')'
  5729. *
  5730. * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
  5731. *
  5732. * [ VC: No Duplicate Types ]
  5733. * The same name must not appear more than once in a single
  5734. * mixed-content declaration.
  5735. *
  5736. * returns: the list of the xmlElementContentPtr describing the element choices
  5737. */
  5738. xmlElementContentPtr
  5739. xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
  5740. xmlElementContentPtr ret = NULL, cur = NULL, n;
  5741. const xmlChar *elem = NULL;
  5742. GROW;
  5743. if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
  5744. SKIP(7);
  5745. SKIP_BLANKS;
  5746. SHRINK;
  5747. if (RAW == ')') {
  5748. if (ctxt->input->id != inputchk) {
  5749. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5750. "Element content declaration doesn't start and"
  5751. " stop in the same entity\n");
  5752. }
  5753. NEXT;
  5754. ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
  5755. if (ret == NULL)
  5756. return(NULL);
  5757. if (RAW == '*') {
  5758. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  5759. NEXT;
  5760. }
  5761. return(ret);
  5762. }
  5763. if ((RAW == '(') || (RAW == '|')) {
  5764. ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
  5765. if (ret == NULL) return(NULL);
  5766. }
  5767. while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
  5768. NEXT;
  5769. if (elem == NULL) {
  5770. ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
  5771. if (ret == NULL) {
  5772. xmlFreeDocElementContent(ctxt->myDoc, cur);
  5773. return(NULL);
  5774. }
  5775. ret->c1 = cur;
  5776. if (cur != NULL)
  5777. cur->parent = ret;
  5778. cur = ret;
  5779. } else {
  5780. n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
  5781. if (n == NULL) {
  5782. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5783. return(NULL);
  5784. }
  5785. n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
  5786. if (n->c1 != NULL)
  5787. n->c1->parent = n;
  5788. cur->c2 = n;
  5789. if (n != NULL)
  5790. n->parent = cur;
  5791. cur = n;
  5792. }
  5793. SKIP_BLANKS;
  5794. elem = xmlParseName(ctxt);
  5795. if (elem == NULL) {
  5796. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5797. "xmlParseElementMixedContentDecl : Name expected\n");
  5798. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5799. return(NULL);
  5800. }
  5801. SKIP_BLANKS;
  5802. GROW;
  5803. }
  5804. if ((RAW == ')') && (NXT(1) == '*')) {
  5805. if (elem != NULL) {
  5806. cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
  5807. XML_ELEMENT_CONTENT_ELEMENT);
  5808. if (cur->c2 != NULL)
  5809. cur->c2->parent = cur;
  5810. }
  5811. if (ret != NULL)
  5812. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  5813. if (ctxt->input->id != inputchk) {
  5814. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5815. "Element content declaration doesn't start and"
  5816. " stop in the same entity\n");
  5817. }
  5818. SKIP(2);
  5819. } else {
  5820. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5821. xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
  5822. return(NULL);
  5823. }
  5824. } else {
  5825. xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
  5826. }
  5827. return(ret);
  5828. }
  5829. /**
  5830. * xmlParseElementChildrenContentDeclPriv:
  5831. * @ctxt: an XML parser context
  5832. * @inputchk: the input used for the current entity, needed for boundary checks
  5833. * @depth: the level of recursion
  5834. *
  5835. * parse the declaration for a Mixed Element content
  5836. * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
  5837. *
  5838. *
  5839. * [47] children ::= (choice | seq) ('?' | '*' | '+')?
  5840. *
  5841. * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  5842. *
  5843. * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
  5844. *
  5845. * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  5846. *
  5847. * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
  5848. * TODO Parameter-entity replacement text must be properly nested
  5849. * with parenthesized groups. That is to say, if either of the
  5850. * opening or closing parentheses in a choice, seq, or Mixed
  5851. * construct is contained in the replacement text for a parameter
  5852. * entity, both must be contained in the same replacement text. For
  5853. * interoperability, if a parameter-entity reference appears in a
  5854. * choice, seq, or Mixed construct, its replacement text should not
  5855. * be empty, and neither the first nor last non-blank character of
  5856. * the replacement text should be a connector (| or ,).
  5857. *
  5858. * Returns the tree of xmlElementContentPtr describing the element
  5859. * hierarchy.
  5860. */
  5861. static xmlElementContentPtr
  5862. xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
  5863. int depth) {
  5864. xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
  5865. const xmlChar *elem;
  5866. xmlChar type = 0;
  5867. if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
  5868. (depth > 2048)) {
  5869. xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
  5870. "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
  5871. depth);
  5872. return(NULL);
  5873. }
  5874. SKIP_BLANKS;
  5875. GROW;
  5876. if (RAW == '(') {
  5877. int inputid = ctxt->input->id;
  5878. /* Recurse on first child */
  5879. NEXT;
  5880. SKIP_BLANKS;
  5881. cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
  5882. depth + 1);
  5883. if (cur == NULL)
  5884. return(NULL);
  5885. SKIP_BLANKS;
  5886. GROW;
  5887. } else {
  5888. elem = xmlParseName(ctxt);
  5889. if (elem == NULL) {
  5890. xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
  5891. return(NULL);
  5892. }
  5893. cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
  5894. if (cur == NULL) {
  5895. xmlErrMemory(ctxt, NULL);
  5896. return(NULL);
  5897. }
  5898. GROW;
  5899. if (RAW == '?') {
  5900. cur->ocur = XML_ELEMENT_CONTENT_OPT;
  5901. NEXT;
  5902. } else if (RAW == '*') {
  5903. cur->ocur = XML_ELEMENT_CONTENT_MULT;
  5904. NEXT;
  5905. } else if (RAW == '+') {
  5906. cur->ocur = XML_ELEMENT_CONTENT_PLUS;
  5907. NEXT;
  5908. } else {
  5909. cur->ocur = XML_ELEMENT_CONTENT_ONCE;
  5910. }
  5911. GROW;
  5912. }
  5913. SKIP_BLANKS;
  5914. SHRINK;
  5915. while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
  5916. /*
  5917. * Each loop we parse one separator and one element.
  5918. */
  5919. if (RAW == ',') {
  5920. if (type == 0) type = CUR;
  5921. /*
  5922. * Detect "Name | Name , Name" error
  5923. */
  5924. else if (type != CUR) {
  5925. xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
  5926. "xmlParseElementChildrenContentDecl : '%c' expected\n",
  5927. type);
  5928. if ((last != NULL) && (last != ret))
  5929. xmlFreeDocElementContent(ctxt->myDoc, last);
  5930. if (ret != NULL)
  5931. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5932. return(NULL);
  5933. }
  5934. NEXT;
  5935. op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
  5936. if (op == NULL) {
  5937. if ((last != NULL) && (last != ret))
  5938. xmlFreeDocElementContent(ctxt->myDoc, last);
  5939. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5940. return(NULL);
  5941. }
  5942. if (last == NULL) {
  5943. op->c1 = ret;
  5944. if (ret != NULL)
  5945. ret->parent = op;
  5946. ret = cur = op;
  5947. } else {
  5948. cur->c2 = op;
  5949. if (op != NULL)
  5950. op->parent = cur;
  5951. op->c1 = last;
  5952. if (last != NULL)
  5953. last->parent = op;
  5954. cur =op;
  5955. last = NULL;
  5956. }
  5957. } else if (RAW == '|') {
  5958. if (type == 0) type = CUR;
  5959. /*
  5960. * Detect "Name , Name | Name" error
  5961. */
  5962. else if (type != CUR) {
  5963. xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
  5964. "xmlParseElementChildrenContentDecl : '%c' expected\n",
  5965. type);
  5966. if ((last != NULL) && (last != ret))
  5967. xmlFreeDocElementContent(ctxt->myDoc, last);
  5968. if (ret != NULL)
  5969. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5970. return(NULL);
  5971. }
  5972. NEXT;
  5973. op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
  5974. if (op == NULL) {
  5975. if ((last != NULL) && (last != ret))
  5976. xmlFreeDocElementContent(ctxt->myDoc, last);
  5977. if (ret != NULL)
  5978. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5979. return(NULL);
  5980. }
  5981. if (last == NULL) {
  5982. op->c1 = ret;
  5983. if (ret != NULL)
  5984. ret->parent = op;
  5985. ret = cur = op;
  5986. } else {
  5987. cur->c2 = op;
  5988. if (op != NULL)
  5989. op->parent = cur;
  5990. op->c1 = last;
  5991. if (last != NULL)
  5992. last->parent = op;
  5993. cur =op;
  5994. last = NULL;
  5995. }
  5996. } else {
  5997. xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
  5998. if ((last != NULL) && (last != ret))
  5999. xmlFreeDocElementContent(ctxt->myDoc, last);
  6000. if (ret != NULL)
  6001. xmlFreeDocElementContent(ctxt->myDoc, ret);
  6002. return(NULL);
  6003. }
  6004. GROW;
  6005. SKIP_BLANKS;
  6006. GROW;
  6007. if (RAW == '(') {
  6008. int inputid = ctxt->input->id;
  6009. /* Recurse on second child */
  6010. NEXT;
  6011. SKIP_BLANKS;
  6012. last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
  6013. depth + 1);
  6014. if (last == NULL) {
  6015. if (ret != NULL)
  6016. xmlFreeDocElementContent(ctxt->myDoc, ret);
  6017. return(NULL);
  6018. }
  6019. SKIP_BLANKS;
  6020. } else {
  6021. elem = xmlParseName(ctxt);
  6022. if (elem == NULL) {
  6023. xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
  6024. if (ret != NULL)
  6025. xmlFreeDocElementContent(ctxt->myDoc, ret);
  6026. return(NULL);
  6027. }
  6028. last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
  6029. if (last == NULL) {
  6030. if (ret != NULL)
  6031. xmlFreeDocElementContent(ctxt->myDoc, ret);
  6032. return(NULL);
  6033. }
  6034. if (RAW == '?') {
  6035. last->ocur = XML_ELEMENT_CONTENT_OPT;
  6036. NEXT;
  6037. } else if (RAW == '*') {
  6038. last->ocur = XML_ELEMENT_CONTENT_MULT;
  6039. NEXT;
  6040. } else if (RAW == '+') {
  6041. last->ocur = XML_ELEMENT_CONTENT_PLUS;
  6042. NEXT;
  6043. } else {
  6044. last->ocur = XML_ELEMENT_CONTENT_ONCE;
  6045. }
  6046. }
  6047. SKIP_BLANKS;
  6048. GROW;
  6049. }
  6050. if ((cur != NULL) && (last != NULL)) {
  6051. cur->c2 = last;
  6052. if (last != NULL)
  6053. last->parent = cur;
  6054. }
  6055. if (ctxt->input->id != inputchk) {
  6056. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6057. "Element content declaration doesn't start and stop in"
  6058. " the same entity\n");
  6059. }
  6060. NEXT;
  6061. if (RAW == '?') {
  6062. if (ret != NULL) {
  6063. if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
  6064. (ret->ocur == XML_ELEMENT_CONTENT_MULT))
  6065. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  6066. else
  6067. ret->ocur = XML_ELEMENT_CONTENT_OPT;
  6068. }
  6069. NEXT;
  6070. } else if (RAW == '*') {
  6071. if (ret != NULL) {
  6072. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  6073. cur = ret;
  6074. /*
  6075. * Some normalization:
  6076. * (a | b* | c?)* == (a | b | c)*
  6077. */
  6078. while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
  6079. if ((cur->c1 != NULL) &&
  6080. ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
  6081. (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
  6082. cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
  6083. if ((cur->c2 != NULL) &&
  6084. ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
  6085. (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
  6086. cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
  6087. cur = cur->c2;
  6088. }
  6089. }
  6090. NEXT;
  6091. } else if (RAW == '+') {
  6092. if (ret != NULL) {
  6093. int found = 0;
  6094. if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
  6095. (ret->ocur == XML_ELEMENT_CONTENT_MULT))
  6096. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  6097. else
  6098. ret->ocur = XML_ELEMENT_CONTENT_PLUS;
  6099. /*
  6100. * Some normalization:
  6101. * (a | b*)+ == (a | b)*
  6102. * (a | b?)+ == (a | b)*
  6103. */
  6104. while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
  6105. if ((cur->c1 != NULL) &&
  6106. ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
  6107. (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
  6108. cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
  6109. found = 1;
  6110. }
  6111. if ((cur->c2 != NULL) &&
  6112. ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
  6113. (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
  6114. cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
  6115. found = 1;
  6116. }
  6117. cur = cur->c2;
  6118. }
  6119. if (found)
  6120. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  6121. }
  6122. NEXT;
  6123. }
  6124. return(ret);
  6125. }
  6126. /**
  6127. * xmlParseElementChildrenContentDecl:
  6128. * @ctxt: an XML parser context
  6129. * @inputchk: the input used for the current entity, needed for boundary checks
  6130. *
  6131. * parse the declaration for a Mixed Element content
  6132. * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
  6133. *
  6134. * [47] children ::= (choice | seq) ('?' | '*' | '+')?
  6135. *
  6136. * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  6137. *
  6138. * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
  6139. *
  6140. * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  6141. *
  6142. * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
  6143. * TODO Parameter-entity replacement text must be properly nested
  6144. * with parenthesized groups. That is to say, if either of the
  6145. * opening or closing parentheses in a choice, seq, or Mixed
  6146. * construct is contained in the replacement text for a parameter
  6147. * entity, both must be contained in the same replacement text. For
  6148. * interoperability, if a parameter-entity reference appears in a
  6149. * choice, seq, or Mixed construct, its replacement text should not
  6150. * be empty, and neither the first nor last non-blank character of
  6151. * the replacement text should be a connector (| or ,).
  6152. *
  6153. * Returns the tree of xmlElementContentPtr describing the element
  6154. * hierarchy.
  6155. */
  6156. xmlElementContentPtr
  6157. xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
  6158. /* stub left for API/ABI compat */
  6159. return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
  6160. }
  6161. /**
  6162. * xmlParseElementContentDecl:
  6163. * @ctxt: an XML parser context
  6164. * @name: the name of the element being defined.
  6165. * @result: the Element Content pointer will be stored here if any
  6166. *
  6167. * parse the declaration for an Element content either Mixed or Children,
  6168. * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
  6169. *
  6170. * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
  6171. *
  6172. * returns: the type of element content XML_ELEMENT_TYPE_xxx
  6173. */
  6174. int
  6175. xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
  6176. xmlElementContentPtr *result) {
  6177. xmlElementContentPtr tree = NULL;
  6178. int inputid = ctxt->input->id;
  6179. int res;
  6180. *result = NULL;
  6181. if (RAW != '(') {
  6182. xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
  6183. "xmlParseElementContentDecl : %s '(' expected\n", name);
  6184. return(-1);
  6185. }
  6186. NEXT;
  6187. GROW;
  6188. if (ctxt->instate == XML_PARSER_EOF)
  6189. return(-1);
  6190. SKIP_BLANKS;
  6191. if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
  6192. tree = xmlParseElementMixedContentDecl(ctxt, inputid);
  6193. res = XML_ELEMENT_TYPE_MIXED;
  6194. } else {
  6195. tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
  6196. res = XML_ELEMENT_TYPE_ELEMENT;
  6197. }
  6198. SKIP_BLANKS;
  6199. *result = tree;
  6200. return(res);
  6201. }
  6202. /**
  6203. * xmlParseElementDecl:
  6204. * @ctxt: an XML parser context
  6205. *
  6206. * parse an Element declaration.
  6207. *
  6208. * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  6209. *
  6210. * [ VC: Unique Element Type Declaration ]
  6211. * No element type may be declared more than once
  6212. *
  6213. * Returns the type of the element, or -1 in case of error
  6214. */
  6215. int
  6216. xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
  6217. const xmlChar *name;
  6218. int ret = -1;
  6219. xmlElementContentPtr content = NULL;
  6220. /* GROW; done in the caller */
  6221. if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
  6222. int inputid = ctxt->input->id;
  6223. SKIP(9);
  6224. if (SKIP_BLANKS == 0) {
  6225. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  6226. "Space required after 'ELEMENT'\n");
  6227. return(-1);
  6228. }
  6229. name = xmlParseName(ctxt);
  6230. if (name == NULL) {
  6231. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  6232. "xmlParseElementDecl: no name for Element\n");
  6233. return(-1);
  6234. }
  6235. if (SKIP_BLANKS == 0) {
  6236. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  6237. "Space required after the element name\n");
  6238. }
  6239. if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
  6240. SKIP(5);
  6241. /*
  6242. * Element must always be empty.
  6243. */
  6244. ret = XML_ELEMENT_TYPE_EMPTY;
  6245. } else if ((RAW == 'A') && (NXT(1) == 'N') &&
  6246. (NXT(2) == 'Y')) {
  6247. SKIP(3);
  6248. /*
  6249. * Element is a generic container.
  6250. */
  6251. ret = XML_ELEMENT_TYPE_ANY;
  6252. } else if (RAW == '(') {
  6253. ret = xmlParseElementContentDecl(ctxt, name, &content);
  6254. } else {
  6255. /*
  6256. * [ WFC: PEs in Internal Subset ] error handling.
  6257. */
  6258. if ((RAW == '%') && (ctxt->external == 0) &&
  6259. (ctxt->inputNr == 1)) {
  6260. xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
  6261. "PEReference: forbidden within markup decl in internal subset\n");
  6262. } else {
  6263. xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
  6264. "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
  6265. }
  6266. return(-1);
  6267. }
  6268. SKIP_BLANKS;
  6269. if (RAW != '>') {
  6270. xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
  6271. if (content != NULL) {
  6272. xmlFreeDocElementContent(ctxt->myDoc, content);
  6273. }
  6274. } else {
  6275. if (inputid != ctxt->input->id) {
  6276. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6277. "Element declaration doesn't start and stop in"
  6278. " the same entity\n");
  6279. }
  6280. NEXT;
  6281. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  6282. (ctxt->sax->elementDecl != NULL)) {
  6283. if (content != NULL)
  6284. content->parent = NULL;
  6285. ctxt->sax->elementDecl(ctxt->userData, name, ret,
  6286. content);
  6287. if ((content != NULL) && (content->parent == NULL)) {
  6288. /*
  6289. * this is a trick: if xmlAddElementDecl is called,
  6290. * instead of copying the full tree it is plugged directly
  6291. * if called from the parser. Avoid duplicating the
  6292. * interfaces or change the API/ABI
  6293. */
  6294. xmlFreeDocElementContent(ctxt->myDoc, content);
  6295. }
  6296. } else if (content != NULL) {
  6297. xmlFreeDocElementContent(ctxt->myDoc, content);
  6298. }
  6299. }
  6300. }
  6301. return(ret);
  6302. }
  6303. /**
  6304. * xmlParseConditionalSections
  6305. * @ctxt: an XML parser context
  6306. *
  6307. * [61] conditionalSect ::= includeSect | ignoreSect
  6308. * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
  6309. * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
  6310. * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
  6311. * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
  6312. */
  6313. static void
  6314. xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
  6315. int *inputIds = NULL;
  6316. size_t inputIdsSize = 0;
  6317. size_t depth = 0;
  6318. while (ctxt->instate != XML_PARSER_EOF) {
  6319. if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  6320. int id = ctxt->input->id;
  6321. SKIP(3);
  6322. SKIP_BLANKS;
  6323. if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
  6324. SKIP(7);
  6325. SKIP_BLANKS;
  6326. if (RAW != '[') {
  6327. xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
  6328. xmlHaltParser(ctxt);
  6329. goto error;
  6330. }
  6331. if (ctxt->input->id != id) {
  6332. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6333. "All markup of the conditional section is"
  6334. " not in the same entity\n");
  6335. }
  6336. NEXT;
  6337. if (inputIdsSize <= depth) {
  6338. int *tmp;
  6339. inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
  6340. tmp = (int *) xmlRealloc(inputIds,
  6341. inputIdsSize * sizeof(int));
  6342. if (tmp == NULL) {
  6343. xmlErrMemory(ctxt, NULL);
  6344. goto error;
  6345. }
  6346. inputIds = tmp;
  6347. }
  6348. inputIds[depth] = id;
  6349. depth++;
  6350. } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
  6351. int state;
  6352. xmlParserInputState instate;
  6353. size_t ignoreDepth = 0;
  6354. SKIP(6);
  6355. SKIP_BLANKS;
  6356. if (RAW != '[') {
  6357. xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
  6358. xmlHaltParser(ctxt);
  6359. goto error;
  6360. }
  6361. if (ctxt->input->id != id) {
  6362. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6363. "All markup of the conditional section is"
  6364. " not in the same entity\n");
  6365. }
  6366. NEXT;
  6367. /*
  6368. * Parse up to the end of the conditional section but disable
  6369. * SAX event generating DTD building in the meantime
  6370. */
  6371. state = ctxt->disableSAX;
  6372. instate = ctxt->instate;
  6373. if (ctxt->recovery == 0) ctxt->disableSAX = 1;
  6374. ctxt->instate = XML_PARSER_IGNORE;
  6375. while (RAW != 0) {
  6376. if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  6377. SKIP(3);
  6378. ignoreDepth++;
  6379. /* Check for integer overflow */
  6380. if (ignoreDepth == 0) {
  6381. xmlErrMemory(ctxt, NULL);
  6382. goto error;
  6383. }
  6384. } else if ((RAW == ']') && (NXT(1) == ']') &&
  6385. (NXT(2) == '>')) {
  6386. if (ignoreDepth == 0)
  6387. break;
  6388. SKIP(3);
  6389. ignoreDepth--;
  6390. } else {
  6391. NEXT;
  6392. }
  6393. }
  6394. ctxt->disableSAX = state;
  6395. ctxt->instate = instate;
  6396. if (RAW == 0) {
  6397. xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
  6398. goto error;
  6399. }
  6400. if (ctxt->input->id != id) {
  6401. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6402. "All markup of the conditional section is"
  6403. " not in the same entity\n");
  6404. }
  6405. SKIP(3);
  6406. } else {
  6407. xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
  6408. xmlHaltParser(ctxt);
  6409. goto error;
  6410. }
  6411. } else if ((depth > 0) &&
  6412. (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
  6413. depth--;
  6414. if (ctxt->input->id != inputIds[depth]) {
  6415. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6416. "All markup of the conditional section is not"
  6417. " in the same entity\n");
  6418. }
  6419. SKIP(3);
  6420. } else {
  6421. const xmlChar *check = CUR_PTR;
  6422. unsigned int cons = ctxt->input->consumed;
  6423. xmlParseMarkupDecl(ctxt);
  6424. if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
  6425. xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
  6426. xmlHaltParser(ctxt);
  6427. goto error;
  6428. }
  6429. }
  6430. if (depth == 0)
  6431. break;
  6432. SKIP_BLANKS;
  6433. GROW;
  6434. }
  6435. error:
  6436. xmlFree(inputIds);
  6437. }
  6438. /**
  6439. * xmlParseMarkupDecl:
  6440. * @ctxt: an XML parser context
  6441. *
  6442. * parse Markup declarations
  6443. *
  6444. * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
  6445. * NotationDecl | PI | Comment
  6446. *
  6447. * [ VC: Proper Declaration/PE Nesting ]
  6448. * Parameter-entity replacement text must be properly nested with
  6449. * markup declarations. That is to say, if either the first character
  6450. * or the last character of a markup declaration (markupdecl above) is
  6451. * contained in the replacement text for a parameter-entity reference,
  6452. * both must be contained in the same replacement text.
  6453. *
  6454. * [ WFC: PEs in Internal Subset ]
  6455. * In the internal DTD subset, parameter-entity references can occur
  6456. * only where markup declarations can occur, not within markup declarations.
  6457. * (This does not apply to references that occur in external parameter
  6458. * entities or to the external subset.)
  6459. */
  6460. void
  6461. xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
  6462. GROW;
  6463. if (CUR == '<') {
  6464. if (NXT(1) == '!') {
  6465. switch (NXT(2)) {
  6466. case 'E':
  6467. if (NXT(3) == 'L')
  6468. xmlParseElementDecl(ctxt);
  6469. else if (NXT(3) == 'N')
  6470. xmlParseEntityDecl(ctxt);
  6471. break;
  6472. case 'A':
  6473. xmlParseAttributeListDecl(ctxt);
  6474. break;
  6475. case 'N':
  6476. xmlParseNotationDecl(ctxt);
  6477. break;
  6478. case '-':
  6479. xmlParseComment(ctxt);
  6480. break;
  6481. default:
  6482. /* there is an error but it will be detected later */
  6483. break;
  6484. }
  6485. } else if (NXT(1) == '?') {
  6486. xmlParsePI(ctxt);
  6487. }
  6488. }
  6489. /*
  6490. * detect requirement to exit there and act accordingly
  6491. * and avoid having instate overridden later on
  6492. */
  6493. if (ctxt->instate == XML_PARSER_EOF)
  6494. return;
  6495. ctxt->instate = XML_PARSER_DTD;
  6496. }
  6497. /**
  6498. * xmlParseTextDecl:
  6499. * @ctxt: an XML parser context
  6500. *
  6501. * parse an XML declaration header for external entities
  6502. *
  6503. * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
  6504. */
  6505. void
  6506. xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
  6507. xmlChar *version;
  6508. const xmlChar *encoding;
  6509. int oldstate;
  6510. /*
  6511. * We know that '<?xml' is here.
  6512. */
  6513. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  6514. SKIP(5);
  6515. } else {
  6516. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
  6517. return;
  6518. }
  6519. /* Avoid expansion of parameter entities when skipping blanks. */
  6520. oldstate = ctxt->instate;
  6521. ctxt->instate = XML_PARSER_START;
  6522. if (SKIP_BLANKS == 0) {
  6523. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  6524. "Space needed after '<?xml'\n");
  6525. }
  6526. /*
  6527. * We may have the VersionInfo here.
  6528. */
  6529. version = xmlParseVersionInfo(ctxt);
  6530. if (version == NULL)
  6531. version = xmlCharStrdup(XML_DEFAULT_VERSION);
  6532. else {
  6533. if (SKIP_BLANKS == 0) {
  6534. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  6535. "Space needed here\n");
  6536. }
  6537. }
  6538. ctxt->input->version = version;
  6539. /*
  6540. * We must have the encoding declaration
  6541. */
  6542. encoding = xmlParseEncodingDecl(ctxt);
  6543. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  6544. /*
  6545. * The XML REC instructs us to stop parsing right here
  6546. */
  6547. ctxt->instate = oldstate;
  6548. return;
  6549. }
  6550. if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
  6551. xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
  6552. "Missing encoding in text declaration\n");
  6553. }
  6554. SKIP_BLANKS;
  6555. if ((RAW == '?') && (NXT(1) == '>')) {
  6556. SKIP(2);
  6557. } else if (RAW == '>') {
  6558. /* Deprecated old WD ... */
  6559. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
  6560. NEXT;
  6561. } else {
  6562. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
  6563. MOVETO_ENDTAG(CUR_PTR);
  6564. NEXT;
  6565. }
  6566. ctxt->instate = oldstate;
  6567. }
  6568. /**
  6569. * xmlParseExternalSubset:
  6570. * @ctxt: an XML parser context
  6571. * @ExternalID: the external identifier
  6572. * @SystemID: the system identifier (or URL)
  6573. *
  6574. * parse Markup declarations from an external subset
  6575. *
  6576. * [30] extSubset ::= textDecl? extSubsetDecl
  6577. *
  6578. * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
  6579. */
  6580. void
  6581. xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
  6582. const xmlChar *SystemID) {
  6583. xmlDetectSAX2(ctxt);
  6584. GROW;
  6585. if ((ctxt->encoding == NULL) &&
  6586. (ctxt->input->end - ctxt->input->cur >= 4)) {
  6587. xmlChar start[4];
  6588. xmlCharEncoding enc;
  6589. start[0] = RAW;
  6590. start[1] = NXT(1);
  6591. start[2] = NXT(2);
  6592. start[3] = NXT(3);
  6593. enc = xmlDetectCharEncoding(start, 4);
  6594. if (enc != XML_CHAR_ENCODING_NONE)
  6595. xmlSwitchEncoding(ctxt, enc);
  6596. }
  6597. if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
  6598. xmlParseTextDecl(ctxt);
  6599. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  6600. /*
  6601. * The XML REC instructs us to stop parsing right here
  6602. */
  6603. xmlHaltParser(ctxt);
  6604. return;
  6605. }
  6606. }
  6607. if (ctxt->myDoc == NULL) {
  6608. ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
  6609. if (ctxt->myDoc == NULL) {
  6610. xmlErrMemory(ctxt, "New Doc failed");
  6611. return;
  6612. }
  6613. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  6614. }
  6615. if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
  6616. xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
  6617. ctxt->instate = XML_PARSER_DTD;
  6618. ctxt->external = 1;
  6619. SKIP_BLANKS;
  6620. while (((RAW == '<') && (NXT(1) == '?')) ||
  6621. ((RAW == '<') && (NXT(1) == '!')) ||
  6622. (RAW == '%')) {
  6623. const xmlChar *check = CUR_PTR;
  6624. unsigned int cons = ctxt->input->consumed;
  6625. GROW;
  6626. if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  6627. xmlParseConditionalSections(ctxt);
  6628. } else
  6629. xmlParseMarkupDecl(ctxt);
  6630. SKIP_BLANKS;
  6631. if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
  6632. xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
  6633. break;
  6634. }
  6635. }
  6636. if (RAW != 0) {
  6637. xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
  6638. }
  6639. }
  6640. /**
  6641. * xmlParseReference:
  6642. * @ctxt: an XML parser context
  6643. *
  6644. * parse and handle entity references in content, depending on the SAX
  6645. * interface, this may end-up in a call to character() if this is a
  6646. * CharRef, a predefined entity, if there is no reference() callback.
  6647. * or if the parser was asked to switch to that mode.
  6648. *
  6649. * [67] Reference ::= EntityRef | CharRef
  6650. */
  6651. void
  6652. xmlParseReference(xmlParserCtxtPtr ctxt) {
  6653. xmlEntityPtr ent;
  6654. xmlChar *val;
  6655. int was_checked;
  6656. xmlNodePtr list = NULL;
  6657. xmlParserErrors ret = XML_ERR_OK;
  6658. if (RAW != '&')
  6659. return;
  6660. /*
  6661. * Simple case of a CharRef
  6662. */
  6663. if (NXT(1) == '#') {
  6664. int i = 0;
  6665. xmlChar out[16];
  6666. int hex = NXT(2);
  6667. int value = xmlParseCharRef(ctxt);
  6668. if (value == 0)
  6669. return;
  6670. if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
  6671. /*
  6672. * So we are using non-UTF-8 buffers
  6673. * Check that the char fit on 8bits, if not
  6674. * generate a CharRef.
  6675. */
  6676. if (value <= 0xFF) {
  6677. out[0] = value;
  6678. out[1] = 0;
  6679. if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
  6680. (!ctxt->disableSAX))
  6681. ctxt->sax->characters(ctxt->userData, out, 1);
  6682. } else {
  6683. if ((hex == 'x') || (hex == 'X'))
  6684. snprintf((char *)out, sizeof(out), "#x%X", value);
  6685. else
  6686. snprintf((char *)out, sizeof(out), "#%d", value);
  6687. if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
  6688. (!ctxt->disableSAX))
  6689. ctxt->sax->reference(ctxt->userData, out);
  6690. }
  6691. } else {
  6692. /*
  6693. * Just encode the value in UTF-8
  6694. */
  6695. COPY_BUF(0 ,out, i, value);
  6696. out[i] = 0;
  6697. if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
  6698. (!ctxt->disableSAX))
  6699. ctxt->sax->characters(ctxt->userData, out, i);
  6700. }
  6701. return;
  6702. }
  6703. /*
  6704. * We are seeing an entity reference
  6705. */
  6706. ent = xmlParseEntityRef(ctxt);
  6707. if (ent == NULL) return;
  6708. if (!ctxt->wellFormed)
  6709. return;
  6710. was_checked = ent->checked;
  6711. /* special case of predefined entities */
  6712. if ((ent->name == NULL) ||
  6713. (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
  6714. val = ent->content;
  6715. if (val == NULL) return;
  6716. /*
  6717. * inline the entity.
  6718. */
  6719. if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
  6720. (!ctxt->disableSAX))
  6721. ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
  6722. return;
  6723. }
  6724. /*
  6725. * The first reference to the entity trigger a parsing phase
  6726. * where the ent->children is filled with the result from
  6727. * the parsing.
  6728. * Note: external parsed entities will not be loaded, it is not
  6729. * required for a non-validating parser, unless the parsing option
  6730. * of validating, or substituting entities were given. Doing so is
  6731. * far more secure as the parser will only process data coming from
  6732. * the document entity by default.
  6733. */
  6734. if (((ent->checked == 0) ||
  6735. ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
  6736. ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
  6737. (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
  6738. unsigned long oldnbent = ctxt->nbentities, diff;
  6739. /*
  6740. * This is a bit hackish but this seems the best
  6741. * way to make sure both SAX and DOM entity support
  6742. * behaves okay.
  6743. */
  6744. void *user_data;
  6745. if (ctxt->userData == ctxt)
  6746. user_data = NULL;
  6747. else
  6748. user_data = ctxt->userData;
  6749. /*
  6750. * Check that this entity is well formed
  6751. * 4.3.2: An internal general parsed entity is well-formed
  6752. * if its replacement text matches the production labeled
  6753. * content.
  6754. */
  6755. if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
  6756. ctxt->depth++;
  6757. ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
  6758. user_data, &list);
  6759. ctxt->depth--;
  6760. } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
  6761. ctxt->depth++;
  6762. ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
  6763. user_data, ctxt->depth, ent->URI,
  6764. ent->ExternalID, &list);
  6765. ctxt->depth--;
  6766. } else {
  6767. ret = XML_ERR_ENTITY_PE_INTERNAL;
  6768. xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
  6769. "invalid entity type found\n", NULL);
  6770. }
  6771. /*
  6772. * Store the number of entities needing parsing for this entity
  6773. * content and do checkings
  6774. */
  6775. diff = ctxt->nbentities - oldnbent + 1;
  6776. if (diff > INT_MAX / 2)
  6777. diff = INT_MAX / 2;
  6778. ent->checked = diff * 2;
  6779. if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
  6780. ent->checked |= 1;
  6781. if (ret == XML_ERR_ENTITY_LOOP) {
  6782. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  6783. xmlHaltParser(ctxt);
  6784. xmlFreeNodeList(list);
  6785. return;
  6786. }
  6787. if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
  6788. xmlFreeNodeList(list);
  6789. return;
  6790. }
  6791. if ((ret == XML_ERR_OK) && (list != NULL)) {
  6792. if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
  6793. (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
  6794. (ent->children == NULL)) {
  6795. ent->children = list;
  6796. /*
  6797. * Prune it directly in the generated document
  6798. * except for single text nodes.
  6799. */
  6800. if ((ctxt->replaceEntities == 0) ||
  6801. (ctxt->parseMode == XML_PARSE_READER) ||
  6802. ((list->type == XML_TEXT_NODE) &&
  6803. (list->next == NULL))) {
  6804. ent->owner = 1;
  6805. while (list != NULL) {
  6806. list->parent = (xmlNodePtr) ent;
  6807. xmlSetTreeDoc(list, ent->doc);
  6808. if (list->next == NULL)
  6809. ent->last = list;
  6810. list = list->next;
  6811. }
  6812. list = NULL;
  6813. } else {
  6814. ent->owner = 0;
  6815. while (list != NULL) {
  6816. list->parent = (xmlNodePtr) ctxt->node;
  6817. list->doc = ctxt->myDoc;
  6818. if (list->next == NULL)
  6819. ent->last = list;
  6820. list = list->next;
  6821. }
  6822. list = ent->children;
  6823. #ifdef LIBXML_LEGACY_ENABLED
  6824. if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
  6825. xmlAddEntityReference(ent, list, NULL);
  6826. #endif /* LIBXML_LEGACY_ENABLED */
  6827. }
  6828. } else {
  6829. xmlFreeNodeList(list);
  6830. list = NULL;
  6831. }
  6832. } else if ((ret != XML_ERR_OK) &&
  6833. (ret != XML_WAR_UNDECLARED_ENTITY)) {
  6834. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  6835. "Entity '%s' failed to parse\n", ent->name);
  6836. if (ent->content != NULL)
  6837. ent->content[0] = 0;
  6838. xmlParserEntityCheck(ctxt, 0, ent, 0);
  6839. } else if (list != NULL) {
  6840. xmlFreeNodeList(list);
  6841. list = NULL;
  6842. }
  6843. if (ent->checked == 0)
  6844. ent->checked = 2;
  6845. /* Prevent entity from being parsed and expanded twice (Bug 760367). */
  6846. was_checked = 0;
  6847. } else if (ent->checked != 1) {
  6848. ctxt->nbentities += ent->checked / 2;
  6849. }
  6850. /*
  6851. * Now that the entity content has been gathered
  6852. * provide it to the application, this can take different forms based
  6853. * on the parsing modes.
  6854. */
  6855. if (ent->children == NULL) {
  6856. /*
  6857. * Probably running in SAX mode and the callbacks don't
  6858. * build the entity content. So unless we already went
  6859. * though parsing for first checking go though the entity
  6860. * content to generate callbacks associated to the entity
  6861. */
  6862. if (was_checked != 0) {
  6863. void *user_data;
  6864. /*
  6865. * This is a bit hackish but this seems the best
  6866. * way to make sure both SAX and DOM entity support
  6867. * behaves okay.
  6868. */
  6869. if (ctxt->userData == ctxt)
  6870. user_data = NULL;
  6871. else
  6872. user_data = ctxt->userData;
  6873. if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
  6874. ctxt->depth++;
  6875. ret = xmlParseBalancedChunkMemoryInternal(ctxt,
  6876. ent->content, user_data, NULL);
  6877. ctxt->depth--;
  6878. } else if (ent->etype ==
  6879. XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
  6880. ctxt->depth++;
  6881. ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
  6882. ctxt->sax, user_data, ctxt->depth,
  6883. ent->URI, ent->ExternalID, NULL);
  6884. ctxt->depth--;
  6885. } else {
  6886. ret = XML_ERR_ENTITY_PE_INTERNAL;
  6887. xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
  6888. "invalid entity type found\n", NULL);
  6889. }
  6890. if (ret == XML_ERR_ENTITY_LOOP) {
  6891. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  6892. return;
  6893. }
  6894. }
  6895. if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
  6896. (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
  6897. /*
  6898. * Entity reference callback comes second, it's somewhat
  6899. * superfluous but a compatibility to historical behaviour
  6900. */
  6901. ctxt->sax->reference(ctxt->userData, ent->name);
  6902. }
  6903. return;
  6904. }
  6905. /*
  6906. * If we didn't get any children for the entity being built
  6907. */
  6908. if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
  6909. (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
  6910. /*
  6911. * Create a node.
  6912. */
  6913. ctxt->sax->reference(ctxt->userData, ent->name);
  6914. return;
  6915. }
  6916. if ((ctxt->replaceEntities) || (ent->children == NULL)) {
  6917. /*
  6918. * There is a problem on the handling of _private for entities
  6919. * (bug 155816): Should we copy the content of the field from
  6920. * the entity (possibly overwriting some value set by the user
  6921. * when a copy is created), should we leave it alone, or should
  6922. * we try to take care of different situations? The problem
  6923. * is exacerbated by the usage of this field by the xmlReader.
  6924. * To fix this bug, we look at _private on the created node
  6925. * and, if it's NULL, we copy in whatever was in the entity.
  6926. * If it's not NULL we leave it alone. This is somewhat of a
  6927. * hack - maybe we should have further tests to determine
  6928. * what to do.
  6929. */
  6930. if ((ctxt->node != NULL) && (ent->children != NULL)) {
  6931. /*
  6932. * Seems we are generating the DOM content, do
  6933. * a simple tree copy for all references except the first
  6934. * In the first occurrence list contains the replacement.
  6935. */
  6936. if (((list == NULL) && (ent->owner == 0)) ||
  6937. (ctxt->parseMode == XML_PARSE_READER)) {
  6938. xmlNodePtr nw = NULL, cur, firstChild = NULL;
  6939. /*
  6940. * We are copying here, make sure there is no abuse
  6941. */
  6942. ctxt->sizeentcopy += ent->length + 5;
  6943. if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
  6944. return;
  6945. /*
  6946. * when operating on a reader, the entities definitions
  6947. * are always owning the entities subtree.
  6948. if (ctxt->parseMode == XML_PARSE_READER)
  6949. ent->owner = 1;
  6950. */
  6951. cur = ent->children;
  6952. while (cur != NULL) {
  6953. nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
  6954. if (nw != NULL) {
  6955. if (nw->_private == NULL)
  6956. nw->_private = cur->_private;
  6957. if (firstChild == NULL){
  6958. firstChild = nw;
  6959. }
  6960. nw = xmlAddChild(ctxt->node, nw);
  6961. }
  6962. if (cur == ent->last) {
  6963. /*
  6964. * needed to detect some strange empty
  6965. * node cases in the reader tests
  6966. */
  6967. if ((ctxt->parseMode == XML_PARSE_READER) &&
  6968. (nw != NULL) &&
  6969. (nw->type == XML_ELEMENT_NODE) &&
  6970. (nw->children == NULL))
  6971. nw->extra = 1;
  6972. break;
  6973. }
  6974. cur = cur->next;
  6975. }
  6976. #ifdef LIBXML_LEGACY_ENABLED
  6977. if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
  6978. xmlAddEntityReference(ent, firstChild, nw);
  6979. #endif /* LIBXML_LEGACY_ENABLED */
  6980. } else if ((list == NULL) || (ctxt->inputNr > 0)) {
  6981. xmlNodePtr nw = NULL, cur, next, last,
  6982. firstChild = NULL;
  6983. /*
  6984. * We are copying here, make sure there is no abuse
  6985. */
  6986. ctxt->sizeentcopy += ent->length + 5;
  6987. if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
  6988. return;
  6989. /*
  6990. * Copy the entity child list and make it the new
  6991. * entity child list. The goal is to make sure any
  6992. * ID or REF referenced will be the one from the
  6993. * document content and not the entity copy.
  6994. */
  6995. cur = ent->children;
  6996. ent->children = NULL;
  6997. last = ent->last;
  6998. ent->last = NULL;
  6999. while (cur != NULL) {
  7000. next = cur->next;
  7001. cur->next = NULL;
  7002. cur->parent = NULL;
  7003. nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
  7004. if (nw != NULL) {
  7005. if (nw->_private == NULL)
  7006. nw->_private = cur->_private;
  7007. if (firstChild == NULL){
  7008. firstChild = cur;
  7009. }
  7010. xmlAddChild((xmlNodePtr) ent, nw);
  7011. xmlAddChild(ctxt->node, cur);
  7012. }
  7013. if (cur == last)
  7014. break;
  7015. cur = next;
  7016. }
  7017. if (ent->owner == 0)
  7018. ent->owner = 1;
  7019. #ifdef LIBXML_LEGACY_ENABLED
  7020. if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
  7021. xmlAddEntityReference(ent, firstChild, nw);
  7022. #endif /* LIBXML_LEGACY_ENABLED */
  7023. } else {
  7024. const xmlChar *nbktext;
  7025. /*
  7026. * the name change is to avoid coalescing of the
  7027. * node with a possible previous text one which
  7028. * would make ent->children a dangling pointer
  7029. */
  7030. nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
  7031. -1);
  7032. if (ent->children->type == XML_TEXT_NODE)
  7033. ent->children->name = nbktext;
  7034. if ((ent->last != ent->children) &&
  7035. (ent->last->type == XML_TEXT_NODE))
  7036. ent->last->name = nbktext;
  7037. xmlAddChildList(ctxt->node, ent->children);
  7038. }
  7039. /*
  7040. * This is to avoid a nasty side effect, see
  7041. * characters() in SAX.c
  7042. */
  7043. ctxt->nodemem = 0;
  7044. ctxt->nodelen = 0;
  7045. return;
  7046. }
  7047. }
  7048. }
  7049. /**
  7050. * xmlParseEntityRef:
  7051. * @ctxt: an XML parser context
  7052. *
  7053. * parse ENTITY references declarations
  7054. *
  7055. * [68] EntityRef ::= '&' Name ';'
  7056. *
  7057. * [ WFC: Entity Declared ]
  7058. * In a document without any DTD, a document with only an internal DTD
  7059. * subset which contains no parameter entity references, or a document
  7060. * with "standalone='yes'", the Name given in the entity reference
  7061. * must match that in an entity declaration, except that well-formed
  7062. * documents need not declare any of the following entities: amp, lt,
  7063. * gt, apos, quot. The declaration of a parameter entity must precede
  7064. * any reference to it. Similarly, the declaration of a general entity
  7065. * must precede any reference to it which appears in a default value in an
  7066. * attribute-list declaration. Note that if entities are declared in the
  7067. * external subset or in external parameter entities, a non-validating
  7068. * processor is not obligated to read and process their declarations;
  7069. * for such documents, the rule that an entity must be declared is a
  7070. * well-formedness constraint only if standalone='yes'.
  7071. *
  7072. * [ WFC: Parsed Entity ]
  7073. * An entity reference must not contain the name of an unparsed entity
  7074. *
  7075. * Returns the xmlEntityPtr if found, or NULL otherwise.
  7076. */
  7077. xmlEntityPtr
  7078. xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
  7079. const xmlChar *name;
  7080. xmlEntityPtr ent = NULL;
  7081. GROW;
  7082. if (ctxt->instate == XML_PARSER_EOF)
  7083. return(NULL);
  7084. if (RAW != '&')
  7085. return(NULL);
  7086. NEXT;
  7087. name = xmlParseName(ctxt);
  7088. if (name == NULL) {
  7089. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7090. "xmlParseEntityRef: no name\n");
  7091. return(NULL);
  7092. }
  7093. if (RAW != ';') {
  7094. xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
  7095. return(NULL);
  7096. }
  7097. NEXT;
  7098. /*
  7099. * Predefined entities override any extra definition
  7100. */
  7101. if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
  7102. ent = xmlGetPredefinedEntity(name);
  7103. if (ent != NULL)
  7104. return(ent);
  7105. }
  7106. /*
  7107. * Increase the number of entity references parsed
  7108. */
  7109. ctxt->nbentities++;
  7110. /*
  7111. * Ask first SAX for entity resolution, otherwise try the
  7112. * entities which may have stored in the parser context.
  7113. */
  7114. if (ctxt->sax != NULL) {
  7115. if (ctxt->sax->getEntity != NULL)
  7116. ent = ctxt->sax->getEntity(ctxt->userData, name);
  7117. if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
  7118. (ctxt->options & XML_PARSE_OLDSAX))
  7119. ent = xmlGetPredefinedEntity(name);
  7120. if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
  7121. (ctxt->userData==ctxt)) {
  7122. ent = xmlSAX2GetEntity(ctxt, name);
  7123. }
  7124. }
  7125. if (ctxt->instate == XML_PARSER_EOF)
  7126. return(NULL);
  7127. /*
  7128. * [ WFC: Entity Declared ]
  7129. * In a document without any DTD, a document with only an
  7130. * internal DTD subset which contains no parameter entity
  7131. * references, or a document with "standalone='yes'", the
  7132. * Name given in the entity reference must match that in an
  7133. * entity declaration, except that well-formed documents
  7134. * need not declare any of the following entities: amp, lt,
  7135. * gt, apos, quot.
  7136. * The declaration of a parameter entity must precede any
  7137. * reference to it.
  7138. * Similarly, the declaration of a general entity must
  7139. * precede any reference to it which appears in a default
  7140. * value in an attribute-list declaration. Note that if
  7141. * entities are declared in the external subset or in
  7142. * external parameter entities, a non-validating processor
  7143. * is not obligated to read and process their declarations;
  7144. * for such documents, the rule that an entity must be
  7145. * declared is a well-formedness constraint only if
  7146. * standalone='yes'.
  7147. */
  7148. if (ent == NULL) {
  7149. if ((ctxt->standalone == 1) ||
  7150. ((ctxt->hasExternalSubset == 0) &&
  7151. (ctxt->hasPErefs == 0))) {
  7152. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  7153. "Entity '%s' not defined\n", name);
  7154. } else {
  7155. xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7156. "Entity '%s' not defined\n", name);
  7157. if ((ctxt->inSubset == 0) &&
  7158. (ctxt->sax != NULL) &&
  7159. (ctxt->sax->reference != NULL)) {
  7160. ctxt->sax->reference(ctxt->userData, name);
  7161. }
  7162. }
  7163. xmlParserEntityCheck(ctxt, 0, ent, 0);
  7164. ctxt->valid = 0;
  7165. }
  7166. /*
  7167. * [ WFC: Parsed Entity ]
  7168. * An entity reference must not contain the name of an
  7169. * unparsed entity
  7170. */
  7171. else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
  7172. xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
  7173. "Entity reference to unparsed entity %s\n", name);
  7174. }
  7175. /*
  7176. * [ WFC: No External Entity References ]
  7177. * Attribute values cannot contain direct or indirect
  7178. * entity references to external entities.
  7179. */
  7180. else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
  7181. (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
  7182. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
  7183. "Attribute references external entity '%s'\n", name);
  7184. }
  7185. /*
  7186. * [ WFC: No < in Attribute Values ]
  7187. * The replacement text of any entity referred to directly or
  7188. * indirectly in an attribute value (other than "&lt;") must
  7189. * not contain a <.
  7190. */
  7191. else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
  7192. (ent != NULL) &&
  7193. (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
  7194. if (((ent->checked & 1) || (ent->checked == 0)) &&
  7195. (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
  7196. xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
  7197. "'<' in entity '%s' is not allowed in attributes values\n", name);
  7198. }
  7199. }
  7200. /*
  7201. * Internal check, no parameter entities here ...
  7202. */
  7203. else {
  7204. switch (ent->etype) {
  7205. case XML_INTERNAL_PARAMETER_ENTITY:
  7206. case XML_EXTERNAL_PARAMETER_ENTITY:
  7207. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
  7208. "Attempt to reference the parameter entity '%s'\n",
  7209. name);
  7210. break;
  7211. default:
  7212. break;
  7213. }
  7214. }
  7215. /*
  7216. * [ WFC: No Recursion ]
  7217. * A parsed entity must not contain a recursive reference
  7218. * to itself, either directly or indirectly.
  7219. * Done somewhere else
  7220. */
  7221. return(ent);
  7222. }
  7223. /**
  7224. * xmlParseStringEntityRef:
  7225. * @ctxt: an XML parser context
  7226. * @str: a pointer to an index in the string
  7227. *
  7228. * parse ENTITY references declarations, but this version parses it from
  7229. * a string value.
  7230. *
  7231. * [68] EntityRef ::= '&' Name ';'
  7232. *
  7233. * [ WFC: Entity Declared ]
  7234. * In a document without any DTD, a document with only an internal DTD
  7235. * subset which contains no parameter entity references, or a document
  7236. * with "standalone='yes'", the Name given in the entity reference
  7237. * must match that in an entity declaration, except that well-formed
  7238. * documents need not declare any of the following entities: amp, lt,
  7239. * gt, apos, quot. The declaration of a parameter entity must precede
  7240. * any reference to it. Similarly, the declaration of a general entity
  7241. * must precede any reference to it which appears in a default value in an
  7242. * attribute-list declaration. Note that if entities are declared in the
  7243. * external subset or in external parameter entities, a non-validating
  7244. * processor is not obligated to read and process their declarations;
  7245. * for such documents, the rule that an entity must be declared is a
  7246. * well-formedness constraint only if standalone='yes'.
  7247. *
  7248. * [ WFC: Parsed Entity ]
  7249. * An entity reference must not contain the name of an unparsed entity
  7250. *
  7251. * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
  7252. * is updated to the current location in the string.
  7253. */
  7254. static xmlEntityPtr
  7255. xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
  7256. xmlChar *name;
  7257. const xmlChar *ptr;
  7258. xmlChar cur;
  7259. xmlEntityPtr ent = NULL;
  7260. if ((str == NULL) || (*str == NULL))
  7261. return(NULL);
  7262. ptr = *str;
  7263. cur = *ptr;
  7264. if (cur != '&')
  7265. return(NULL);
  7266. ptr++;
  7267. name = xmlParseStringName(ctxt, &ptr);
  7268. if (name == NULL) {
  7269. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7270. "xmlParseStringEntityRef: no name\n");
  7271. *str = ptr;
  7272. return(NULL);
  7273. }
  7274. if (*ptr != ';') {
  7275. xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
  7276. xmlFree(name);
  7277. *str = ptr;
  7278. return(NULL);
  7279. }
  7280. ptr++;
  7281. /*
  7282. * Predefined entities override any extra definition
  7283. */
  7284. if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
  7285. ent = xmlGetPredefinedEntity(name);
  7286. if (ent != NULL) {
  7287. xmlFree(name);
  7288. *str = ptr;
  7289. return(ent);
  7290. }
  7291. }
  7292. /*
  7293. * Increase the number of entity references parsed
  7294. */
  7295. ctxt->nbentities++;
  7296. /*
  7297. * Ask first SAX for entity resolution, otherwise try the
  7298. * entities which may have stored in the parser context.
  7299. */
  7300. if (ctxt->sax != NULL) {
  7301. if (ctxt->sax->getEntity != NULL)
  7302. ent = ctxt->sax->getEntity(ctxt->userData, name);
  7303. if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
  7304. ent = xmlGetPredefinedEntity(name);
  7305. if ((ent == NULL) && (ctxt->userData==ctxt)) {
  7306. ent = xmlSAX2GetEntity(ctxt, name);
  7307. }
  7308. }
  7309. if (ctxt->instate == XML_PARSER_EOF) {
  7310. xmlFree(name);
  7311. return(NULL);
  7312. }
  7313. /*
  7314. * [ WFC: Entity Declared ]
  7315. * In a document without any DTD, a document with only an
  7316. * internal DTD subset which contains no parameter entity
  7317. * references, or a document with "standalone='yes'", the
  7318. * Name given in the entity reference must match that in an
  7319. * entity declaration, except that well-formed documents
  7320. * need not declare any of the following entities: amp, lt,
  7321. * gt, apos, quot.
  7322. * The declaration of a parameter entity must precede any
  7323. * reference to it.
  7324. * Similarly, the declaration of a general entity must
  7325. * precede any reference to it which appears in a default
  7326. * value in an attribute-list declaration. Note that if
  7327. * entities are declared in the external subset or in
  7328. * external parameter entities, a non-validating processor
  7329. * is not obligated to read and process their declarations;
  7330. * for such documents, the rule that an entity must be
  7331. * declared is a well-formedness constraint only if
  7332. * standalone='yes'.
  7333. */
  7334. if (ent == NULL) {
  7335. if ((ctxt->standalone == 1) ||
  7336. ((ctxt->hasExternalSubset == 0) &&
  7337. (ctxt->hasPErefs == 0))) {
  7338. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  7339. "Entity '%s' not defined\n", name);
  7340. } else {
  7341. xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7342. "Entity '%s' not defined\n",
  7343. name);
  7344. }
  7345. xmlParserEntityCheck(ctxt, 0, ent, 0);
  7346. /* TODO ? check regressions ctxt->valid = 0; */
  7347. }
  7348. /*
  7349. * [ WFC: Parsed Entity ]
  7350. * An entity reference must not contain the name of an
  7351. * unparsed entity
  7352. */
  7353. else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
  7354. xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
  7355. "Entity reference to unparsed entity %s\n", name);
  7356. }
  7357. /*
  7358. * [ WFC: No External Entity References ]
  7359. * Attribute values cannot contain direct or indirect
  7360. * entity references to external entities.
  7361. */
  7362. else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
  7363. (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
  7364. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
  7365. "Attribute references external entity '%s'\n", name);
  7366. }
  7367. /*
  7368. * [ WFC: No < in Attribute Values ]
  7369. * The replacement text of any entity referred to directly or
  7370. * indirectly in an attribute value (other than "&lt;") must
  7371. * not contain a <.
  7372. */
  7373. else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
  7374. (ent != NULL) && (ent->content != NULL) &&
  7375. (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
  7376. (xmlStrchr(ent->content, '<'))) {
  7377. xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
  7378. "'<' in entity '%s' is not allowed in attributes values\n",
  7379. name);
  7380. }
  7381. /*
  7382. * Internal check, no parameter entities here ...
  7383. */
  7384. else {
  7385. switch (ent->etype) {
  7386. case XML_INTERNAL_PARAMETER_ENTITY:
  7387. case XML_EXTERNAL_PARAMETER_ENTITY:
  7388. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
  7389. "Attempt to reference the parameter entity '%s'\n",
  7390. name);
  7391. break;
  7392. default:
  7393. break;
  7394. }
  7395. }
  7396. /*
  7397. * [ WFC: No Recursion ]
  7398. * A parsed entity must not contain a recursive reference
  7399. * to itself, either directly or indirectly.
  7400. * Done somewhere else
  7401. */
  7402. xmlFree(name);
  7403. *str = ptr;
  7404. return(ent);
  7405. }
  7406. /**
  7407. * xmlParsePEReference:
  7408. * @ctxt: an XML parser context
  7409. *
  7410. * parse PEReference declarations
  7411. * The entity content is handled directly by pushing it's content as
  7412. * a new input stream.
  7413. *
  7414. * [69] PEReference ::= '%' Name ';'
  7415. *
  7416. * [ WFC: No Recursion ]
  7417. * A parsed entity must not contain a recursive
  7418. * reference to itself, either directly or indirectly.
  7419. *
  7420. * [ WFC: Entity Declared ]
  7421. * In a document without any DTD, a document with only an internal DTD
  7422. * subset which contains no parameter entity references, or a document
  7423. * with "standalone='yes'", ... ... The declaration of a parameter
  7424. * entity must precede any reference to it...
  7425. *
  7426. * [ VC: Entity Declared ]
  7427. * In a document with an external subset or external parameter entities
  7428. * with "standalone='no'", ... ... The declaration of a parameter entity
  7429. * must precede any reference to it...
  7430. *
  7431. * [ WFC: In DTD ]
  7432. * Parameter-entity references may only appear in the DTD.
  7433. * NOTE: misleading but this is handled.
  7434. */
  7435. void
  7436. xmlParsePEReference(xmlParserCtxtPtr ctxt)
  7437. {
  7438. const xmlChar *name;
  7439. xmlEntityPtr entity = NULL;
  7440. xmlParserInputPtr input;
  7441. if (RAW != '%')
  7442. return;
  7443. NEXT;
  7444. name = xmlParseName(ctxt);
  7445. if (name == NULL) {
  7446. xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
  7447. return;
  7448. }
  7449. if (xmlParserDebugEntities)
  7450. xmlGenericError(xmlGenericErrorContext,
  7451. "PEReference: %s\n", name);
  7452. if (RAW != ';') {
  7453. xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
  7454. return;
  7455. }
  7456. NEXT;
  7457. /*
  7458. * Increase the number of entity references parsed
  7459. */
  7460. ctxt->nbentities++;
  7461. /*
  7462. * Request the entity from SAX
  7463. */
  7464. if ((ctxt->sax != NULL) &&
  7465. (ctxt->sax->getParameterEntity != NULL))
  7466. entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
  7467. if (ctxt->instate == XML_PARSER_EOF)
  7468. return;
  7469. if (entity == NULL) {
  7470. /*
  7471. * [ WFC: Entity Declared ]
  7472. * In a document without any DTD, a document with only an
  7473. * internal DTD subset which contains no parameter entity
  7474. * references, or a document with "standalone='yes'", ...
  7475. * ... The declaration of a parameter entity must precede
  7476. * any reference to it...
  7477. */
  7478. if ((ctxt->standalone == 1) ||
  7479. ((ctxt->hasExternalSubset == 0) &&
  7480. (ctxt->hasPErefs == 0))) {
  7481. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  7482. "PEReference: %%%s; not found\n",
  7483. name);
  7484. } else {
  7485. /*
  7486. * [ VC: Entity Declared ]
  7487. * In a document with an external subset or external
  7488. * parameter entities with "standalone='no'", ...
  7489. * ... The declaration of a parameter entity must
  7490. * precede any reference to it...
  7491. */
  7492. if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
  7493. xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7494. "PEReference: %%%s; not found\n",
  7495. name, NULL);
  7496. } else
  7497. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7498. "PEReference: %%%s; not found\n",
  7499. name, NULL);
  7500. ctxt->valid = 0;
  7501. }
  7502. xmlParserEntityCheck(ctxt, 0, NULL, 0);
  7503. } else {
  7504. /*
  7505. * Internal checking in case the entity quest barfed
  7506. */
  7507. if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
  7508. (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
  7509. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7510. "Internal: %%%s; is not a parameter entity\n",
  7511. name, NULL);
  7512. } else {
  7513. xmlChar start[4];
  7514. xmlCharEncoding enc;
  7515. if (xmlParserEntityCheck(ctxt, 0, entity, 0))
  7516. return;
  7517. if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
  7518. ((ctxt->options & XML_PARSE_NOENT) == 0) &&
  7519. ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
  7520. ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
  7521. ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
  7522. (ctxt->replaceEntities == 0) &&
  7523. (ctxt->validate == 0))
  7524. return;
  7525. input = xmlNewEntityInputStream(ctxt, entity);
  7526. if (xmlPushInput(ctxt, input) < 0) {
  7527. xmlFreeInputStream(input);
  7528. return;
  7529. }
  7530. if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
  7531. /*
  7532. * Get the 4 first bytes and decode the charset
  7533. * if enc != XML_CHAR_ENCODING_NONE
  7534. * plug some encoding conversion routines.
  7535. * Note that, since we may have some non-UTF8
  7536. * encoding (like UTF16, bug 135229), the 'length'
  7537. * is not known, but we can calculate based upon
  7538. * the amount of data in the buffer.
  7539. */
  7540. GROW
  7541. if (ctxt->instate == XML_PARSER_EOF)
  7542. return;
  7543. if ((ctxt->input->end - ctxt->input->cur)>=4) {
  7544. start[0] = RAW;
  7545. start[1] = NXT(1);
  7546. start[2] = NXT(2);
  7547. start[3] = NXT(3);
  7548. enc = xmlDetectCharEncoding(start, 4);
  7549. if (enc != XML_CHAR_ENCODING_NONE) {
  7550. xmlSwitchEncoding(ctxt, enc);
  7551. }
  7552. }
  7553. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
  7554. (IS_BLANK_CH(NXT(5)))) {
  7555. xmlParseTextDecl(ctxt);
  7556. }
  7557. }
  7558. }
  7559. }
  7560. ctxt->hasPErefs = 1;
  7561. }
  7562. /**
  7563. * xmlLoadEntityContent:
  7564. * @ctxt: an XML parser context
  7565. * @entity: an unloaded system entity
  7566. *
  7567. * Load the original content of the given system entity from the
  7568. * ExternalID/SystemID given. This is to be used for Included in Literal
  7569. * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
  7570. *
  7571. * Returns 0 in case of success and -1 in case of failure
  7572. */
  7573. static int
  7574. xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
  7575. xmlParserInputPtr input;
  7576. xmlBufferPtr buf;
  7577. int l, c;
  7578. int count = 0;
  7579. if ((ctxt == NULL) || (entity == NULL) ||
  7580. ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
  7581. (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
  7582. (entity->content != NULL)) {
  7583. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  7584. "xmlLoadEntityContent parameter error");
  7585. return(-1);
  7586. }
  7587. if (xmlParserDebugEntities)
  7588. xmlGenericError(xmlGenericErrorContext,
  7589. "Reading %s entity content input\n", entity->name);
  7590. buf = xmlBufferCreate();
  7591. if (buf == NULL) {
  7592. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  7593. "xmlLoadEntityContent parameter error");
  7594. return(-1);
  7595. }
  7596. input = xmlNewEntityInputStream(ctxt, entity);
  7597. if (input == NULL) {
  7598. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  7599. "xmlLoadEntityContent input error");
  7600. xmlBufferFree(buf);
  7601. return(-1);
  7602. }
  7603. /*
  7604. * Push the entity as the current input, read char by char
  7605. * saving to the buffer until the end of the entity or an error
  7606. */
  7607. if (xmlPushInput(ctxt, input) < 0) {
  7608. xmlBufferFree(buf);
  7609. return(-1);
  7610. }
  7611. GROW;
  7612. c = CUR_CHAR(l);
  7613. while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
  7614. (IS_CHAR(c))) {
  7615. xmlBufferAdd(buf, ctxt->input->cur, l);
  7616. if (count++ > XML_PARSER_CHUNK_SIZE) {
  7617. count = 0;
  7618. GROW;
  7619. if (ctxt->instate == XML_PARSER_EOF) {
  7620. xmlBufferFree(buf);
  7621. return(-1);
  7622. }
  7623. }
  7624. NEXTL(l);
  7625. c = CUR_CHAR(l);
  7626. if (c == 0) {
  7627. count = 0;
  7628. GROW;
  7629. if (ctxt->instate == XML_PARSER_EOF) {
  7630. xmlBufferFree(buf);
  7631. return(-1);
  7632. }
  7633. c = CUR_CHAR(l);
  7634. }
  7635. }
  7636. if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
  7637. xmlPopInput(ctxt);
  7638. } else if (!IS_CHAR(c)) {
  7639. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  7640. "xmlLoadEntityContent: invalid char value %d\n",
  7641. c);
  7642. xmlBufferFree(buf);
  7643. return(-1);
  7644. }
  7645. entity->content = buf->content;
  7646. buf->content = NULL;
  7647. xmlBufferFree(buf);
  7648. return(0);
  7649. }
  7650. /**
  7651. * xmlParseStringPEReference:
  7652. * @ctxt: an XML parser context
  7653. * @str: a pointer to an index in the string
  7654. *
  7655. * parse PEReference declarations
  7656. *
  7657. * [69] PEReference ::= '%' Name ';'
  7658. *
  7659. * [ WFC: No Recursion ]
  7660. * A parsed entity must not contain a recursive
  7661. * reference to itself, either directly or indirectly.
  7662. *
  7663. * [ WFC: Entity Declared ]
  7664. * In a document without any DTD, a document with only an internal DTD
  7665. * subset which contains no parameter entity references, or a document
  7666. * with "standalone='yes'", ... ... The declaration of a parameter
  7667. * entity must precede any reference to it...
  7668. *
  7669. * [ VC: Entity Declared ]
  7670. * In a document with an external subset or external parameter entities
  7671. * with "standalone='no'", ... ... The declaration of a parameter entity
  7672. * must precede any reference to it...
  7673. *
  7674. * [ WFC: In DTD ]
  7675. * Parameter-entity references may only appear in the DTD.
  7676. * NOTE: misleading but this is handled.
  7677. *
  7678. * Returns the string of the entity content.
  7679. * str is updated to the current value of the index
  7680. */
  7681. static xmlEntityPtr
  7682. xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
  7683. const xmlChar *ptr;
  7684. xmlChar cur;
  7685. xmlChar *name;
  7686. xmlEntityPtr entity = NULL;
  7687. if ((str == NULL) || (*str == NULL)) return(NULL);
  7688. ptr = *str;
  7689. cur = *ptr;
  7690. if (cur != '%')
  7691. return(NULL);
  7692. ptr++;
  7693. name = xmlParseStringName(ctxt, &ptr);
  7694. if (name == NULL) {
  7695. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7696. "xmlParseStringPEReference: no name\n");
  7697. *str = ptr;
  7698. return(NULL);
  7699. }
  7700. cur = *ptr;
  7701. if (cur != ';') {
  7702. xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
  7703. xmlFree(name);
  7704. *str = ptr;
  7705. return(NULL);
  7706. }
  7707. ptr++;
  7708. /*
  7709. * Increase the number of entity references parsed
  7710. */
  7711. ctxt->nbentities++;
  7712. /*
  7713. * Request the entity from SAX
  7714. */
  7715. if ((ctxt->sax != NULL) &&
  7716. (ctxt->sax->getParameterEntity != NULL))
  7717. entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
  7718. if (ctxt->instate == XML_PARSER_EOF) {
  7719. xmlFree(name);
  7720. *str = ptr;
  7721. return(NULL);
  7722. }
  7723. if (entity == NULL) {
  7724. /*
  7725. * [ WFC: Entity Declared ]
  7726. * In a document without any DTD, a document with only an
  7727. * internal DTD subset which contains no parameter entity
  7728. * references, or a document with "standalone='yes'", ...
  7729. * ... The declaration of a parameter entity must precede
  7730. * any reference to it...
  7731. */
  7732. if ((ctxt->standalone == 1) ||
  7733. ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
  7734. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  7735. "PEReference: %%%s; not found\n", name);
  7736. } else {
  7737. /*
  7738. * [ VC: Entity Declared ]
  7739. * In a document with an external subset or external
  7740. * parameter entities with "standalone='no'", ...
  7741. * ... The declaration of a parameter entity must
  7742. * precede any reference to it...
  7743. */
  7744. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7745. "PEReference: %%%s; not found\n",
  7746. name, NULL);
  7747. ctxt->valid = 0;
  7748. }
  7749. xmlParserEntityCheck(ctxt, 0, NULL, 0);
  7750. } else {
  7751. /*
  7752. * Internal checking in case the entity quest barfed
  7753. */
  7754. if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
  7755. (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
  7756. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7757. "%%%s; is not a parameter entity\n",
  7758. name, NULL);
  7759. }
  7760. }
  7761. ctxt->hasPErefs = 1;
  7762. xmlFree(name);
  7763. *str = ptr;
  7764. return(entity);
  7765. }
  7766. /**
  7767. * xmlParseDocTypeDecl:
  7768. * @ctxt: an XML parser context
  7769. *
  7770. * parse a DOCTYPE declaration
  7771. *
  7772. * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
  7773. * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
  7774. *
  7775. * [ VC: Root Element Type ]
  7776. * The Name in the document type declaration must match the element
  7777. * type of the root element.
  7778. */
  7779. void
  7780. xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
  7781. const xmlChar *name = NULL;
  7782. xmlChar *ExternalID = NULL;
  7783. xmlChar *URI = NULL;
  7784. /*
  7785. * We know that '<!DOCTYPE' has been detected.
  7786. */
  7787. SKIP(9);
  7788. SKIP_BLANKS;
  7789. /*
  7790. * Parse the DOCTYPE name.
  7791. */
  7792. name = xmlParseName(ctxt);
  7793. if (name == NULL) {
  7794. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7795. "xmlParseDocTypeDecl : no DOCTYPE name !\n");
  7796. }
  7797. ctxt->intSubName = name;
  7798. SKIP_BLANKS;
  7799. /*
  7800. * Check for SystemID and ExternalID
  7801. */
  7802. URI = xmlParseExternalID(ctxt, &ExternalID, 1);
  7803. if ((URI != NULL) || (ExternalID != NULL)) {
  7804. ctxt->hasExternalSubset = 1;
  7805. }
  7806. ctxt->extSubURI = URI;
  7807. ctxt->extSubSystem = ExternalID;
  7808. SKIP_BLANKS;
  7809. /*
  7810. * Create and update the internal subset.
  7811. */
  7812. if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
  7813. (!ctxt->disableSAX))
  7814. ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
  7815. if (ctxt->instate == XML_PARSER_EOF)
  7816. return;
  7817. /*
  7818. * Is there any internal subset declarations ?
  7819. * they are handled separately in xmlParseInternalSubset()
  7820. */
  7821. if (RAW == '[')
  7822. return;
  7823. /*
  7824. * We should be at the end of the DOCTYPE declaration.
  7825. */
  7826. if (RAW != '>') {
  7827. xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
  7828. }
  7829. NEXT;
  7830. }
  7831. /**
  7832. * xmlParseInternalSubset:
  7833. * @ctxt: an XML parser context
  7834. *
  7835. * parse the internal subset declaration
  7836. *
  7837. * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
  7838. */
  7839. static void
  7840. xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
  7841. /*
  7842. * Is there any DTD definition ?
  7843. */
  7844. if (RAW == '[') {
  7845. int baseInputNr = ctxt->inputNr;
  7846. ctxt->instate = XML_PARSER_DTD;
  7847. NEXT;
  7848. /*
  7849. * Parse the succession of Markup declarations and
  7850. * PEReferences.
  7851. * Subsequence (markupdecl | PEReference | S)*
  7852. */
  7853. while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
  7854. (ctxt->instate != XML_PARSER_EOF)) {
  7855. const xmlChar *check = CUR_PTR;
  7856. unsigned int cons = ctxt->input->consumed;
  7857. SKIP_BLANKS;
  7858. xmlParseMarkupDecl(ctxt);
  7859. xmlParsePEReference(ctxt);
  7860. /*
  7861. * Conditional sections are allowed from external entities included
  7862. * by PE References in the internal subset.
  7863. */
  7864. if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
  7865. (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  7866. xmlParseConditionalSections(ctxt);
  7867. }
  7868. if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
  7869. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  7870. "xmlParseInternalSubset: error detected in Markup declaration\n");
  7871. if (ctxt->inputNr > baseInputNr)
  7872. xmlPopInput(ctxt);
  7873. else
  7874. break;
  7875. }
  7876. }
  7877. if (RAW == ']') {
  7878. NEXT;
  7879. SKIP_BLANKS;
  7880. }
  7881. }
  7882. /*
  7883. * We should be at the end of the DOCTYPE declaration.
  7884. */
  7885. if (RAW != '>') {
  7886. xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
  7887. return;
  7888. }
  7889. NEXT;
  7890. }
  7891. #ifdef LIBXML_SAX1_ENABLED
  7892. /**
  7893. * xmlParseAttribute:
  7894. * @ctxt: an XML parser context
  7895. * @value: a xmlChar ** used to store the value of the attribute
  7896. *
  7897. * parse an attribute
  7898. *
  7899. * [41] Attribute ::= Name Eq AttValue
  7900. *
  7901. * [ WFC: No External Entity References ]
  7902. * Attribute values cannot contain direct or indirect entity references
  7903. * to external entities.
  7904. *
  7905. * [ WFC: No < in Attribute Values ]
  7906. * The replacement text of any entity referred to directly or indirectly in
  7907. * an attribute value (other than "&lt;") must not contain a <.
  7908. *
  7909. * [ VC: Attribute Value Type ]
  7910. * The attribute must have been declared; the value must be of the type
  7911. * declared for it.
  7912. *
  7913. * [25] Eq ::= S? '=' S?
  7914. *
  7915. * With namespace:
  7916. *
  7917. * [NS 11] Attribute ::= QName Eq AttValue
  7918. *
  7919. * Also the case QName == xmlns:??? is handled independently as a namespace
  7920. * definition.
  7921. *
  7922. * Returns the attribute name, and the value in *value.
  7923. */
  7924. const xmlChar *
  7925. xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
  7926. const xmlChar *name;
  7927. xmlChar *val;
  7928. *value = NULL;
  7929. GROW;
  7930. name = xmlParseName(ctxt);
  7931. if (name == NULL) {
  7932. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7933. "error parsing attribute name\n");
  7934. return(NULL);
  7935. }
  7936. /*
  7937. * read the value
  7938. */
  7939. SKIP_BLANKS;
  7940. if (RAW == '=') {
  7941. NEXT;
  7942. SKIP_BLANKS;
  7943. val = xmlParseAttValue(ctxt);
  7944. ctxt->instate = XML_PARSER_CONTENT;
  7945. } else {
  7946. xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
  7947. "Specification mandates value for attribute %s\n", name);
  7948. return(NULL);
  7949. }
  7950. /*
  7951. * Check that xml:lang conforms to the specification
  7952. * No more registered as an error, just generate a warning now
  7953. * since this was deprecated in XML second edition
  7954. */
  7955. if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
  7956. if (!xmlCheckLanguageID(val)) {
  7957. xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
  7958. "Malformed value for xml:lang : %s\n",
  7959. val, NULL);
  7960. }
  7961. }
  7962. /*
  7963. * Check that xml:space conforms to the specification
  7964. */
  7965. if (xmlStrEqual(name, BAD_CAST "xml:space")) {
  7966. if (xmlStrEqual(val, BAD_CAST "default"))
  7967. *(ctxt->space) = 0;
  7968. else if (xmlStrEqual(val, BAD_CAST "preserve"))
  7969. *(ctxt->space) = 1;
  7970. else {
  7971. xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
  7972. "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
  7973. val, NULL);
  7974. }
  7975. }
  7976. *value = val;
  7977. return(name);
  7978. }
  7979. /**
  7980. * xmlParseStartTag:
  7981. * @ctxt: an XML parser context
  7982. *
  7983. * parse a start of tag either for rule element or
  7984. * EmptyElement. In both case we don't parse the tag closing chars.
  7985. *
  7986. * [40] STag ::= '<' Name (S Attribute)* S? '>'
  7987. *
  7988. * [ WFC: Unique Att Spec ]
  7989. * No attribute name may appear more than once in the same start-tag or
  7990. * empty-element tag.
  7991. *
  7992. * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
  7993. *
  7994. * [ WFC: Unique Att Spec ]
  7995. * No attribute name may appear more than once in the same start-tag or
  7996. * empty-element tag.
  7997. *
  7998. * With namespace:
  7999. *
  8000. * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
  8001. *
  8002. * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
  8003. *
  8004. * Returns the element name parsed
  8005. */
  8006. const xmlChar *
  8007. xmlParseStartTag(xmlParserCtxtPtr ctxt) {
  8008. const xmlChar *name;
  8009. const xmlChar *attname;
  8010. xmlChar *attvalue;
  8011. const xmlChar **atts = ctxt->atts;
  8012. int nbatts = 0;
  8013. int maxatts = ctxt->maxatts;
  8014. int i;
  8015. if (RAW != '<') return(NULL);
  8016. NEXT1;
  8017. name = xmlParseName(ctxt);
  8018. if (name == NULL) {
  8019. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  8020. "xmlParseStartTag: invalid element name\n");
  8021. return(NULL);
  8022. }
  8023. /*
  8024. * Now parse the attributes, it ends up with the ending
  8025. *
  8026. * (S Attribute)* S?
  8027. */
  8028. SKIP_BLANKS;
  8029. GROW;
  8030. while (((RAW != '>') &&
  8031. ((RAW != '/') || (NXT(1) != '>')) &&
  8032. (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
  8033. const xmlChar *q = CUR_PTR;
  8034. unsigned int cons = ctxt->input->consumed;
  8035. attname = xmlParseAttribute(ctxt, &attvalue);
  8036. if ((attname != NULL) && (attvalue != NULL)) {
  8037. /*
  8038. * [ WFC: Unique Att Spec ]
  8039. * No attribute name may appear more than once in the same
  8040. * start-tag or empty-element tag.
  8041. */
  8042. for (i = 0; i < nbatts;i += 2) {
  8043. if (xmlStrEqual(atts[i], attname)) {
  8044. xmlErrAttributeDup(ctxt, NULL, attname);
  8045. xmlFree(attvalue);
  8046. goto failed;
  8047. }
  8048. }
  8049. /*
  8050. * Add the pair to atts
  8051. */
  8052. if (atts == NULL) {
  8053. maxatts = 22; /* allow for 10 attrs by default */
  8054. atts = (const xmlChar **)
  8055. xmlMalloc(maxatts * sizeof(xmlChar *));
  8056. if (atts == NULL) {
  8057. xmlErrMemory(ctxt, NULL);
  8058. if (attvalue != NULL)
  8059. xmlFree(attvalue);
  8060. goto failed;
  8061. }
  8062. ctxt->atts = atts;
  8063. ctxt->maxatts = maxatts;
  8064. } else if (nbatts + 4 > maxatts) {
  8065. const xmlChar **n;
  8066. maxatts *= 2;
  8067. n = (const xmlChar **) xmlRealloc((void *) atts,
  8068. maxatts * sizeof(const xmlChar *));
  8069. if (n == NULL) {
  8070. xmlErrMemory(ctxt, NULL);
  8071. if (attvalue != NULL)
  8072. xmlFree(attvalue);
  8073. goto failed;
  8074. }
  8075. atts = n;
  8076. ctxt->atts = atts;
  8077. ctxt->maxatts = maxatts;
  8078. }
  8079. atts[nbatts++] = attname;
  8080. atts[nbatts++] = attvalue;
  8081. atts[nbatts] = NULL;
  8082. atts[nbatts + 1] = NULL;
  8083. } else {
  8084. if (attvalue != NULL)
  8085. xmlFree(attvalue);
  8086. }
  8087. failed:
  8088. GROW
  8089. if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
  8090. break;
  8091. if (SKIP_BLANKS == 0) {
  8092. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  8093. "attributes construct error\n");
  8094. }
  8095. if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
  8096. (attname == NULL) && (attvalue == NULL)) {
  8097. xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
  8098. "xmlParseStartTag: problem parsing attributes\n");
  8099. break;
  8100. }
  8101. SHRINK;
  8102. GROW;
  8103. }
  8104. /*
  8105. * SAX: Start of Element !
  8106. */
  8107. if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
  8108. (!ctxt->disableSAX)) {
  8109. if (nbatts > 0)
  8110. ctxt->sax->startElement(ctxt->userData, name, atts);
  8111. else
  8112. ctxt->sax->startElement(ctxt->userData, name, NULL);
  8113. }
  8114. if (atts != NULL) {
  8115. /* Free only the content strings */
  8116. for (i = 1;i < nbatts;i+=2)
  8117. if (atts[i] != NULL)
  8118. xmlFree((xmlChar *) atts[i]);
  8119. }
  8120. return(name);
  8121. }
  8122. /**
  8123. * xmlParseEndTag1:
  8124. * @ctxt: an XML parser context
  8125. * @line: line of the start tag
  8126. * @nsNr: number of namespaces on the start tag
  8127. *
  8128. * parse an end of tag
  8129. *
  8130. * [42] ETag ::= '</' Name S? '>'
  8131. *
  8132. * With namespace
  8133. *
  8134. * [NS 9] ETag ::= '</' QName S? '>'
  8135. */
  8136. static void
  8137. xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
  8138. const xmlChar *name;
  8139. GROW;
  8140. if ((RAW != '<') || (NXT(1) != '/')) {
  8141. xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
  8142. "xmlParseEndTag: '</' not found\n");
  8143. return;
  8144. }
  8145. SKIP(2);
  8146. name = xmlParseNameAndCompare(ctxt,ctxt->name);
  8147. /*
  8148. * We should definitely be at the ending "S? '>'" part
  8149. */
  8150. GROW;
  8151. SKIP_BLANKS;
  8152. if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
  8153. xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
  8154. } else
  8155. NEXT1;
  8156. /*
  8157. * [ WFC: Element Type Match ]
  8158. * The Name in an element's end-tag must match the element type in the
  8159. * start-tag.
  8160. *
  8161. */
  8162. if (name != (xmlChar*)1) {
  8163. if (name == NULL) name = BAD_CAST "unparsable";
  8164. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
  8165. "Opening and ending tag mismatch: %s line %d and %s\n",
  8166. ctxt->name, line, name);
  8167. }
  8168. /*
  8169. * SAX: End of Tag
  8170. */
  8171. if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
  8172. (!ctxt->disableSAX))
  8173. ctxt->sax->endElement(ctxt->userData, ctxt->name);
  8174. namePop(ctxt);
  8175. spacePop(ctxt);
  8176. return;
  8177. }
  8178. /**
  8179. * xmlParseEndTag:
  8180. * @ctxt: an XML parser context
  8181. *
  8182. * parse an end of tag
  8183. *
  8184. * [42] ETag ::= '</' Name S? '>'
  8185. *
  8186. * With namespace
  8187. *
  8188. * [NS 9] ETag ::= '</' QName S? '>'
  8189. */
  8190. void
  8191. xmlParseEndTag(xmlParserCtxtPtr ctxt) {
  8192. xmlParseEndTag1(ctxt, 0);
  8193. }
  8194. #endif /* LIBXML_SAX1_ENABLED */
  8195. /************************************************************************
  8196. * *
  8197. * SAX 2 specific operations *
  8198. * *
  8199. ************************************************************************/
  8200. /*
  8201. * xmlGetNamespace:
  8202. * @ctxt: an XML parser context
  8203. * @prefix: the prefix to lookup
  8204. *
  8205. * Lookup the namespace name for the @prefix (which ca be NULL)
  8206. * The prefix must come from the @ctxt->dict dictionary
  8207. *
  8208. * Returns the namespace name or NULL if not bound
  8209. */
  8210. static const xmlChar *
  8211. xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
  8212. int i;
  8213. if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
  8214. for (i = ctxt->nsNr - 2;i >= 0;i-=2)
  8215. if (ctxt->nsTab[i] == prefix) {
  8216. if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
  8217. return(NULL);
  8218. return(ctxt->nsTab[i + 1]);
  8219. }
  8220. return(NULL);
  8221. }
  8222. /**
  8223. * xmlParseQName:
  8224. * @ctxt: an XML parser context
  8225. * @prefix: pointer to store the prefix part
  8226. *
  8227. * parse an XML Namespace QName
  8228. *
  8229. * [6] QName ::= (Prefix ':')? LocalPart
  8230. * [7] Prefix ::= NCName
  8231. * [8] LocalPart ::= NCName
  8232. *
  8233. * Returns the Name parsed or NULL
  8234. */
  8235. static const xmlChar *
  8236. xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
  8237. const xmlChar *l, *p;
  8238. GROW;
  8239. l = xmlParseNCName(ctxt);
  8240. if (l == NULL) {
  8241. if (CUR == ':') {
  8242. l = xmlParseName(ctxt);
  8243. if (l != NULL) {
  8244. xmlNsErr(ctxt, XML_NS_ERR_QNAME,
  8245. "Failed to parse QName '%s'\n", l, NULL, NULL);
  8246. *prefix = NULL;
  8247. return(l);
  8248. }
  8249. }
  8250. return(NULL);
  8251. }
  8252. if (CUR == ':') {
  8253. NEXT;
  8254. p = l;
  8255. l = xmlParseNCName(ctxt);
  8256. if (l == NULL) {
  8257. xmlChar *tmp;
  8258. if (ctxt->instate == XML_PARSER_EOF)
  8259. return(NULL);
  8260. xmlNsErr(ctxt, XML_NS_ERR_QNAME,
  8261. "Failed to parse QName '%s:'\n", p, NULL, NULL);
  8262. l = xmlParseNmtoken(ctxt);
  8263. if (l == NULL) {
  8264. if (ctxt->instate == XML_PARSER_EOF)
  8265. return(NULL);
  8266. tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
  8267. } else {
  8268. tmp = xmlBuildQName(l, p, NULL, 0);
  8269. xmlFree((char *)l);
  8270. }
  8271. p = xmlDictLookup(ctxt->dict, tmp, -1);
  8272. if (tmp != NULL) xmlFree(tmp);
  8273. *prefix = NULL;
  8274. return(p);
  8275. }
  8276. if (CUR == ':') {
  8277. xmlChar *tmp;
  8278. xmlNsErr(ctxt, XML_NS_ERR_QNAME,
  8279. "Failed to parse QName '%s:%s:'\n", p, l, NULL);
  8280. NEXT;
  8281. tmp = (xmlChar *) xmlParseName(ctxt);
  8282. if (tmp != NULL) {
  8283. tmp = xmlBuildQName(tmp, l, NULL, 0);
  8284. l = xmlDictLookup(ctxt->dict, tmp, -1);
  8285. if (tmp != NULL) xmlFree(tmp);
  8286. *prefix = p;
  8287. return(l);
  8288. }
  8289. if (ctxt->instate == XML_PARSER_EOF)
  8290. return(NULL);
  8291. tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
  8292. l = xmlDictLookup(ctxt->dict, tmp, -1);
  8293. if (tmp != NULL) xmlFree(tmp);
  8294. *prefix = p;
  8295. return(l);
  8296. }
  8297. *prefix = p;
  8298. } else
  8299. *prefix = NULL;
  8300. return(l);
  8301. }
  8302. /**
  8303. * xmlParseQNameAndCompare:
  8304. * @ctxt: an XML parser context
  8305. * @name: the localname
  8306. * @prefix: the prefix, if any.
  8307. *
  8308. * parse an XML name and compares for match
  8309. * (specialized for endtag parsing)
  8310. *
  8311. * Returns NULL for an illegal name, (xmlChar*) 1 for success
  8312. * and the name for mismatch
  8313. */
  8314. static const xmlChar *
  8315. xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
  8316. xmlChar const *prefix) {
  8317. const xmlChar *cmp;
  8318. const xmlChar *in;
  8319. const xmlChar *ret;
  8320. const xmlChar *prefix2;
  8321. if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
  8322. GROW;
  8323. in = ctxt->input->cur;
  8324. cmp = prefix;
  8325. while (*in != 0 && *in == *cmp) {
  8326. ++in;
  8327. ++cmp;
  8328. }
  8329. if ((*cmp == 0) && (*in == ':')) {
  8330. in++;
  8331. cmp = name;
  8332. while (*in != 0 && *in == *cmp) {
  8333. ++in;
  8334. ++cmp;
  8335. }
  8336. if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
  8337. /* success */
  8338. ctxt->input->col += in - ctxt->input->cur;
  8339. ctxt->input->cur = in;
  8340. return((const xmlChar*) 1);
  8341. }
  8342. }
  8343. /*
  8344. * all strings coms from the dictionary, equality can be done directly
  8345. */
  8346. ret = xmlParseQName (ctxt, &prefix2);
  8347. if ((ret == name) && (prefix == prefix2))
  8348. return((const xmlChar*) 1);
  8349. return ret;
  8350. }
  8351. /**
  8352. * xmlParseAttValueInternal:
  8353. * @ctxt: an XML parser context
  8354. * @len: attribute len result
  8355. * @alloc: whether the attribute was reallocated as a new string
  8356. * @normalize: if 1 then further non-CDATA normalization must be done
  8357. *
  8358. * parse a value for an attribute.
  8359. * NOTE: if no normalization is needed, the routine will return pointers
  8360. * directly from the data buffer.
  8361. *
  8362. * 3.3.3 Attribute-Value Normalization:
  8363. * Before the value of an attribute is passed to the application or
  8364. * checked for validity, the XML processor must normalize it as follows:
  8365. * - a character reference is processed by appending the referenced
  8366. * character to the attribute value
  8367. * - an entity reference is processed by recursively processing the
  8368. * replacement text of the entity
  8369. * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
  8370. * appending #x20 to the normalized value, except that only a single
  8371. * #x20 is appended for a "#xD#xA" sequence that is part of an external
  8372. * parsed entity or the literal entity value of an internal parsed entity
  8373. * - other characters are processed by appending them to the normalized value
  8374. * If the declared value is not CDATA, then the XML processor must further
  8375. * process the normalized attribute value by discarding any leading and
  8376. * trailing space (#x20) characters, and by replacing sequences of space
  8377. * (#x20) characters by a single space (#x20) character.
  8378. * All attributes for which no declaration has been read should be treated
  8379. * by a non-validating parser as if declared CDATA.
  8380. *
  8381. * Returns the AttValue parsed or NULL. The value has to be freed by the
  8382. * caller if it was copied, this can be detected by val[*len] == 0.
  8383. */
  8384. #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
  8385. const xmlChar *oldbase = ctxt->input->base;\
  8386. GROW;\
  8387. if (ctxt->instate == XML_PARSER_EOF)\
  8388. return(NULL);\
  8389. if (oldbase != ctxt->input->base) {\
  8390. ptrdiff_t delta = ctxt->input->base - oldbase;\
  8391. start = start + delta;\
  8392. in = in + delta;\
  8393. }\
  8394. end = ctxt->input->end;
  8395. static xmlChar *
  8396. xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
  8397. int normalize)
  8398. {
  8399. xmlChar limit = 0;
  8400. const xmlChar *in = NULL, *start, *end, *last;
  8401. xmlChar *ret = NULL;
  8402. int line, col;
  8403. GROW;
  8404. in = (xmlChar *) CUR_PTR;
  8405. line = ctxt->input->line;
  8406. col = ctxt->input->col;
  8407. if (*in != '"' && *in != '\'') {
  8408. xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
  8409. return (NULL);
  8410. }
  8411. ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
  8412. /*
  8413. * try to handle in this routine the most common case where no
  8414. * allocation of a new string is required and where content is
  8415. * pure ASCII.
  8416. */
  8417. limit = *in++;
  8418. col++;
  8419. end = ctxt->input->end;
  8420. start = in;
  8421. if (in >= end) {
  8422. GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
  8423. }
  8424. if (normalize) {
  8425. /*
  8426. * Skip any leading spaces
  8427. */
  8428. while ((in < end) && (*in != limit) &&
  8429. ((*in == 0x20) || (*in == 0x9) ||
  8430. (*in == 0xA) || (*in == 0xD))) {
  8431. if (*in == 0xA) {
  8432. line++; col = 1;
  8433. } else {
  8434. col++;
  8435. }
  8436. in++;
  8437. start = in;
  8438. if (in >= end) {
  8439. GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
  8440. if (((in - start) > XML_MAX_TEXT_LENGTH) &&
  8441. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8442. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  8443. "AttValue length too long\n");
  8444. return(NULL);
  8445. }
  8446. }
  8447. }
  8448. while ((in < end) && (*in != limit) && (*in >= 0x20) &&
  8449. (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
  8450. col++;
  8451. if ((*in++ == 0x20) && (*in == 0x20)) break;
  8452. if (in >= end) {
  8453. GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
  8454. if (((in - start) > XML_MAX_TEXT_LENGTH) &&
  8455. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8456. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  8457. "AttValue length too long\n");
  8458. return(NULL);
  8459. }
  8460. }
  8461. }
  8462. last = in;
  8463. /*
  8464. * skip the trailing blanks
  8465. */
  8466. while ((last[-1] == 0x20) && (last > start)) last--;
  8467. while ((in < end) && (*in != limit) &&
  8468. ((*in == 0x20) || (*in == 0x9) ||
  8469. (*in == 0xA) || (*in == 0xD))) {
  8470. if (*in == 0xA) {
  8471. line++, col = 1;
  8472. } else {
  8473. col++;
  8474. }
  8475. in++;
  8476. if (in >= end) {
  8477. const xmlChar *oldbase = ctxt->input->base;
  8478. GROW;
  8479. if (ctxt->instate == XML_PARSER_EOF)
  8480. return(NULL);
  8481. if (oldbase != ctxt->input->base) {
  8482. ptrdiff_t delta = ctxt->input->base - oldbase;
  8483. start = start + delta;
  8484. in = in + delta;
  8485. last = last + delta;
  8486. }
  8487. end = ctxt->input->end;
  8488. if (((in - start) > XML_MAX_TEXT_LENGTH) &&
  8489. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8490. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  8491. "AttValue length too long\n");
  8492. return(NULL);
  8493. }
  8494. }
  8495. }
  8496. if (((in - start) > XML_MAX_TEXT_LENGTH) &&
  8497. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8498. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  8499. "AttValue length too long\n");
  8500. return(NULL);
  8501. }
  8502. if (*in != limit) goto need_complex;
  8503. } else {
  8504. while ((in < end) && (*in != limit) && (*in >= 0x20) &&
  8505. (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
  8506. in++;
  8507. col++;
  8508. if (in >= end) {
  8509. GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
  8510. if (((in - start) > XML_MAX_TEXT_LENGTH) &&
  8511. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8512. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  8513. "AttValue length too long\n");
  8514. return(NULL);
  8515. }
  8516. }
  8517. }
  8518. last = in;
  8519. if (((in - start) > XML_MAX_TEXT_LENGTH) &&
  8520. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8521. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  8522. "AttValue length too long\n");
  8523. return(NULL);
  8524. }
  8525. if (*in != limit) goto need_complex;
  8526. }
  8527. in++;
  8528. col++;
  8529. if (len != NULL) {
  8530. *len = last - start;
  8531. ret = (xmlChar *) start;
  8532. } else {
  8533. if (alloc) *alloc = 1;
  8534. ret = xmlStrndup(start, last - start);
  8535. }
  8536. CUR_PTR = in;
  8537. ctxt->input->line = line;
  8538. ctxt->input->col = col;
  8539. if (alloc) *alloc = 0;
  8540. return ret;
  8541. need_complex:
  8542. if (alloc) *alloc = 1;
  8543. return xmlParseAttValueComplex(ctxt, len, normalize);
  8544. }
  8545. /**
  8546. * xmlParseAttribute2:
  8547. * @ctxt: an XML parser context
  8548. * @pref: the element prefix
  8549. * @elem: the element name
  8550. * @prefix: a xmlChar ** used to store the value of the attribute prefix
  8551. * @value: a xmlChar ** used to store the value of the attribute
  8552. * @len: an int * to save the length of the attribute
  8553. * @alloc: an int * to indicate if the attribute was allocated
  8554. *
  8555. * parse an attribute in the new SAX2 framework.
  8556. *
  8557. * Returns the attribute name, and the value in *value, .
  8558. */
  8559. static const xmlChar *
  8560. xmlParseAttribute2(xmlParserCtxtPtr ctxt,
  8561. const xmlChar * pref, const xmlChar * elem,
  8562. const xmlChar ** prefix, xmlChar ** value,
  8563. int *len, int *alloc)
  8564. {
  8565. const xmlChar *name;
  8566. xmlChar *val, *internal_val = NULL;
  8567. int normalize = 0;
  8568. *value = NULL;
  8569. GROW;
  8570. name = xmlParseQName(ctxt, prefix);
  8571. if (name == NULL) {
  8572. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  8573. "error parsing attribute name\n");
  8574. return (NULL);
  8575. }
  8576. /*
  8577. * get the type if needed
  8578. */
  8579. if (ctxt->attsSpecial != NULL) {
  8580. int type;
  8581. type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
  8582. pref, elem, *prefix, name);
  8583. if (type != 0)
  8584. normalize = 1;
  8585. }
  8586. /*
  8587. * read the value
  8588. */
  8589. SKIP_BLANKS;
  8590. if (RAW == '=') {
  8591. NEXT;
  8592. SKIP_BLANKS;
  8593. val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
  8594. if (normalize) {
  8595. /*
  8596. * Sometimes a second normalisation pass for spaces is needed
  8597. * but that only happens if charrefs or entities references
  8598. * have been used in the attribute value, i.e. the attribute
  8599. * value have been extracted in an allocated string already.
  8600. */
  8601. if (*alloc) {
  8602. const xmlChar *val2;
  8603. val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
  8604. if ((val2 != NULL) && (val2 != val)) {
  8605. xmlFree(val);
  8606. val = (xmlChar *) val2;
  8607. }
  8608. }
  8609. }
  8610. ctxt->instate = XML_PARSER_CONTENT;
  8611. } else {
  8612. xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
  8613. "Specification mandates value for attribute %s\n",
  8614. name);
  8615. return (NULL);
  8616. }
  8617. if (*prefix == ctxt->str_xml) {
  8618. /*
  8619. * Check that xml:lang conforms to the specification
  8620. * No more registered as an error, just generate a warning now
  8621. * since this was deprecated in XML second edition
  8622. */
  8623. if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
  8624. internal_val = xmlStrndup(val, *len);
  8625. if (!xmlCheckLanguageID(internal_val)) {
  8626. xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
  8627. "Malformed value for xml:lang : %s\n",
  8628. internal_val, NULL);
  8629. }
  8630. }
  8631. /*
  8632. * Check that xml:space conforms to the specification
  8633. */
  8634. if (xmlStrEqual(name, BAD_CAST "space")) {
  8635. internal_val = xmlStrndup(val, *len);
  8636. if (xmlStrEqual(internal_val, BAD_CAST "default"))
  8637. *(ctxt->space) = 0;
  8638. else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
  8639. *(ctxt->space) = 1;
  8640. else {
  8641. xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
  8642. "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
  8643. internal_val, NULL);
  8644. }
  8645. }
  8646. if (internal_val) {
  8647. xmlFree(internal_val);
  8648. }
  8649. }
  8650. *value = val;
  8651. return (name);
  8652. }
  8653. /**
  8654. * xmlParseStartTag2:
  8655. * @ctxt: an XML parser context
  8656. *
  8657. * parse a start of tag either for rule element or
  8658. * EmptyElement. In both case we don't parse the tag closing chars.
  8659. * This routine is called when running SAX2 parsing
  8660. *
  8661. * [40] STag ::= '<' Name (S Attribute)* S? '>'
  8662. *
  8663. * [ WFC: Unique Att Spec ]
  8664. * No attribute name may appear more than once in the same start-tag or
  8665. * empty-element tag.
  8666. *
  8667. * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
  8668. *
  8669. * [ WFC: Unique Att Spec ]
  8670. * No attribute name may appear more than once in the same start-tag or
  8671. * empty-element tag.
  8672. *
  8673. * With namespace:
  8674. *
  8675. * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
  8676. *
  8677. * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
  8678. *
  8679. * Returns the element name parsed
  8680. */
  8681. static const xmlChar *
  8682. xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
  8683. const xmlChar **URI, int *tlen) {
  8684. const xmlChar *localname;
  8685. const xmlChar *prefix;
  8686. const xmlChar *attname;
  8687. const xmlChar *aprefix;
  8688. const xmlChar *nsname;
  8689. xmlChar *attvalue;
  8690. const xmlChar **atts = ctxt->atts;
  8691. int maxatts = ctxt->maxatts;
  8692. int nratts, nbatts, nbdef, inputid;
  8693. int i, j, nbNs, attval;
  8694. unsigned long cur;
  8695. int nsNr = ctxt->nsNr;
  8696. if (RAW != '<') return(NULL);
  8697. NEXT1;
  8698. /*
  8699. * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
  8700. * point since the attribute values may be stored as pointers to
  8701. * the buffer and calling SHRINK would destroy them !
  8702. * The Shrinking is only possible once the full set of attribute
  8703. * callbacks have been done.
  8704. */
  8705. SHRINK;
  8706. cur = ctxt->input->cur - ctxt->input->base;
  8707. inputid = ctxt->input->id;
  8708. nbatts = 0;
  8709. nratts = 0;
  8710. nbdef = 0;
  8711. nbNs = 0;
  8712. attval = 0;
  8713. /* Forget any namespaces added during an earlier parse of this element. */
  8714. ctxt->nsNr = nsNr;
  8715. localname = xmlParseQName(ctxt, &prefix);
  8716. if (localname == NULL) {
  8717. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  8718. "StartTag: invalid element name\n");
  8719. return(NULL);
  8720. }
  8721. *tlen = ctxt->input->cur - ctxt->input->base - cur;
  8722. /*
  8723. * Now parse the attributes, it ends up with the ending
  8724. *
  8725. * (S Attribute)* S?
  8726. */
  8727. SKIP_BLANKS;
  8728. GROW;
  8729. while (((RAW != '>') &&
  8730. ((RAW != '/') || (NXT(1) != '>')) &&
  8731. (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
  8732. const xmlChar *q = CUR_PTR;
  8733. unsigned int cons = ctxt->input->consumed;
  8734. int len = -1, alloc = 0;
  8735. attname = xmlParseAttribute2(ctxt, prefix, localname,
  8736. &aprefix, &attvalue, &len, &alloc);
  8737. if ((attname == NULL) || (attvalue == NULL))
  8738. goto next_attr;
  8739. if (len < 0) len = xmlStrlen(attvalue);
  8740. if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
  8741. const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
  8742. xmlURIPtr uri;
  8743. if (URL == NULL) {
  8744. xmlErrMemory(ctxt, "dictionary allocation failure");
  8745. if ((attvalue != NULL) && (alloc != 0))
  8746. xmlFree(attvalue);
  8747. localname = NULL;
  8748. goto done;
  8749. }
  8750. if (*URL != 0) {
  8751. uri = xmlParseURI((const char *) URL);
  8752. if (uri == NULL) {
  8753. xmlNsErr(ctxt, XML_WAR_NS_URI,
  8754. "xmlns: '%s' is not a valid URI\n",
  8755. URL, NULL, NULL);
  8756. } else {
  8757. if (uri->scheme == NULL) {
  8758. xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
  8759. "xmlns: URI %s is not absolute\n",
  8760. URL, NULL, NULL);
  8761. }
  8762. xmlFreeURI(uri);
  8763. }
  8764. if (URL == ctxt->str_xml_ns) {
  8765. if (attname != ctxt->str_xml) {
  8766. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8767. "xml namespace URI cannot be the default namespace\n",
  8768. NULL, NULL, NULL);
  8769. }
  8770. goto next_attr;
  8771. }
  8772. if ((len == 29) &&
  8773. (xmlStrEqual(URL,
  8774. BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
  8775. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8776. "reuse of the xmlns namespace name is forbidden\n",
  8777. NULL, NULL, NULL);
  8778. goto next_attr;
  8779. }
  8780. }
  8781. /*
  8782. * check that it's not a defined namespace
  8783. */
  8784. for (j = 1;j <= nbNs;j++)
  8785. if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
  8786. break;
  8787. if (j <= nbNs)
  8788. xmlErrAttributeDup(ctxt, NULL, attname);
  8789. else
  8790. if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
  8791. } else if (aprefix == ctxt->str_xmlns) {
  8792. const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
  8793. xmlURIPtr uri;
  8794. if (attname == ctxt->str_xml) {
  8795. if (URL != ctxt->str_xml_ns) {
  8796. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8797. "xml namespace prefix mapped to wrong URI\n",
  8798. NULL, NULL, NULL);
  8799. }
  8800. /*
  8801. * Do not keep a namespace definition node
  8802. */
  8803. goto next_attr;
  8804. }
  8805. if (URL == ctxt->str_xml_ns) {
  8806. if (attname != ctxt->str_xml) {
  8807. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8808. "xml namespace URI mapped to wrong prefix\n",
  8809. NULL, NULL, NULL);
  8810. }
  8811. goto next_attr;
  8812. }
  8813. if (attname == ctxt->str_xmlns) {
  8814. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8815. "redefinition of the xmlns prefix is forbidden\n",
  8816. NULL, NULL, NULL);
  8817. goto next_attr;
  8818. }
  8819. if ((len == 29) &&
  8820. (xmlStrEqual(URL,
  8821. BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
  8822. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8823. "reuse of the xmlns namespace name is forbidden\n",
  8824. NULL, NULL, NULL);
  8825. goto next_attr;
  8826. }
  8827. if ((URL == NULL) || (URL[0] == 0)) {
  8828. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8829. "xmlns:%s: Empty XML namespace is not allowed\n",
  8830. attname, NULL, NULL);
  8831. goto next_attr;
  8832. } else {
  8833. uri = xmlParseURI((const char *) URL);
  8834. if (uri == NULL) {
  8835. xmlNsErr(ctxt, XML_WAR_NS_URI,
  8836. "xmlns:%s: '%s' is not a valid URI\n",
  8837. attname, URL, NULL);
  8838. } else {
  8839. if ((ctxt->pedantic) && (uri->scheme == NULL)) {
  8840. xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
  8841. "xmlns:%s: URI %s is not absolute\n",
  8842. attname, URL, NULL);
  8843. }
  8844. xmlFreeURI(uri);
  8845. }
  8846. }
  8847. /*
  8848. * check that it's not a defined namespace
  8849. */
  8850. for (j = 1;j <= nbNs;j++)
  8851. if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
  8852. break;
  8853. if (j <= nbNs)
  8854. xmlErrAttributeDup(ctxt, aprefix, attname);
  8855. else
  8856. if (nsPush(ctxt, attname, URL) > 0) nbNs++;
  8857. } else {
  8858. /*
  8859. * Add the pair to atts
  8860. */
  8861. if ((atts == NULL) || (nbatts + 5 > maxatts)) {
  8862. if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
  8863. goto next_attr;
  8864. }
  8865. maxatts = ctxt->maxatts;
  8866. atts = ctxt->atts;
  8867. }
  8868. ctxt->attallocs[nratts++] = alloc;
  8869. atts[nbatts++] = attname;
  8870. atts[nbatts++] = aprefix;
  8871. /*
  8872. * The namespace URI field is used temporarily to point at the
  8873. * base of the current input buffer for non-alloced attributes.
  8874. * When the input buffer is reallocated, all the pointers become
  8875. * invalid, but they can be reconstructed later.
  8876. */
  8877. if (alloc)
  8878. atts[nbatts++] = NULL;
  8879. else
  8880. atts[nbatts++] = ctxt->input->base;
  8881. atts[nbatts++] = attvalue;
  8882. attvalue += len;
  8883. atts[nbatts++] = attvalue;
  8884. /*
  8885. * tag if some deallocation is needed
  8886. */
  8887. if (alloc != 0) attval = 1;
  8888. attvalue = NULL; /* moved into atts */
  8889. }
  8890. next_attr:
  8891. if ((attvalue != NULL) && (alloc != 0)) {
  8892. xmlFree(attvalue);
  8893. attvalue = NULL;
  8894. }
  8895. GROW
  8896. if (ctxt->instate == XML_PARSER_EOF)
  8897. break;
  8898. if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
  8899. break;
  8900. if (SKIP_BLANKS == 0) {
  8901. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  8902. "attributes construct error\n");
  8903. break;
  8904. }
  8905. if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
  8906. (attname == NULL) && (attvalue == NULL)) {
  8907. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  8908. "xmlParseStartTag: problem parsing attributes\n");
  8909. break;
  8910. }
  8911. GROW;
  8912. }
  8913. if (ctxt->input->id != inputid) {
  8914. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  8915. "Unexpected change of input\n");
  8916. localname = NULL;
  8917. goto done;
  8918. }
  8919. /* Reconstruct attribute value pointers. */
  8920. for (i = 0, j = 0; j < nratts; i += 5, j++) {
  8921. if (atts[i+2] != NULL) {
  8922. /*
  8923. * Arithmetic on dangling pointers is technically undefined
  8924. * behavior, but well...
  8925. */
  8926. ptrdiff_t offset = ctxt->input->base - atts[i+2];
  8927. atts[i+2] = NULL; /* Reset repurposed namespace URI */
  8928. atts[i+3] += offset; /* value */
  8929. atts[i+4] += offset; /* valuend */
  8930. }
  8931. }
  8932. /*
  8933. * The attributes defaulting
  8934. */
  8935. if (ctxt->attsDefault != NULL) {
  8936. xmlDefAttrsPtr defaults;
  8937. defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
  8938. if (defaults != NULL) {
  8939. for (i = 0;i < defaults->nbAttrs;i++) {
  8940. attname = defaults->values[5 * i];
  8941. aprefix = defaults->values[5 * i + 1];
  8942. /*
  8943. * special work for namespaces defaulted defs
  8944. */
  8945. if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
  8946. /*
  8947. * check that it's not a defined namespace
  8948. */
  8949. for (j = 1;j <= nbNs;j++)
  8950. if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
  8951. break;
  8952. if (j <= nbNs) continue;
  8953. nsname = xmlGetNamespace(ctxt, NULL);
  8954. if (nsname != defaults->values[5 * i + 2]) {
  8955. if (nsPush(ctxt, NULL,
  8956. defaults->values[5 * i + 2]) > 0)
  8957. nbNs++;
  8958. }
  8959. } else if (aprefix == ctxt->str_xmlns) {
  8960. /*
  8961. * check that it's not a defined namespace
  8962. */
  8963. for (j = 1;j <= nbNs;j++)
  8964. if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
  8965. break;
  8966. if (j <= nbNs) continue;
  8967. nsname = xmlGetNamespace(ctxt, attname);
  8968. if (nsname != defaults->values[2]) {
  8969. if (nsPush(ctxt, attname,
  8970. defaults->values[5 * i + 2]) > 0)
  8971. nbNs++;
  8972. }
  8973. } else {
  8974. /*
  8975. * check that it's not a defined attribute
  8976. */
  8977. for (j = 0;j < nbatts;j+=5) {
  8978. if ((attname == atts[j]) && (aprefix == atts[j+1]))
  8979. break;
  8980. }
  8981. if (j < nbatts) continue;
  8982. if ((atts == NULL) || (nbatts + 5 > maxatts)) {
  8983. if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
  8984. localname = NULL;
  8985. goto done;
  8986. }
  8987. maxatts = ctxt->maxatts;
  8988. atts = ctxt->atts;
  8989. }
  8990. atts[nbatts++] = attname;
  8991. atts[nbatts++] = aprefix;
  8992. if (aprefix == NULL)
  8993. atts[nbatts++] = NULL;
  8994. else
  8995. atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
  8996. atts[nbatts++] = defaults->values[5 * i + 2];
  8997. atts[nbatts++] = defaults->values[5 * i + 3];
  8998. if ((ctxt->standalone == 1) &&
  8999. (defaults->values[5 * i + 4] != NULL)) {
  9000. xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
  9001. "standalone: attribute %s on %s defaulted from external subset\n",
  9002. attname, localname);
  9003. }
  9004. nbdef++;
  9005. }
  9006. }
  9007. }
  9008. }
  9009. /*
  9010. * The attributes checkings
  9011. */
  9012. for (i = 0; i < nbatts;i += 5) {
  9013. /*
  9014. * The default namespace does not apply to attribute names.
  9015. */
  9016. if (atts[i + 1] != NULL) {
  9017. nsname = xmlGetNamespace(ctxt, atts[i + 1]);
  9018. if (nsname == NULL) {
  9019. xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
  9020. "Namespace prefix %s for %s on %s is not defined\n",
  9021. atts[i + 1], atts[i], localname);
  9022. }
  9023. atts[i + 2] = nsname;
  9024. } else
  9025. nsname = NULL;
  9026. /*
  9027. * [ WFC: Unique Att Spec ]
  9028. * No attribute name may appear more than once in the same
  9029. * start-tag or empty-element tag.
  9030. * As extended by the Namespace in XML REC.
  9031. */
  9032. for (j = 0; j < i;j += 5) {
  9033. if (atts[i] == atts[j]) {
  9034. if (atts[i+1] == atts[j+1]) {
  9035. xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
  9036. break;
  9037. }
  9038. if ((nsname != NULL) && (atts[j + 2] == nsname)) {
  9039. xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
  9040. "Namespaced Attribute %s in '%s' redefined\n",
  9041. atts[i], nsname, NULL);
  9042. break;
  9043. }
  9044. }
  9045. }
  9046. }
  9047. nsname = xmlGetNamespace(ctxt, prefix);
  9048. if ((prefix != NULL) && (nsname == NULL)) {
  9049. xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
  9050. "Namespace prefix %s on %s is not defined\n",
  9051. prefix, localname, NULL);
  9052. }
  9053. *pref = prefix;
  9054. *URI = nsname;
  9055. /*
  9056. * SAX: Start of Element !
  9057. */
  9058. if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
  9059. (!ctxt->disableSAX)) {
  9060. if (nbNs > 0)
  9061. ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
  9062. nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
  9063. nbatts / 5, nbdef, atts);
  9064. else
  9065. ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
  9066. nsname, 0, NULL, nbatts / 5, nbdef, atts);
  9067. }
  9068. done:
  9069. /*
  9070. * Free up attribute allocated strings if needed
  9071. */
  9072. if (attval != 0) {
  9073. for (i = 3,j = 0; j < nratts;i += 5,j++)
  9074. if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
  9075. xmlFree((xmlChar *) atts[i]);
  9076. }
  9077. return(localname);
  9078. }
  9079. /**
  9080. * xmlParseEndTag2:
  9081. * @ctxt: an XML parser context
  9082. * @line: line of the start tag
  9083. * @nsNr: number of namespaces on the start tag
  9084. *
  9085. * parse an end of tag
  9086. *
  9087. * [42] ETag ::= '</' Name S? '>'
  9088. *
  9089. * With namespace
  9090. *
  9091. * [NS 9] ETag ::= '</' QName S? '>'
  9092. */
  9093. static void
  9094. xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
  9095. const xmlChar *name;
  9096. GROW;
  9097. if ((RAW != '<') || (NXT(1) != '/')) {
  9098. xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
  9099. return;
  9100. }
  9101. SKIP(2);
  9102. if (tag->prefix == NULL)
  9103. name = xmlParseNameAndCompare(ctxt, ctxt->name);
  9104. else
  9105. name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
  9106. /*
  9107. * We should definitely be at the ending "S? '>'" part
  9108. */
  9109. GROW;
  9110. if (ctxt->instate == XML_PARSER_EOF)
  9111. return;
  9112. SKIP_BLANKS;
  9113. if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
  9114. xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
  9115. } else
  9116. NEXT1;
  9117. /*
  9118. * [ WFC: Element Type Match ]
  9119. * The Name in an element's end-tag must match the element type in the
  9120. * start-tag.
  9121. *
  9122. */
  9123. if (name != (xmlChar*)1) {
  9124. if (name == NULL) name = BAD_CAST "unparsable";
  9125. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
  9126. "Opening and ending tag mismatch: %s line %d and %s\n",
  9127. ctxt->name, tag->line, name);
  9128. }
  9129. /*
  9130. * SAX: End of Tag
  9131. */
  9132. if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
  9133. (!ctxt->disableSAX))
  9134. ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
  9135. tag->URI);
  9136. spacePop(ctxt);
  9137. if (tag->nsNr != 0)
  9138. nsPop(ctxt, tag->nsNr);
  9139. }
  9140. /**
  9141. * xmlParseCDSect:
  9142. * @ctxt: an XML parser context
  9143. *
  9144. * Parse escaped pure raw content.
  9145. *
  9146. * [18] CDSect ::= CDStart CData CDEnd
  9147. *
  9148. * [19] CDStart ::= '<![CDATA['
  9149. *
  9150. * [20] Data ::= (Char* - (Char* ']]>' Char*))
  9151. *
  9152. * [21] CDEnd ::= ']]>'
  9153. */
  9154. void
  9155. xmlParseCDSect(xmlParserCtxtPtr ctxt) {
  9156. xmlChar *buf = NULL;
  9157. int len = 0;
  9158. int size = XML_PARSER_BUFFER_SIZE;
  9159. int r, rl;
  9160. int s, sl;
  9161. int cur, l;
  9162. int count = 0;
  9163. /* Check 2.6.0 was NXT(0) not RAW */
  9164. if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
  9165. SKIP(9);
  9166. } else
  9167. return;
  9168. ctxt->instate = XML_PARSER_CDATA_SECTION;
  9169. r = CUR_CHAR(rl);
  9170. if (!IS_CHAR(r)) {
  9171. xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
  9172. ctxt->instate = XML_PARSER_CONTENT;
  9173. return;
  9174. }
  9175. NEXTL(rl);
  9176. s = CUR_CHAR(sl);
  9177. if (!IS_CHAR(s)) {
  9178. xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
  9179. ctxt->instate = XML_PARSER_CONTENT;
  9180. return;
  9181. }
  9182. NEXTL(sl);
  9183. cur = CUR_CHAR(l);
  9184. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  9185. if (buf == NULL) {
  9186. xmlErrMemory(ctxt, NULL);
  9187. return;
  9188. }
  9189. while (IS_CHAR(cur) &&
  9190. ((r != ']') || (s != ']') || (cur != '>'))) {
  9191. if (len + 5 >= size) {
  9192. xmlChar *tmp;
  9193. if ((size > XML_MAX_TEXT_LENGTH) &&
  9194. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  9195. xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
  9196. "CData section too big found", NULL);
  9197. xmlFree (buf);
  9198. return;
  9199. }
  9200. tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
  9201. if (tmp == NULL) {
  9202. xmlFree(buf);
  9203. xmlErrMemory(ctxt, NULL);
  9204. return;
  9205. }
  9206. buf = tmp;
  9207. size *= 2;
  9208. }
  9209. COPY_BUF(rl,buf,len,r);
  9210. r = s;
  9211. rl = sl;
  9212. s = cur;
  9213. sl = l;
  9214. count++;
  9215. if (count > 50) {
  9216. SHRINK;
  9217. GROW;
  9218. if (ctxt->instate == XML_PARSER_EOF) {
  9219. xmlFree(buf);
  9220. return;
  9221. }
  9222. count = 0;
  9223. }
  9224. NEXTL(l);
  9225. cur = CUR_CHAR(l);
  9226. }
  9227. buf[len] = 0;
  9228. ctxt->instate = XML_PARSER_CONTENT;
  9229. if (cur != '>') {
  9230. xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
  9231. "CData section not finished\n%.50s\n", buf);
  9232. xmlFree(buf);
  9233. return;
  9234. }
  9235. NEXTL(l);
  9236. /*
  9237. * OK the buffer is to be consumed as cdata.
  9238. */
  9239. if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
  9240. if (ctxt->sax->cdataBlock != NULL)
  9241. ctxt->sax->cdataBlock(ctxt->userData, buf, len);
  9242. else if (ctxt->sax->characters != NULL)
  9243. ctxt->sax->characters(ctxt->userData, buf, len);
  9244. }
  9245. xmlFree(buf);
  9246. }
  9247. /**
  9248. * xmlParseContentInternal:
  9249. * @ctxt: an XML parser context
  9250. *
  9251. * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
  9252. * unexpected EOF to the caller.
  9253. */
  9254. static void
  9255. xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
  9256. int nameNr = ctxt->nameNr;
  9257. GROW;
  9258. while ((RAW != 0) &&
  9259. (ctxt->instate != XML_PARSER_EOF)) {
  9260. const xmlChar *test = CUR_PTR;
  9261. unsigned int cons = ctxt->input->consumed;
  9262. const xmlChar *cur = ctxt->input->cur;
  9263. /*
  9264. * First case : a Processing Instruction.
  9265. */
  9266. if ((*cur == '<') && (cur[1] == '?')) {
  9267. xmlParsePI(ctxt);
  9268. }
  9269. /*
  9270. * Second case : a CDSection
  9271. */
  9272. /* 2.6.0 test was *cur not RAW */
  9273. else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
  9274. xmlParseCDSect(ctxt);
  9275. }
  9276. /*
  9277. * Third case : a comment
  9278. */
  9279. else if ((*cur == '<') && (NXT(1) == '!') &&
  9280. (NXT(2) == '-') && (NXT(3) == '-')) {
  9281. xmlParseComment(ctxt);
  9282. ctxt->instate = XML_PARSER_CONTENT;
  9283. }
  9284. /*
  9285. * Fourth case : a sub-element.
  9286. */
  9287. else if (*cur == '<') {
  9288. if (NXT(1) == '/') {
  9289. if (ctxt->nameNr <= nameNr)
  9290. break;
  9291. xmlParseElementEnd(ctxt);
  9292. } else {
  9293. xmlParseElementStart(ctxt);
  9294. }
  9295. }
  9296. /*
  9297. * Fifth case : a reference. If if has not been resolved,
  9298. * parsing returns it's Name, create the node
  9299. */
  9300. else if (*cur == '&') {
  9301. xmlParseReference(ctxt);
  9302. }
  9303. /*
  9304. * Last case, text. Note that References are handled directly.
  9305. */
  9306. else {
  9307. xmlParseCharData(ctxt, 0);
  9308. }
  9309. GROW;
  9310. SHRINK;
  9311. if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
  9312. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  9313. "detected an error in element content\n");
  9314. xmlHaltParser(ctxt);
  9315. break;
  9316. }
  9317. }
  9318. }
  9319. /**
  9320. * xmlParseContent:
  9321. * @ctxt: an XML parser context
  9322. *
  9323. * Parse a content sequence. Stops at EOF or '</'.
  9324. *
  9325. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  9326. */
  9327. void
  9328. xmlParseContent(xmlParserCtxtPtr ctxt) {
  9329. int nameNr = ctxt->nameNr;
  9330. xmlParseContentInternal(ctxt);
  9331. if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
  9332. const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
  9333. int line = ctxt->pushTab[ctxt->nameNr - 1].line;
  9334. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
  9335. "Premature end of data in tag %s line %d\n",
  9336. name, line, NULL);
  9337. }
  9338. }
  9339. /**
  9340. * xmlParseElement:
  9341. * @ctxt: an XML parser context
  9342. *
  9343. * parse an XML element
  9344. *
  9345. * [39] element ::= EmptyElemTag | STag content ETag
  9346. *
  9347. * [ WFC: Element Type Match ]
  9348. * The Name in an element's end-tag must match the element type in the
  9349. * start-tag.
  9350. *
  9351. */
  9352. void
  9353. xmlParseElement(xmlParserCtxtPtr ctxt) {
  9354. if (xmlParseElementStart(ctxt) != 0)
  9355. return;
  9356. xmlParseContentInternal(ctxt);
  9357. if (ctxt->instate == XML_PARSER_EOF)
  9358. return;
  9359. if (CUR == 0) {
  9360. const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
  9361. int line = ctxt->pushTab[ctxt->nameNr - 1].line;
  9362. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
  9363. "Premature end of data in tag %s line %d\n",
  9364. name, line, NULL);
  9365. return;
  9366. }
  9367. xmlParseElementEnd(ctxt);
  9368. }
  9369. /**
  9370. * xmlParseElementStart:
  9371. * @ctxt: an XML parser context
  9372. *
  9373. * Parse the start of an XML element. Returns -1 in case of error, 0 if an
  9374. * opening tag was parsed, 1 if an empty element was parsed.
  9375. */
  9376. static int
  9377. xmlParseElementStart(xmlParserCtxtPtr ctxt) {
  9378. const xmlChar *name;
  9379. const xmlChar *prefix = NULL;
  9380. const xmlChar *URI = NULL;
  9381. xmlParserNodeInfo node_info;
  9382. int line, tlen = 0;
  9383. xmlNodePtr ret;
  9384. int nsNr = ctxt->nsNr;
  9385. if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
  9386. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  9387. xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
  9388. "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
  9389. xmlParserMaxDepth);
  9390. xmlHaltParser(ctxt);
  9391. return(-1);
  9392. }
  9393. /* Capture start position */
  9394. if (ctxt->record_info) {
  9395. node_info.begin_pos = ctxt->input->consumed +
  9396. (CUR_PTR - ctxt->input->base);
  9397. node_info.begin_line = ctxt->input->line;
  9398. }
  9399. if (ctxt->spaceNr == 0)
  9400. spacePush(ctxt, -1);
  9401. else if (*ctxt->space == -2)
  9402. spacePush(ctxt, -1);
  9403. else
  9404. spacePush(ctxt, *ctxt->space);
  9405. line = ctxt->input->line;
  9406. #ifdef LIBXML_SAX1_ENABLED
  9407. if (ctxt->sax2)
  9408. #endif /* LIBXML_SAX1_ENABLED */
  9409. name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
  9410. #ifdef LIBXML_SAX1_ENABLED
  9411. else
  9412. name = xmlParseStartTag(ctxt);
  9413. #endif /* LIBXML_SAX1_ENABLED */
  9414. if (ctxt->instate == XML_PARSER_EOF)
  9415. return(-1);
  9416. if (name == NULL) {
  9417. spacePop(ctxt);
  9418. return(-1);
  9419. }
  9420. nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
  9421. ret = ctxt->node;
  9422. #ifdef LIBXML_VALID_ENABLED
  9423. /*
  9424. * [ VC: Root Element Type ]
  9425. * The Name in the document type declaration must match the element
  9426. * type of the root element.
  9427. */
  9428. if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
  9429. ctxt->node && (ctxt->node == ctxt->myDoc->children))
  9430. ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
  9431. #endif /* LIBXML_VALID_ENABLED */
  9432. /*
  9433. * Check for an Empty Element.
  9434. */
  9435. if ((RAW == '/') && (NXT(1) == '>')) {
  9436. SKIP(2);
  9437. if (ctxt->sax2) {
  9438. if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
  9439. (!ctxt->disableSAX))
  9440. ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
  9441. #ifdef LIBXML_SAX1_ENABLED
  9442. } else {
  9443. if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
  9444. (!ctxt->disableSAX))
  9445. ctxt->sax->endElement(ctxt->userData, name);
  9446. #endif /* LIBXML_SAX1_ENABLED */
  9447. }
  9448. namePop(ctxt);
  9449. spacePop(ctxt);
  9450. if (nsNr != ctxt->nsNr)
  9451. nsPop(ctxt, ctxt->nsNr - nsNr);
  9452. if ( ret != NULL && ctxt->record_info ) {
  9453. node_info.end_pos = ctxt->input->consumed +
  9454. (CUR_PTR - ctxt->input->base);
  9455. node_info.end_line = ctxt->input->line;
  9456. node_info.node = ret;
  9457. xmlParserAddNodeInfo(ctxt, &node_info);
  9458. }
  9459. return(1);
  9460. }
  9461. if (RAW == '>') {
  9462. NEXT1;
  9463. } else {
  9464. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
  9465. "Couldn't find end of Start Tag %s line %d\n",
  9466. name, line, NULL);
  9467. /*
  9468. * end of parsing of this node.
  9469. */
  9470. nodePop(ctxt);
  9471. namePop(ctxt);
  9472. spacePop(ctxt);
  9473. if (nsNr != ctxt->nsNr)
  9474. nsPop(ctxt, ctxt->nsNr - nsNr);
  9475. /*
  9476. * Capture end position and add node
  9477. */
  9478. if ( ret != NULL && ctxt->record_info ) {
  9479. node_info.end_pos = ctxt->input->consumed +
  9480. (CUR_PTR - ctxt->input->base);
  9481. node_info.end_line = ctxt->input->line;
  9482. node_info.node = ret;
  9483. xmlParserAddNodeInfo(ctxt, &node_info);
  9484. }
  9485. return(-1);
  9486. }
  9487. return(0);
  9488. }
  9489. /**
  9490. * xmlParseElementEnd:
  9491. * @ctxt: an XML parser context
  9492. *
  9493. * Parse the end of an XML element.
  9494. */
  9495. static void
  9496. xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
  9497. xmlParserNodeInfo node_info;
  9498. xmlNodePtr ret = ctxt->node;
  9499. if (ctxt->nameNr <= 0)
  9500. return;
  9501. /*
  9502. * parse the end of tag: '</' should be here.
  9503. */
  9504. if (ctxt->sax2) {
  9505. xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
  9506. namePop(ctxt);
  9507. }
  9508. #ifdef LIBXML_SAX1_ENABLED
  9509. else
  9510. xmlParseEndTag1(ctxt, 0);
  9511. #endif /* LIBXML_SAX1_ENABLED */
  9512. /*
  9513. * Capture end position and add node
  9514. */
  9515. if ( ret != NULL && ctxt->record_info ) {
  9516. node_info.end_pos = ctxt->input->consumed +
  9517. (CUR_PTR - ctxt->input->base);
  9518. node_info.end_line = ctxt->input->line;
  9519. node_info.node = ret;
  9520. xmlParserAddNodeInfo(ctxt, &node_info);
  9521. }
  9522. }
  9523. /**
  9524. * xmlParseVersionNum:
  9525. * @ctxt: an XML parser context
  9526. *
  9527. * parse the XML version value.
  9528. *
  9529. * [26] VersionNum ::= '1.' [0-9]+
  9530. *
  9531. * In practice allow [0-9].[0-9]+ at that level
  9532. *
  9533. * Returns the string giving the XML version number, or NULL
  9534. */
  9535. xmlChar *
  9536. xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
  9537. xmlChar *buf = NULL;
  9538. int len = 0;
  9539. int size = 10;
  9540. xmlChar cur;
  9541. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  9542. if (buf == NULL) {
  9543. xmlErrMemory(ctxt, NULL);
  9544. return(NULL);
  9545. }
  9546. cur = CUR;
  9547. if (!((cur >= '0') && (cur <= '9'))) {
  9548. xmlFree(buf);
  9549. return(NULL);
  9550. }
  9551. buf[len++] = cur;
  9552. NEXT;
  9553. cur=CUR;
  9554. if (cur != '.') {
  9555. xmlFree(buf);
  9556. return(NULL);
  9557. }
  9558. buf[len++] = cur;
  9559. NEXT;
  9560. cur=CUR;
  9561. while ((cur >= '0') && (cur <= '9')) {
  9562. if (len + 1 >= size) {
  9563. xmlChar *tmp;
  9564. size *= 2;
  9565. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  9566. if (tmp == NULL) {
  9567. xmlFree(buf);
  9568. xmlErrMemory(ctxt, NULL);
  9569. return(NULL);
  9570. }
  9571. buf = tmp;
  9572. }
  9573. buf[len++] = cur;
  9574. NEXT;
  9575. cur=CUR;
  9576. }
  9577. buf[len] = 0;
  9578. return(buf);
  9579. }
  9580. /**
  9581. * xmlParseVersionInfo:
  9582. * @ctxt: an XML parser context
  9583. *
  9584. * parse the XML version.
  9585. *
  9586. * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
  9587. *
  9588. * [25] Eq ::= S? '=' S?
  9589. *
  9590. * Returns the version string, e.g. "1.0"
  9591. */
  9592. xmlChar *
  9593. xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
  9594. xmlChar *version = NULL;
  9595. if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
  9596. SKIP(7);
  9597. SKIP_BLANKS;
  9598. if (RAW != '=') {
  9599. xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
  9600. return(NULL);
  9601. }
  9602. NEXT;
  9603. SKIP_BLANKS;
  9604. if (RAW == '"') {
  9605. NEXT;
  9606. version = xmlParseVersionNum(ctxt);
  9607. if (RAW != '"') {
  9608. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9609. } else
  9610. NEXT;
  9611. } else if (RAW == '\''){
  9612. NEXT;
  9613. version = xmlParseVersionNum(ctxt);
  9614. if (RAW != '\'') {
  9615. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9616. } else
  9617. NEXT;
  9618. } else {
  9619. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
  9620. }
  9621. }
  9622. return(version);
  9623. }
  9624. /**
  9625. * xmlParseEncName:
  9626. * @ctxt: an XML parser context
  9627. *
  9628. * parse the XML encoding name
  9629. *
  9630. * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  9631. *
  9632. * Returns the encoding name value or NULL
  9633. */
  9634. xmlChar *
  9635. xmlParseEncName(xmlParserCtxtPtr ctxt) {
  9636. xmlChar *buf = NULL;
  9637. int len = 0;
  9638. int size = 10;
  9639. xmlChar cur;
  9640. cur = CUR;
  9641. if (((cur >= 'a') && (cur <= 'z')) ||
  9642. ((cur >= 'A') && (cur <= 'Z'))) {
  9643. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  9644. if (buf == NULL) {
  9645. xmlErrMemory(ctxt, NULL);
  9646. return(NULL);
  9647. }
  9648. buf[len++] = cur;
  9649. NEXT;
  9650. cur = CUR;
  9651. while (((cur >= 'a') && (cur <= 'z')) ||
  9652. ((cur >= 'A') && (cur <= 'Z')) ||
  9653. ((cur >= '0') && (cur <= '9')) ||
  9654. (cur == '.') || (cur == '_') ||
  9655. (cur == '-')) {
  9656. if (len + 1 >= size) {
  9657. xmlChar *tmp;
  9658. size *= 2;
  9659. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  9660. if (tmp == NULL) {
  9661. xmlErrMemory(ctxt, NULL);
  9662. xmlFree(buf);
  9663. return(NULL);
  9664. }
  9665. buf = tmp;
  9666. }
  9667. buf[len++] = cur;
  9668. NEXT;
  9669. cur = CUR;
  9670. if (cur == 0) {
  9671. SHRINK;
  9672. GROW;
  9673. cur = CUR;
  9674. }
  9675. }
  9676. buf[len] = 0;
  9677. } else {
  9678. xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
  9679. }
  9680. return(buf);
  9681. }
  9682. /**
  9683. * xmlParseEncodingDecl:
  9684. * @ctxt: an XML parser context
  9685. *
  9686. * parse the XML encoding declaration
  9687. *
  9688. * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
  9689. *
  9690. * this setups the conversion filters.
  9691. *
  9692. * Returns the encoding value or NULL
  9693. */
  9694. const xmlChar *
  9695. xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
  9696. xmlChar *encoding = NULL;
  9697. SKIP_BLANKS;
  9698. if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
  9699. SKIP(8);
  9700. SKIP_BLANKS;
  9701. if (RAW != '=') {
  9702. xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
  9703. return(NULL);
  9704. }
  9705. NEXT;
  9706. SKIP_BLANKS;
  9707. if (RAW == '"') {
  9708. NEXT;
  9709. encoding = xmlParseEncName(ctxt);
  9710. if (RAW != '"') {
  9711. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9712. xmlFree((xmlChar *) encoding);
  9713. return(NULL);
  9714. } else
  9715. NEXT;
  9716. } else if (RAW == '\''){
  9717. NEXT;
  9718. encoding = xmlParseEncName(ctxt);
  9719. if (RAW != '\'') {
  9720. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9721. xmlFree((xmlChar *) encoding);
  9722. return(NULL);
  9723. } else
  9724. NEXT;
  9725. } else {
  9726. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
  9727. }
  9728. /*
  9729. * Non standard parsing, allowing the user to ignore encoding
  9730. */
  9731. if (ctxt->options & XML_PARSE_IGNORE_ENC) {
  9732. xmlFree((xmlChar *) encoding);
  9733. return(NULL);
  9734. }
  9735. /*
  9736. * UTF-16 encoding switch has already taken place at this stage,
  9737. * more over the little-endian/big-endian selection is already done
  9738. */
  9739. if ((encoding != NULL) &&
  9740. ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
  9741. (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
  9742. /*
  9743. * If no encoding was passed to the parser, that we are
  9744. * using UTF-16 and no decoder is present i.e. the
  9745. * document is apparently UTF-8 compatible, then raise an
  9746. * encoding mismatch fatal error
  9747. */
  9748. if ((ctxt->encoding == NULL) &&
  9749. (ctxt->input->buf != NULL) &&
  9750. (ctxt->input->buf->encoder == NULL)) {
  9751. xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
  9752. "Document labelled UTF-16 but has UTF-8 content\n");
  9753. }
  9754. if (ctxt->encoding != NULL)
  9755. xmlFree((xmlChar *) ctxt->encoding);
  9756. ctxt->encoding = encoding;
  9757. }
  9758. /*
  9759. * UTF-8 encoding is handled natively
  9760. */
  9761. else if ((encoding != NULL) &&
  9762. ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
  9763. (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
  9764. if (ctxt->encoding != NULL)
  9765. xmlFree((xmlChar *) ctxt->encoding);
  9766. ctxt->encoding = encoding;
  9767. }
  9768. else if (encoding != NULL) {
  9769. xmlCharEncodingHandlerPtr handler;
  9770. if (ctxt->input->encoding != NULL)
  9771. xmlFree((xmlChar *) ctxt->input->encoding);
  9772. ctxt->input->encoding = encoding;
  9773. handler = xmlFindCharEncodingHandler((const char *) encoding);
  9774. if (handler != NULL) {
  9775. if (xmlSwitchToEncoding(ctxt, handler) < 0) {
  9776. /* failed to convert */
  9777. ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
  9778. return(NULL);
  9779. }
  9780. } else {
  9781. xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
  9782. "Unsupported encoding %s\n", encoding);
  9783. return(NULL);
  9784. }
  9785. }
  9786. }
  9787. return(encoding);
  9788. }
  9789. /**
  9790. * xmlParseSDDecl:
  9791. * @ctxt: an XML parser context
  9792. *
  9793. * parse the XML standalone declaration
  9794. *
  9795. * [32] SDDecl ::= S 'standalone' Eq
  9796. * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
  9797. *
  9798. * [ VC: Standalone Document Declaration ]
  9799. * TODO The standalone document declaration must have the value "no"
  9800. * if any external markup declarations contain declarations of:
  9801. * - attributes with default values, if elements to which these
  9802. * attributes apply appear in the document without specifications
  9803. * of values for these attributes, or
  9804. * - entities (other than amp, lt, gt, apos, quot), if references
  9805. * to those entities appear in the document, or
  9806. * - attributes with values subject to normalization, where the
  9807. * attribute appears in the document with a value which will change
  9808. * as a result of normalization, or
  9809. * - element types with element content, if white space occurs directly
  9810. * within any instance of those types.
  9811. *
  9812. * Returns:
  9813. * 1 if standalone="yes"
  9814. * 0 if standalone="no"
  9815. * -2 if standalone attribute is missing or invalid
  9816. * (A standalone value of -2 means that the XML declaration was found,
  9817. * but no value was specified for the standalone attribute).
  9818. */
  9819. int
  9820. xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
  9821. int standalone = -2;
  9822. SKIP_BLANKS;
  9823. if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
  9824. SKIP(10);
  9825. SKIP_BLANKS;
  9826. if (RAW != '=') {
  9827. xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
  9828. return(standalone);
  9829. }
  9830. NEXT;
  9831. SKIP_BLANKS;
  9832. if (RAW == '\''){
  9833. NEXT;
  9834. if ((RAW == 'n') && (NXT(1) == 'o')) {
  9835. standalone = 0;
  9836. SKIP(2);
  9837. } else if ((RAW == 'y') && (NXT(1) == 'e') &&
  9838. (NXT(2) == 's')) {
  9839. standalone = 1;
  9840. SKIP(3);
  9841. } else {
  9842. xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
  9843. }
  9844. if (RAW != '\'') {
  9845. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9846. } else
  9847. NEXT;
  9848. } else if (RAW == '"'){
  9849. NEXT;
  9850. if ((RAW == 'n') && (NXT(1) == 'o')) {
  9851. standalone = 0;
  9852. SKIP(2);
  9853. } else if ((RAW == 'y') && (NXT(1) == 'e') &&
  9854. (NXT(2) == 's')) {
  9855. standalone = 1;
  9856. SKIP(3);
  9857. } else {
  9858. xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
  9859. }
  9860. if (RAW != '"') {
  9861. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9862. } else
  9863. NEXT;
  9864. } else {
  9865. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
  9866. }
  9867. }
  9868. return(standalone);
  9869. }
  9870. /**
  9871. * xmlParseXMLDecl:
  9872. * @ctxt: an XML parser context
  9873. *
  9874. * parse an XML declaration header
  9875. *
  9876. * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  9877. */
  9878. void
  9879. xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
  9880. xmlChar *version;
  9881. /*
  9882. * This value for standalone indicates that the document has an
  9883. * XML declaration but it does not have a standalone attribute.
  9884. * It will be overwritten later if a standalone attribute is found.
  9885. */
  9886. ctxt->input->standalone = -2;
  9887. /*
  9888. * We know that '<?xml' is here.
  9889. */
  9890. SKIP(5);
  9891. if (!IS_BLANK_CH(RAW)) {
  9892. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  9893. "Blank needed after '<?xml'\n");
  9894. }
  9895. SKIP_BLANKS;
  9896. /*
  9897. * We must have the VersionInfo here.
  9898. */
  9899. version = xmlParseVersionInfo(ctxt);
  9900. if (version == NULL) {
  9901. xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
  9902. } else {
  9903. if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
  9904. /*
  9905. * Changed here for XML-1.0 5th edition
  9906. */
  9907. if (ctxt->options & XML_PARSE_OLD10) {
  9908. xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
  9909. "Unsupported version '%s'\n",
  9910. version);
  9911. } else {
  9912. if ((version[0] == '1') && ((version[1] == '.'))) {
  9913. xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
  9914. "Unsupported version '%s'\n",
  9915. version, NULL);
  9916. } else {
  9917. xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
  9918. "Unsupported version '%s'\n",
  9919. version);
  9920. }
  9921. }
  9922. }
  9923. if (ctxt->version != NULL)
  9924. xmlFree((void *) ctxt->version);
  9925. ctxt->version = version;
  9926. }
  9927. /*
  9928. * We may have the encoding declaration
  9929. */
  9930. if (!IS_BLANK_CH(RAW)) {
  9931. if ((RAW == '?') && (NXT(1) == '>')) {
  9932. SKIP(2);
  9933. return;
  9934. }
  9935. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
  9936. }
  9937. xmlParseEncodingDecl(ctxt);
  9938. if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
  9939. (ctxt->instate == XML_PARSER_EOF)) {
  9940. /*
  9941. * The XML REC instructs us to stop parsing right here
  9942. */
  9943. return;
  9944. }
  9945. /*
  9946. * We may have the standalone status.
  9947. */
  9948. if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
  9949. if ((RAW == '?') && (NXT(1) == '>')) {
  9950. SKIP(2);
  9951. return;
  9952. }
  9953. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
  9954. }
  9955. /*
  9956. * We can grow the input buffer freely at that point
  9957. */
  9958. GROW;
  9959. SKIP_BLANKS;
  9960. ctxt->input->standalone = xmlParseSDDecl(ctxt);
  9961. SKIP_BLANKS;
  9962. if ((RAW == '?') && (NXT(1) == '>')) {
  9963. SKIP(2);
  9964. } else if (RAW == '>') {
  9965. /* Deprecated old WD ... */
  9966. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
  9967. NEXT;
  9968. } else {
  9969. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
  9970. MOVETO_ENDTAG(CUR_PTR);
  9971. NEXT;
  9972. }
  9973. }
  9974. /**
  9975. * xmlParseMisc:
  9976. * @ctxt: an XML parser context
  9977. *
  9978. * parse an XML Misc* optional field.
  9979. *
  9980. * [27] Misc ::= Comment | PI | S
  9981. */
  9982. void
  9983. xmlParseMisc(xmlParserCtxtPtr ctxt) {
  9984. while ((ctxt->instate != XML_PARSER_EOF) &&
  9985. (((RAW == '<') && (NXT(1) == '?')) ||
  9986. (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
  9987. IS_BLANK_CH(CUR))) {
  9988. if ((RAW == '<') && (NXT(1) == '?')) {
  9989. xmlParsePI(ctxt);
  9990. } else if (IS_BLANK_CH(CUR)) {
  9991. NEXT;
  9992. } else
  9993. xmlParseComment(ctxt);
  9994. }
  9995. }
  9996. /**
  9997. * xmlParseDocument:
  9998. * @ctxt: an XML parser context
  9999. *
  10000. * parse an XML document (and build a tree if using the standard SAX
  10001. * interface).
  10002. *
  10003. * [1] document ::= prolog element Misc*
  10004. *
  10005. * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
  10006. *
  10007. * Returns 0, -1 in case of error. the parser context is augmented
  10008. * as a result of the parsing.
  10009. */
  10010. int
  10011. xmlParseDocument(xmlParserCtxtPtr ctxt) {
  10012. xmlChar start[4];
  10013. xmlCharEncoding enc;
  10014. xmlInitParser();
  10015. if ((ctxt == NULL) || (ctxt->input == NULL))
  10016. return(-1);
  10017. GROW;
  10018. /*
  10019. * SAX: detecting the level.
  10020. */
  10021. xmlDetectSAX2(ctxt);
  10022. /*
  10023. * SAX: beginning of the document processing.
  10024. */
  10025. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10026. ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
  10027. if (ctxt->instate == XML_PARSER_EOF)
  10028. return(-1);
  10029. if ((ctxt->encoding == NULL) &&
  10030. ((ctxt->input->end - ctxt->input->cur) >= 4)) {
  10031. /*
  10032. * Get the 4 first bytes and decode the charset
  10033. * if enc != XML_CHAR_ENCODING_NONE
  10034. * plug some encoding conversion routines.
  10035. */
  10036. start[0] = RAW;
  10037. start[1] = NXT(1);
  10038. start[2] = NXT(2);
  10039. start[3] = NXT(3);
  10040. enc = xmlDetectCharEncoding(&start[0], 4);
  10041. if (enc != XML_CHAR_ENCODING_NONE) {
  10042. xmlSwitchEncoding(ctxt, enc);
  10043. }
  10044. }
  10045. if (CUR == 0) {
  10046. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
  10047. return(-1);
  10048. }
  10049. /*
  10050. * Check for the XMLDecl in the Prolog.
  10051. * do not GROW here to avoid the detected encoder to decode more
  10052. * than just the first line, unless the amount of data is really
  10053. * too small to hold "<?xml version="1.0" encoding="foo"
  10054. */
  10055. if ((ctxt->input->end - ctxt->input->cur) < 35) {
  10056. GROW;
  10057. }
  10058. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  10059. /*
  10060. * Note that we will switch encoding on the fly.
  10061. */
  10062. xmlParseXMLDecl(ctxt);
  10063. if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
  10064. (ctxt->instate == XML_PARSER_EOF)) {
  10065. /*
  10066. * The XML REC instructs us to stop parsing right here
  10067. */
  10068. return(-1);
  10069. }
  10070. ctxt->standalone = ctxt->input->standalone;
  10071. SKIP_BLANKS;
  10072. } else {
  10073. ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
  10074. }
  10075. if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
  10076. ctxt->sax->startDocument(ctxt->userData);
  10077. if (ctxt->instate == XML_PARSER_EOF)
  10078. return(-1);
  10079. if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
  10080. (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
  10081. ctxt->myDoc->compression = ctxt->input->buf->compressed;
  10082. }
  10083. /*
  10084. * The Misc part of the Prolog
  10085. */
  10086. GROW;
  10087. xmlParseMisc(ctxt);
  10088. /*
  10089. * Then possibly doc type declaration(s) and more Misc
  10090. * (doctypedecl Misc*)?
  10091. */
  10092. GROW;
  10093. if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
  10094. ctxt->inSubset = 1;
  10095. xmlParseDocTypeDecl(ctxt);
  10096. if (RAW == '[') {
  10097. ctxt->instate = XML_PARSER_DTD;
  10098. xmlParseInternalSubset(ctxt);
  10099. if (ctxt->instate == XML_PARSER_EOF)
  10100. return(-1);
  10101. }
  10102. /*
  10103. * Create and update the external subset.
  10104. */
  10105. ctxt->inSubset = 2;
  10106. if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
  10107. (!ctxt->disableSAX))
  10108. ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
  10109. ctxt->extSubSystem, ctxt->extSubURI);
  10110. if (ctxt->instate == XML_PARSER_EOF)
  10111. return(-1);
  10112. ctxt->inSubset = 0;
  10113. xmlCleanSpecialAttr(ctxt);
  10114. ctxt->instate = XML_PARSER_PROLOG;
  10115. xmlParseMisc(ctxt);
  10116. }
  10117. /*
  10118. * Time to start parsing the tree itself
  10119. */
  10120. GROW;
  10121. if (RAW != '<') {
  10122. xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
  10123. "Start tag expected, '<' not found\n");
  10124. } else {
  10125. ctxt->instate = XML_PARSER_CONTENT;
  10126. xmlParseElement(ctxt);
  10127. ctxt->instate = XML_PARSER_EPILOG;
  10128. /*
  10129. * The Misc part at the end
  10130. */
  10131. xmlParseMisc(ctxt);
  10132. if (RAW != 0) {
  10133. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
  10134. }
  10135. ctxt->instate = XML_PARSER_EOF;
  10136. }
  10137. /*
  10138. * SAX: end of the document processing.
  10139. */
  10140. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10141. ctxt->sax->endDocument(ctxt->userData);
  10142. /*
  10143. * Remove locally kept entity definitions if the tree was not built
  10144. */
  10145. if ((ctxt->myDoc != NULL) &&
  10146. (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
  10147. xmlFreeDoc(ctxt->myDoc);
  10148. ctxt->myDoc = NULL;
  10149. }
  10150. if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
  10151. ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
  10152. if (ctxt->valid)
  10153. ctxt->myDoc->properties |= XML_DOC_DTDVALID;
  10154. if (ctxt->nsWellFormed)
  10155. ctxt->myDoc->properties |= XML_DOC_NSVALID;
  10156. if (ctxt->options & XML_PARSE_OLD10)
  10157. ctxt->myDoc->properties |= XML_DOC_OLD10;
  10158. }
  10159. if (! ctxt->wellFormed) {
  10160. ctxt->valid = 0;
  10161. return(-1);
  10162. }
  10163. return(0);
  10164. }
  10165. /**
  10166. * xmlParseExtParsedEnt:
  10167. * @ctxt: an XML parser context
  10168. *
  10169. * parse a general parsed entity
  10170. * An external general parsed entity is well-formed if it matches the
  10171. * production labeled extParsedEnt.
  10172. *
  10173. * [78] extParsedEnt ::= TextDecl? content
  10174. *
  10175. * Returns 0, -1 in case of error. the parser context is augmented
  10176. * as a result of the parsing.
  10177. */
  10178. int
  10179. xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
  10180. xmlChar start[4];
  10181. xmlCharEncoding enc;
  10182. if ((ctxt == NULL) || (ctxt->input == NULL))
  10183. return(-1);
  10184. xmlDefaultSAXHandlerInit();
  10185. xmlDetectSAX2(ctxt);
  10186. GROW;
  10187. /*
  10188. * SAX: beginning of the document processing.
  10189. */
  10190. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10191. ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
  10192. /*
  10193. * Get the 4 first bytes and decode the charset
  10194. * if enc != XML_CHAR_ENCODING_NONE
  10195. * plug some encoding conversion routines.
  10196. */
  10197. if ((ctxt->input->end - ctxt->input->cur) >= 4) {
  10198. start[0] = RAW;
  10199. start[1] = NXT(1);
  10200. start[2] = NXT(2);
  10201. start[3] = NXT(3);
  10202. enc = xmlDetectCharEncoding(start, 4);
  10203. if (enc != XML_CHAR_ENCODING_NONE) {
  10204. xmlSwitchEncoding(ctxt, enc);
  10205. }
  10206. }
  10207. if (CUR == 0) {
  10208. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
  10209. }
  10210. /*
  10211. * Check for the XMLDecl in the Prolog.
  10212. */
  10213. GROW;
  10214. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  10215. /*
  10216. * Note that we will switch encoding on the fly.
  10217. */
  10218. xmlParseXMLDecl(ctxt);
  10219. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  10220. /*
  10221. * The XML REC instructs us to stop parsing right here
  10222. */
  10223. return(-1);
  10224. }
  10225. SKIP_BLANKS;
  10226. } else {
  10227. ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
  10228. }
  10229. if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
  10230. ctxt->sax->startDocument(ctxt->userData);
  10231. if (ctxt->instate == XML_PARSER_EOF)
  10232. return(-1);
  10233. /*
  10234. * Doing validity checking on chunk doesn't make sense
  10235. */
  10236. ctxt->instate = XML_PARSER_CONTENT;
  10237. ctxt->validate = 0;
  10238. ctxt->loadsubset = 0;
  10239. ctxt->depth = 0;
  10240. xmlParseContent(ctxt);
  10241. if (ctxt->instate == XML_PARSER_EOF)
  10242. return(-1);
  10243. if ((RAW == '<') && (NXT(1) == '/')) {
  10244. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  10245. } else if (RAW != 0) {
  10246. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  10247. }
  10248. /*
  10249. * SAX: end of the document processing.
  10250. */
  10251. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10252. ctxt->sax->endDocument(ctxt->userData);
  10253. if (! ctxt->wellFormed) return(-1);
  10254. return(0);
  10255. }
  10256. #ifdef LIBXML_PUSH_ENABLED
  10257. /************************************************************************
  10258. * *
  10259. * Progressive parsing interfaces *
  10260. * *
  10261. ************************************************************************/
  10262. /**
  10263. * xmlParseLookupSequence:
  10264. * @ctxt: an XML parser context
  10265. * @first: the first char to lookup
  10266. * @next: the next char to lookup or zero
  10267. * @third: the next char to lookup or zero
  10268. *
  10269. * Try to find if a sequence (first, next, third) or just (first next) or
  10270. * (first) is available in the input stream.
  10271. * This function has a side effect of (possibly) incrementing ctxt->checkIndex
  10272. * to avoid rescanning sequences of bytes, it DOES change the state of the
  10273. * parser, do not use liberally.
  10274. *
  10275. * Returns the index to the current parsing point if the full sequence
  10276. * is available, -1 otherwise.
  10277. */
  10278. static int
  10279. xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
  10280. xmlChar next, xmlChar third) {
  10281. int base, len;
  10282. xmlParserInputPtr in;
  10283. const xmlChar *buf;
  10284. in = ctxt->input;
  10285. if (in == NULL) return(-1);
  10286. base = in->cur - in->base;
  10287. if (base < 0) return(-1);
  10288. if (ctxt->checkIndex > base)
  10289. base = ctxt->checkIndex;
  10290. if (in->buf == NULL) {
  10291. buf = in->base;
  10292. len = in->length;
  10293. } else {
  10294. buf = xmlBufContent(in->buf->buffer);
  10295. len = xmlBufUse(in->buf->buffer);
  10296. }
  10297. /* take into account the sequence length */
  10298. if (third) len -= 2;
  10299. else if (next) len --;
  10300. for (;base < len;base++) {
  10301. if (buf[base] == first) {
  10302. if (third != 0) {
  10303. if ((buf[base + 1] != next) ||
  10304. (buf[base + 2] != third)) continue;
  10305. } else if (next != 0) {
  10306. if (buf[base + 1] != next) continue;
  10307. }
  10308. ctxt->checkIndex = 0;
  10309. #ifdef DEBUG_PUSH
  10310. if (next == 0)
  10311. xmlGenericError(xmlGenericErrorContext,
  10312. "PP: lookup '%c' found at %d\n",
  10313. first, base);
  10314. else if (third == 0)
  10315. xmlGenericError(xmlGenericErrorContext,
  10316. "PP: lookup '%c%c' found at %d\n",
  10317. first, next, base);
  10318. else
  10319. xmlGenericError(xmlGenericErrorContext,
  10320. "PP: lookup '%c%c%c' found at %d\n",
  10321. first, next, third, base);
  10322. #endif
  10323. return(base - (in->cur - in->base));
  10324. }
  10325. }
  10326. ctxt->checkIndex = base;
  10327. #ifdef DEBUG_PUSH
  10328. if (next == 0)
  10329. xmlGenericError(xmlGenericErrorContext,
  10330. "PP: lookup '%c' failed\n", first);
  10331. else if (third == 0)
  10332. xmlGenericError(xmlGenericErrorContext,
  10333. "PP: lookup '%c%c' failed\n", first, next);
  10334. else
  10335. xmlGenericError(xmlGenericErrorContext,
  10336. "PP: lookup '%c%c%c' failed\n", first, next, third);
  10337. #endif
  10338. return(-1);
  10339. }
  10340. /**
  10341. * xmlParseGetLasts:
  10342. * @ctxt: an XML parser context
  10343. * @lastlt: pointer to store the last '<' from the input
  10344. * @lastgt: pointer to store the last '>' from the input
  10345. *
  10346. * Lookup the last < and > in the current chunk
  10347. */
  10348. static void
  10349. xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
  10350. const xmlChar **lastgt) {
  10351. const xmlChar *tmp;
  10352. if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
  10353. xmlGenericError(xmlGenericErrorContext,
  10354. "Internal error: xmlParseGetLasts\n");
  10355. return;
  10356. }
  10357. if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
  10358. tmp = ctxt->input->end;
  10359. tmp--;
  10360. while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
  10361. if (tmp < ctxt->input->base) {
  10362. *lastlt = NULL;
  10363. *lastgt = NULL;
  10364. } else {
  10365. *lastlt = tmp;
  10366. tmp++;
  10367. while ((tmp < ctxt->input->end) && (*tmp != '>')) {
  10368. if (*tmp == '\'') {
  10369. tmp++;
  10370. while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
  10371. if (tmp < ctxt->input->end) tmp++;
  10372. } else if (*tmp == '"') {
  10373. tmp++;
  10374. while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
  10375. if (tmp < ctxt->input->end) tmp++;
  10376. } else
  10377. tmp++;
  10378. }
  10379. if (tmp < ctxt->input->end)
  10380. *lastgt = tmp;
  10381. else {
  10382. tmp = *lastlt;
  10383. tmp--;
  10384. while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
  10385. if (tmp >= ctxt->input->base)
  10386. *lastgt = tmp;
  10387. else
  10388. *lastgt = NULL;
  10389. }
  10390. }
  10391. } else {
  10392. *lastlt = NULL;
  10393. *lastgt = NULL;
  10394. }
  10395. }
  10396. /**
  10397. * xmlCheckCdataPush:
  10398. * @cur: pointer to the block of characters
  10399. * @len: length of the block in bytes
  10400. * @complete: 1 if complete CDATA block is passed in, 0 if partial block
  10401. *
  10402. * Check that the block of characters is okay as SCdata content [20]
  10403. *
  10404. * Returns the number of bytes to pass if okay, a negative index where an
  10405. * UTF-8 error occurred otherwise
  10406. */
  10407. static int
  10408. xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
  10409. int ix;
  10410. unsigned char c;
  10411. int codepoint;
  10412. if ((utf == NULL) || (len <= 0))
  10413. return(0);
  10414. for (ix = 0; ix < len;) { /* string is 0-terminated */
  10415. c = utf[ix];
  10416. if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
  10417. if (c >= 0x20)
  10418. ix++;
  10419. else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
  10420. ix++;
  10421. else
  10422. return(-ix);
  10423. } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
  10424. if (ix + 2 > len) return(complete ? -ix : ix);
  10425. if ((utf[ix+1] & 0xc0 ) != 0x80)
  10426. return(-ix);
  10427. codepoint = (utf[ix] & 0x1f) << 6;
  10428. codepoint |= utf[ix+1] & 0x3f;
  10429. if (!xmlIsCharQ(codepoint))
  10430. return(-ix);
  10431. ix += 2;
  10432. } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
  10433. if (ix + 3 > len) return(complete ? -ix : ix);
  10434. if (((utf[ix+1] & 0xc0) != 0x80) ||
  10435. ((utf[ix+2] & 0xc0) != 0x80))
  10436. return(-ix);
  10437. codepoint = (utf[ix] & 0xf) << 12;
  10438. codepoint |= (utf[ix+1] & 0x3f) << 6;
  10439. codepoint |= utf[ix+2] & 0x3f;
  10440. if (!xmlIsCharQ(codepoint))
  10441. return(-ix);
  10442. ix += 3;
  10443. } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
  10444. if (ix + 4 > len) return(complete ? -ix : ix);
  10445. if (((utf[ix+1] & 0xc0) != 0x80) ||
  10446. ((utf[ix+2] & 0xc0) != 0x80) ||
  10447. ((utf[ix+3] & 0xc0) != 0x80))
  10448. return(-ix);
  10449. codepoint = (utf[ix] & 0x7) << 18;
  10450. codepoint |= (utf[ix+1] & 0x3f) << 12;
  10451. codepoint |= (utf[ix+2] & 0x3f) << 6;
  10452. codepoint |= utf[ix+3] & 0x3f;
  10453. if (!xmlIsCharQ(codepoint))
  10454. return(-ix);
  10455. ix += 4;
  10456. } else /* unknown encoding */
  10457. return(-ix);
  10458. }
  10459. return(ix);
  10460. }
  10461. /**
  10462. * xmlParseTryOrFinish:
  10463. * @ctxt: an XML parser context
  10464. * @terminate: last chunk indicator
  10465. *
  10466. * Try to progress on parsing
  10467. *
  10468. * Returns zero if no parsing was possible
  10469. */
  10470. static int
  10471. xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
  10472. int ret = 0;
  10473. int avail, tlen;
  10474. xmlChar cur, next;
  10475. const xmlChar *lastlt, *lastgt;
  10476. if (ctxt->input == NULL)
  10477. return(0);
  10478. #ifdef DEBUG_PUSH
  10479. switch (ctxt->instate) {
  10480. case XML_PARSER_EOF:
  10481. xmlGenericError(xmlGenericErrorContext,
  10482. "PP: try EOF\n"); break;
  10483. case XML_PARSER_START:
  10484. xmlGenericError(xmlGenericErrorContext,
  10485. "PP: try START\n"); break;
  10486. case XML_PARSER_MISC:
  10487. xmlGenericError(xmlGenericErrorContext,
  10488. "PP: try MISC\n");break;
  10489. case XML_PARSER_COMMENT:
  10490. xmlGenericError(xmlGenericErrorContext,
  10491. "PP: try COMMENT\n");break;
  10492. case XML_PARSER_PROLOG:
  10493. xmlGenericError(xmlGenericErrorContext,
  10494. "PP: try PROLOG\n");break;
  10495. case XML_PARSER_START_TAG:
  10496. xmlGenericError(xmlGenericErrorContext,
  10497. "PP: try START_TAG\n");break;
  10498. case XML_PARSER_CONTENT:
  10499. xmlGenericError(xmlGenericErrorContext,
  10500. "PP: try CONTENT\n");break;
  10501. case XML_PARSER_CDATA_SECTION:
  10502. xmlGenericError(xmlGenericErrorContext,
  10503. "PP: try CDATA_SECTION\n");break;
  10504. case XML_PARSER_END_TAG:
  10505. xmlGenericError(xmlGenericErrorContext,
  10506. "PP: try END_TAG\n");break;
  10507. case XML_PARSER_ENTITY_DECL:
  10508. xmlGenericError(xmlGenericErrorContext,
  10509. "PP: try ENTITY_DECL\n");break;
  10510. case XML_PARSER_ENTITY_VALUE:
  10511. xmlGenericError(xmlGenericErrorContext,
  10512. "PP: try ENTITY_VALUE\n");break;
  10513. case XML_PARSER_ATTRIBUTE_VALUE:
  10514. xmlGenericError(xmlGenericErrorContext,
  10515. "PP: try ATTRIBUTE_VALUE\n");break;
  10516. case XML_PARSER_DTD:
  10517. xmlGenericError(xmlGenericErrorContext,
  10518. "PP: try DTD\n");break;
  10519. case XML_PARSER_EPILOG:
  10520. xmlGenericError(xmlGenericErrorContext,
  10521. "PP: try EPILOG\n");break;
  10522. case XML_PARSER_PI:
  10523. xmlGenericError(xmlGenericErrorContext,
  10524. "PP: try PI\n");break;
  10525. case XML_PARSER_IGNORE:
  10526. xmlGenericError(xmlGenericErrorContext,
  10527. "PP: try IGNORE\n");break;
  10528. }
  10529. #endif
  10530. if ((ctxt->input != NULL) &&
  10531. (ctxt->input->cur - ctxt->input->base > 4096)) {
  10532. xmlSHRINK(ctxt);
  10533. ctxt->checkIndex = 0;
  10534. }
  10535. xmlParseGetLasts(ctxt, &lastlt, &lastgt);
  10536. while (ctxt->instate != XML_PARSER_EOF) {
  10537. if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
  10538. return(0);
  10539. if (ctxt->input == NULL) break;
  10540. if (ctxt->input->buf == NULL)
  10541. avail = ctxt->input->length -
  10542. (ctxt->input->cur - ctxt->input->base);
  10543. else {
  10544. /*
  10545. * If we are operating on converted input, try to flush
  10546. * remaining chars to avoid them stalling in the non-converted
  10547. * buffer. But do not do this in document start where
  10548. * encoding="..." may not have been read and we work on a
  10549. * guessed encoding.
  10550. */
  10551. if ((ctxt->instate != XML_PARSER_START) &&
  10552. (ctxt->input->buf->raw != NULL) &&
  10553. (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
  10554. size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
  10555. ctxt->input);
  10556. size_t current = ctxt->input->cur - ctxt->input->base;
  10557. xmlParserInputBufferPush(ctxt->input->buf, 0, "");
  10558. xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
  10559. base, current);
  10560. }
  10561. avail = xmlBufUse(ctxt->input->buf->buffer) -
  10562. (ctxt->input->cur - ctxt->input->base);
  10563. }
  10564. if (avail < 1)
  10565. goto done;
  10566. switch (ctxt->instate) {
  10567. case XML_PARSER_EOF:
  10568. /*
  10569. * Document parsing is done !
  10570. */
  10571. goto done;
  10572. case XML_PARSER_START:
  10573. if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
  10574. xmlChar start[4];
  10575. xmlCharEncoding enc;
  10576. /*
  10577. * Very first chars read from the document flow.
  10578. */
  10579. if (avail < 4)
  10580. goto done;
  10581. /*
  10582. * Get the 4 first bytes and decode the charset
  10583. * if enc != XML_CHAR_ENCODING_NONE
  10584. * plug some encoding conversion routines,
  10585. * else xmlSwitchEncoding will set to (default)
  10586. * UTF8.
  10587. */
  10588. start[0] = RAW;
  10589. start[1] = NXT(1);
  10590. start[2] = NXT(2);
  10591. start[3] = NXT(3);
  10592. enc = xmlDetectCharEncoding(start, 4);
  10593. xmlSwitchEncoding(ctxt, enc);
  10594. break;
  10595. }
  10596. if (avail < 2)
  10597. goto done;
  10598. cur = ctxt->input->cur[0];
  10599. next = ctxt->input->cur[1];
  10600. if (cur == 0) {
  10601. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10602. ctxt->sax->setDocumentLocator(ctxt->userData,
  10603. &xmlDefaultSAXLocator);
  10604. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
  10605. xmlHaltParser(ctxt);
  10606. #ifdef DEBUG_PUSH
  10607. xmlGenericError(xmlGenericErrorContext,
  10608. "PP: entering EOF\n");
  10609. #endif
  10610. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10611. ctxt->sax->endDocument(ctxt->userData);
  10612. goto done;
  10613. }
  10614. if ((cur == '<') && (next == '?')) {
  10615. /* PI or XML decl */
  10616. if (avail < 5) return(ret);
  10617. if ((!terminate) &&
  10618. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
  10619. return(ret);
  10620. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10621. ctxt->sax->setDocumentLocator(ctxt->userData,
  10622. &xmlDefaultSAXLocator);
  10623. if ((ctxt->input->cur[2] == 'x') &&
  10624. (ctxt->input->cur[3] == 'm') &&
  10625. (ctxt->input->cur[4] == 'l') &&
  10626. (IS_BLANK_CH(ctxt->input->cur[5]))) {
  10627. ret += 5;
  10628. #ifdef DEBUG_PUSH
  10629. xmlGenericError(xmlGenericErrorContext,
  10630. "PP: Parsing XML Decl\n");
  10631. #endif
  10632. xmlParseXMLDecl(ctxt);
  10633. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  10634. /*
  10635. * The XML REC instructs us to stop parsing right
  10636. * here
  10637. */
  10638. xmlHaltParser(ctxt);
  10639. return(0);
  10640. }
  10641. ctxt->standalone = ctxt->input->standalone;
  10642. if ((ctxt->encoding == NULL) &&
  10643. (ctxt->input->encoding != NULL))
  10644. ctxt->encoding = xmlStrdup(ctxt->input->encoding);
  10645. if ((ctxt->sax) && (ctxt->sax->startDocument) &&
  10646. (!ctxt->disableSAX))
  10647. ctxt->sax->startDocument(ctxt->userData);
  10648. ctxt->instate = XML_PARSER_MISC;
  10649. #ifdef DEBUG_PUSH
  10650. xmlGenericError(xmlGenericErrorContext,
  10651. "PP: entering MISC\n");
  10652. #endif
  10653. } else {
  10654. ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
  10655. if ((ctxt->sax) && (ctxt->sax->startDocument) &&
  10656. (!ctxt->disableSAX))
  10657. ctxt->sax->startDocument(ctxt->userData);
  10658. ctxt->instate = XML_PARSER_MISC;
  10659. #ifdef DEBUG_PUSH
  10660. xmlGenericError(xmlGenericErrorContext,
  10661. "PP: entering MISC\n");
  10662. #endif
  10663. }
  10664. } else {
  10665. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10666. ctxt->sax->setDocumentLocator(ctxt->userData,
  10667. &xmlDefaultSAXLocator);
  10668. ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
  10669. if (ctxt->version == NULL) {
  10670. xmlErrMemory(ctxt, NULL);
  10671. break;
  10672. }
  10673. if ((ctxt->sax) && (ctxt->sax->startDocument) &&
  10674. (!ctxt->disableSAX))
  10675. ctxt->sax->startDocument(ctxt->userData);
  10676. ctxt->instate = XML_PARSER_MISC;
  10677. #ifdef DEBUG_PUSH
  10678. xmlGenericError(xmlGenericErrorContext,
  10679. "PP: entering MISC\n");
  10680. #endif
  10681. }
  10682. break;
  10683. case XML_PARSER_START_TAG: {
  10684. const xmlChar *name;
  10685. const xmlChar *prefix = NULL;
  10686. const xmlChar *URI = NULL;
  10687. int line = ctxt->input->line;
  10688. int nsNr = ctxt->nsNr;
  10689. if ((avail < 2) && (ctxt->inputNr == 1))
  10690. goto done;
  10691. cur = ctxt->input->cur[0];
  10692. if (cur != '<') {
  10693. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
  10694. xmlHaltParser(ctxt);
  10695. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10696. ctxt->sax->endDocument(ctxt->userData);
  10697. goto done;
  10698. }
  10699. if (!terminate) {
  10700. if (ctxt->progressive) {
  10701. /* > can be found unescaped in attribute values */
  10702. if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
  10703. goto done;
  10704. } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
  10705. goto done;
  10706. }
  10707. }
  10708. if (ctxt->spaceNr == 0)
  10709. spacePush(ctxt, -1);
  10710. else if (*ctxt->space == -2)
  10711. spacePush(ctxt, -1);
  10712. else
  10713. spacePush(ctxt, *ctxt->space);
  10714. #ifdef LIBXML_SAX1_ENABLED
  10715. if (ctxt->sax2)
  10716. #endif /* LIBXML_SAX1_ENABLED */
  10717. name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
  10718. #ifdef LIBXML_SAX1_ENABLED
  10719. else
  10720. name = xmlParseStartTag(ctxt);
  10721. #endif /* LIBXML_SAX1_ENABLED */
  10722. if (ctxt->instate == XML_PARSER_EOF)
  10723. goto done;
  10724. if (name == NULL) {
  10725. spacePop(ctxt);
  10726. xmlHaltParser(ctxt);
  10727. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10728. ctxt->sax->endDocument(ctxt->userData);
  10729. goto done;
  10730. }
  10731. #ifdef LIBXML_VALID_ENABLED
  10732. /*
  10733. * [ VC: Root Element Type ]
  10734. * The Name in the document type declaration must match
  10735. * the element type of the root element.
  10736. */
  10737. if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
  10738. ctxt->node && (ctxt->node == ctxt->myDoc->children))
  10739. ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
  10740. #endif /* LIBXML_VALID_ENABLED */
  10741. /*
  10742. * Check for an Empty Element.
  10743. */
  10744. if ((RAW == '/') && (NXT(1) == '>')) {
  10745. SKIP(2);
  10746. if (ctxt->sax2) {
  10747. if ((ctxt->sax != NULL) &&
  10748. (ctxt->sax->endElementNs != NULL) &&
  10749. (!ctxt->disableSAX))
  10750. ctxt->sax->endElementNs(ctxt->userData, name,
  10751. prefix, URI);
  10752. if (ctxt->nsNr - nsNr > 0)
  10753. nsPop(ctxt, ctxt->nsNr - nsNr);
  10754. #ifdef LIBXML_SAX1_ENABLED
  10755. } else {
  10756. if ((ctxt->sax != NULL) &&
  10757. (ctxt->sax->endElement != NULL) &&
  10758. (!ctxt->disableSAX))
  10759. ctxt->sax->endElement(ctxt->userData, name);
  10760. #endif /* LIBXML_SAX1_ENABLED */
  10761. }
  10762. if (ctxt->instate == XML_PARSER_EOF)
  10763. goto done;
  10764. spacePop(ctxt);
  10765. if (ctxt->nameNr == 0) {
  10766. ctxt->instate = XML_PARSER_EPILOG;
  10767. } else {
  10768. ctxt->instate = XML_PARSER_CONTENT;
  10769. }
  10770. ctxt->progressive = 1;
  10771. break;
  10772. }
  10773. if (RAW == '>') {
  10774. NEXT;
  10775. } else {
  10776. xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
  10777. "Couldn't find end of Start Tag %s\n",
  10778. name);
  10779. nodePop(ctxt);
  10780. spacePop(ctxt);
  10781. }
  10782. nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
  10783. ctxt->instate = XML_PARSER_CONTENT;
  10784. ctxt->progressive = 1;
  10785. break;
  10786. }
  10787. case XML_PARSER_CONTENT: {
  10788. const xmlChar *test;
  10789. unsigned int cons;
  10790. if ((avail < 2) && (ctxt->inputNr == 1))
  10791. goto done;
  10792. cur = ctxt->input->cur[0];
  10793. next = ctxt->input->cur[1];
  10794. test = CUR_PTR;
  10795. cons = ctxt->input->consumed;
  10796. if ((cur == '<') && (next == '/')) {
  10797. ctxt->instate = XML_PARSER_END_TAG;
  10798. break;
  10799. } else if ((cur == '<') && (next == '?')) {
  10800. if ((!terminate) &&
  10801. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
  10802. ctxt->progressive = XML_PARSER_PI;
  10803. goto done;
  10804. }
  10805. xmlParsePI(ctxt);
  10806. ctxt->instate = XML_PARSER_CONTENT;
  10807. ctxt->progressive = 1;
  10808. } else if ((cur == '<') && (next != '!')) {
  10809. ctxt->instate = XML_PARSER_START_TAG;
  10810. break;
  10811. } else if ((cur == '<') && (next == '!') &&
  10812. (ctxt->input->cur[2] == '-') &&
  10813. (ctxt->input->cur[3] == '-')) {
  10814. int term;
  10815. if (avail < 4)
  10816. goto done;
  10817. ctxt->input->cur += 4;
  10818. term = xmlParseLookupSequence(ctxt, '-', '-', '>');
  10819. ctxt->input->cur -= 4;
  10820. if ((!terminate) && (term < 0)) {
  10821. ctxt->progressive = XML_PARSER_COMMENT;
  10822. goto done;
  10823. }
  10824. xmlParseComment(ctxt);
  10825. ctxt->instate = XML_PARSER_CONTENT;
  10826. ctxt->progressive = 1;
  10827. } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
  10828. (ctxt->input->cur[2] == '[') &&
  10829. (ctxt->input->cur[3] == 'C') &&
  10830. (ctxt->input->cur[4] == 'D') &&
  10831. (ctxt->input->cur[5] == 'A') &&
  10832. (ctxt->input->cur[6] == 'T') &&
  10833. (ctxt->input->cur[7] == 'A') &&
  10834. (ctxt->input->cur[8] == '[')) {
  10835. SKIP(9);
  10836. ctxt->instate = XML_PARSER_CDATA_SECTION;
  10837. break;
  10838. } else if ((cur == '<') && (next == '!') &&
  10839. (avail < 9)) {
  10840. goto done;
  10841. } else if (cur == '&') {
  10842. if ((!terminate) &&
  10843. (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
  10844. goto done;
  10845. xmlParseReference(ctxt);
  10846. } else {
  10847. /* TODO Avoid the extra copy, handle directly !!! */
  10848. /*
  10849. * Goal of the following test is:
  10850. * - minimize calls to the SAX 'character' callback
  10851. * when they are mergeable
  10852. * - handle an problem for isBlank when we only parse
  10853. * a sequence of blank chars and the next one is
  10854. * not available to check against '<' presence.
  10855. * - tries to homogenize the differences in SAX
  10856. * callbacks between the push and pull versions
  10857. * of the parser.
  10858. */
  10859. if ((ctxt->inputNr == 1) &&
  10860. (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
  10861. if (!terminate) {
  10862. if (ctxt->progressive) {
  10863. if ((lastlt == NULL) ||
  10864. (ctxt->input->cur > lastlt))
  10865. goto done;
  10866. } else if (xmlParseLookupSequence(ctxt,
  10867. '<', 0, 0) < 0) {
  10868. goto done;
  10869. }
  10870. }
  10871. }
  10872. ctxt->checkIndex = 0;
  10873. xmlParseCharData(ctxt, 0);
  10874. }
  10875. if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
  10876. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  10877. "detected an error in element content\n");
  10878. xmlHaltParser(ctxt);
  10879. break;
  10880. }
  10881. break;
  10882. }
  10883. case XML_PARSER_END_TAG:
  10884. if (avail < 2)
  10885. goto done;
  10886. if (!terminate) {
  10887. if (ctxt->progressive) {
  10888. /* > can be found unescaped in attribute values */
  10889. if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
  10890. goto done;
  10891. } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
  10892. goto done;
  10893. }
  10894. }
  10895. if (ctxt->sax2) {
  10896. xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
  10897. nameNsPop(ctxt);
  10898. }
  10899. #ifdef LIBXML_SAX1_ENABLED
  10900. else
  10901. xmlParseEndTag1(ctxt, 0);
  10902. #endif /* LIBXML_SAX1_ENABLED */
  10903. if (ctxt->instate == XML_PARSER_EOF) {
  10904. /* Nothing */
  10905. } else if (ctxt->nameNr == 0) {
  10906. ctxt->instate = XML_PARSER_EPILOG;
  10907. } else {
  10908. ctxt->instate = XML_PARSER_CONTENT;
  10909. }
  10910. break;
  10911. case XML_PARSER_CDATA_SECTION: {
  10912. /*
  10913. * The Push mode need to have the SAX callback for
  10914. * cdataBlock merge back contiguous callbacks.
  10915. */
  10916. int base;
  10917. base = xmlParseLookupSequence(ctxt, ']', ']', '>');
  10918. if (base < 0) {
  10919. if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
  10920. int tmp;
  10921. tmp = xmlCheckCdataPush(ctxt->input->cur,
  10922. XML_PARSER_BIG_BUFFER_SIZE, 0);
  10923. if (tmp < 0) {
  10924. tmp = -tmp;
  10925. ctxt->input->cur += tmp;
  10926. goto encoding_error;
  10927. }
  10928. if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
  10929. if (ctxt->sax->cdataBlock != NULL)
  10930. ctxt->sax->cdataBlock(ctxt->userData,
  10931. ctxt->input->cur, tmp);
  10932. else if (ctxt->sax->characters != NULL)
  10933. ctxt->sax->characters(ctxt->userData,
  10934. ctxt->input->cur, tmp);
  10935. }
  10936. if (ctxt->instate == XML_PARSER_EOF)
  10937. goto done;
  10938. SKIPL(tmp);
  10939. ctxt->checkIndex = 0;
  10940. }
  10941. goto done;
  10942. } else {
  10943. int tmp;
  10944. tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
  10945. if ((tmp < 0) || (tmp != base)) {
  10946. tmp = -tmp;
  10947. ctxt->input->cur += tmp;
  10948. goto encoding_error;
  10949. }
  10950. if ((ctxt->sax != NULL) && (base == 0) &&
  10951. (ctxt->sax->cdataBlock != NULL) &&
  10952. (!ctxt->disableSAX)) {
  10953. /*
  10954. * Special case to provide identical behaviour
  10955. * between pull and push parsers on enpty CDATA
  10956. * sections
  10957. */
  10958. if ((ctxt->input->cur - ctxt->input->base >= 9) &&
  10959. (!strncmp((const char *)&ctxt->input->cur[-9],
  10960. "<![CDATA[", 9)))
  10961. ctxt->sax->cdataBlock(ctxt->userData,
  10962. BAD_CAST "", 0);
  10963. } else if ((ctxt->sax != NULL) && (base > 0) &&
  10964. (!ctxt->disableSAX)) {
  10965. if (ctxt->sax->cdataBlock != NULL)
  10966. ctxt->sax->cdataBlock(ctxt->userData,
  10967. ctxt->input->cur, base);
  10968. else if (ctxt->sax->characters != NULL)
  10969. ctxt->sax->characters(ctxt->userData,
  10970. ctxt->input->cur, base);
  10971. }
  10972. if (ctxt->instate == XML_PARSER_EOF)
  10973. goto done;
  10974. SKIPL(base + 3);
  10975. ctxt->checkIndex = 0;
  10976. ctxt->instate = XML_PARSER_CONTENT;
  10977. #ifdef DEBUG_PUSH
  10978. xmlGenericError(xmlGenericErrorContext,
  10979. "PP: entering CONTENT\n");
  10980. #endif
  10981. }
  10982. break;
  10983. }
  10984. case XML_PARSER_MISC:
  10985. SKIP_BLANKS;
  10986. if (ctxt->input->buf == NULL)
  10987. avail = ctxt->input->length -
  10988. (ctxt->input->cur - ctxt->input->base);
  10989. else
  10990. avail = xmlBufUse(ctxt->input->buf->buffer) -
  10991. (ctxt->input->cur - ctxt->input->base);
  10992. if (avail < 2)
  10993. goto done;
  10994. cur = ctxt->input->cur[0];
  10995. next = ctxt->input->cur[1];
  10996. if ((cur == '<') && (next == '?')) {
  10997. if ((!terminate) &&
  10998. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
  10999. ctxt->progressive = XML_PARSER_PI;
  11000. goto done;
  11001. }
  11002. #ifdef DEBUG_PUSH
  11003. xmlGenericError(xmlGenericErrorContext,
  11004. "PP: Parsing PI\n");
  11005. #endif
  11006. xmlParsePI(ctxt);
  11007. if (ctxt->instate == XML_PARSER_EOF)
  11008. goto done;
  11009. ctxt->instate = XML_PARSER_MISC;
  11010. ctxt->progressive = 1;
  11011. ctxt->checkIndex = 0;
  11012. } else if ((cur == '<') && (next == '!') &&
  11013. (ctxt->input->cur[2] == '-') &&
  11014. (ctxt->input->cur[3] == '-')) {
  11015. if ((!terminate) &&
  11016. (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
  11017. ctxt->progressive = XML_PARSER_COMMENT;
  11018. goto done;
  11019. }
  11020. #ifdef DEBUG_PUSH
  11021. xmlGenericError(xmlGenericErrorContext,
  11022. "PP: Parsing Comment\n");
  11023. #endif
  11024. xmlParseComment(ctxt);
  11025. if (ctxt->instate == XML_PARSER_EOF)
  11026. goto done;
  11027. ctxt->instate = XML_PARSER_MISC;
  11028. ctxt->progressive = 1;
  11029. ctxt->checkIndex = 0;
  11030. } else if ((cur == '<') && (next == '!') &&
  11031. (ctxt->input->cur[2] == 'D') &&
  11032. (ctxt->input->cur[3] == 'O') &&
  11033. (ctxt->input->cur[4] == 'C') &&
  11034. (ctxt->input->cur[5] == 'T') &&
  11035. (ctxt->input->cur[6] == 'Y') &&
  11036. (ctxt->input->cur[7] == 'P') &&
  11037. (ctxt->input->cur[8] == 'E')) {
  11038. if ((!terminate) &&
  11039. (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
  11040. ctxt->progressive = XML_PARSER_DTD;
  11041. goto done;
  11042. }
  11043. #ifdef DEBUG_PUSH
  11044. xmlGenericError(xmlGenericErrorContext,
  11045. "PP: Parsing internal subset\n");
  11046. #endif
  11047. ctxt->inSubset = 1;
  11048. ctxt->progressive = 0;
  11049. ctxt->checkIndex = 0;
  11050. xmlParseDocTypeDecl(ctxt);
  11051. if (ctxt->instate == XML_PARSER_EOF)
  11052. goto done;
  11053. if (RAW == '[') {
  11054. ctxt->instate = XML_PARSER_DTD;
  11055. #ifdef DEBUG_PUSH
  11056. xmlGenericError(xmlGenericErrorContext,
  11057. "PP: entering DTD\n");
  11058. #endif
  11059. } else {
  11060. /*
  11061. * Create and update the external subset.
  11062. */
  11063. ctxt->inSubset = 2;
  11064. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  11065. (ctxt->sax->externalSubset != NULL))
  11066. ctxt->sax->externalSubset(ctxt->userData,
  11067. ctxt->intSubName, ctxt->extSubSystem,
  11068. ctxt->extSubURI);
  11069. ctxt->inSubset = 0;
  11070. xmlCleanSpecialAttr(ctxt);
  11071. ctxt->instate = XML_PARSER_PROLOG;
  11072. #ifdef DEBUG_PUSH
  11073. xmlGenericError(xmlGenericErrorContext,
  11074. "PP: entering PROLOG\n");
  11075. #endif
  11076. }
  11077. } else if ((cur == '<') && (next == '!') &&
  11078. (avail < 9)) {
  11079. goto done;
  11080. } else {
  11081. ctxt->instate = XML_PARSER_START_TAG;
  11082. ctxt->progressive = XML_PARSER_START_TAG;
  11083. xmlParseGetLasts(ctxt, &lastlt, &lastgt);
  11084. #ifdef DEBUG_PUSH
  11085. xmlGenericError(xmlGenericErrorContext,
  11086. "PP: entering START_TAG\n");
  11087. #endif
  11088. }
  11089. break;
  11090. case XML_PARSER_PROLOG:
  11091. SKIP_BLANKS;
  11092. if (ctxt->input->buf == NULL)
  11093. avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
  11094. else
  11095. avail = xmlBufUse(ctxt->input->buf->buffer) -
  11096. (ctxt->input->cur - ctxt->input->base);
  11097. if (avail < 2)
  11098. goto done;
  11099. cur = ctxt->input->cur[0];
  11100. next = ctxt->input->cur[1];
  11101. if ((cur == '<') && (next == '?')) {
  11102. if ((!terminate) &&
  11103. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
  11104. ctxt->progressive = XML_PARSER_PI;
  11105. goto done;
  11106. }
  11107. #ifdef DEBUG_PUSH
  11108. xmlGenericError(xmlGenericErrorContext,
  11109. "PP: Parsing PI\n");
  11110. #endif
  11111. xmlParsePI(ctxt);
  11112. if (ctxt->instate == XML_PARSER_EOF)
  11113. goto done;
  11114. ctxt->instate = XML_PARSER_PROLOG;
  11115. ctxt->progressive = 1;
  11116. } else if ((cur == '<') && (next == '!') &&
  11117. (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
  11118. if ((!terminate) &&
  11119. (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
  11120. ctxt->progressive = XML_PARSER_COMMENT;
  11121. goto done;
  11122. }
  11123. #ifdef DEBUG_PUSH
  11124. xmlGenericError(xmlGenericErrorContext,
  11125. "PP: Parsing Comment\n");
  11126. #endif
  11127. xmlParseComment(ctxt);
  11128. if (ctxt->instate == XML_PARSER_EOF)
  11129. goto done;
  11130. ctxt->instate = XML_PARSER_PROLOG;
  11131. ctxt->progressive = 1;
  11132. } else if ((cur == '<') && (next == '!') &&
  11133. (avail < 4)) {
  11134. goto done;
  11135. } else {
  11136. ctxt->instate = XML_PARSER_START_TAG;
  11137. if (ctxt->progressive == 0)
  11138. ctxt->progressive = XML_PARSER_START_TAG;
  11139. xmlParseGetLasts(ctxt, &lastlt, &lastgt);
  11140. #ifdef DEBUG_PUSH
  11141. xmlGenericError(xmlGenericErrorContext,
  11142. "PP: entering START_TAG\n");
  11143. #endif
  11144. }
  11145. break;
  11146. case XML_PARSER_EPILOG:
  11147. SKIP_BLANKS;
  11148. if (ctxt->input->buf == NULL)
  11149. avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
  11150. else
  11151. avail = xmlBufUse(ctxt->input->buf->buffer) -
  11152. (ctxt->input->cur - ctxt->input->base);
  11153. if (avail < 2)
  11154. goto done;
  11155. cur = ctxt->input->cur[0];
  11156. next = ctxt->input->cur[1];
  11157. if ((cur == '<') && (next == '?')) {
  11158. if ((!terminate) &&
  11159. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
  11160. ctxt->progressive = XML_PARSER_PI;
  11161. goto done;
  11162. }
  11163. #ifdef DEBUG_PUSH
  11164. xmlGenericError(xmlGenericErrorContext,
  11165. "PP: Parsing PI\n");
  11166. #endif
  11167. xmlParsePI(ctxt);
  11168. if (ctxt->instate == XML_PARSER_EOF)
  11169. goto done;
  11170. ctxt->instate = XML_PARSER_EPILOG;
  11171. ctxt->progressive = 1;
  11172. } else if ((cur == '<') && (next == '!') &&
  11173. (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
  11174. if ((!terminate) &&
  11175. (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
  11176. ctxt->progressive = XML_PARSER_COMMENT;
  11177. goto done;
  11178. }
  11179. #ifdef DEBUG_PUSH
  11180. xmlGenericError(xmlGenericErrorContext,
  11181. "PP: Parsing Comment\n");
  11182. #endif
  11183. xmlParseComment(ctxt);
  11184. if (ctxt->instate == XML_PARSER_EOF)
  11185. goto done;
  11186. ctxt->instate = XML_PARSER_EPILOG;
  11187. ctxt->progressive = 1;
  11188. } else if ((cur == '<') && (next == '!') &&
  11189. (avail < 4)) {
  11190. goto done;
  11191. } else {
  11192. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
  11193. xmlHaltParser(ctxt);
  11194. #ifdef DEBUG_PUSH
  11195. xmlGenericError(xmlGenericErrorContext,
  11196. "PP: entering EOF\n");
  11197. #endif
  11198. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  11199. ctxt->sax->endDocument(ctxt->userData);
  11200. goto done;
  11201. }
  11202. break;
  11203. case XML_PARSER_DTD: {
  11204. /*
  11205. * Sorry but progressive parsing of the internal subset
  11206. * is not expected to be supported. We first check that
  11207. * the full content of the internal subset is available and
  11208. * the parsing is launched only at that point.
  11209. * Internal subset ends up with "']' S? '>'" in an unescaped
  11210. * section and not in a ']]>' sequence which are conditional
  11211. * sections (whoever argued to keep that crap in XML deserve
  11212. * a place in hell !).
  11213. */
  11214. int base, i;
  11215. xmlChar *buf;
  11216. xmlChar quote = 0;
  11217. size_t use;
  11218. base = ctxt->input->cur - ctxt->input->base;
  11219. if (base < 0) return(0);
  11220. if (ctxt->checkIndex > base)
  11221. base = ctxt->checkIndex;
  11222. buf = xmlBufContent(ctxt->input->buf->buffer);
  11223. use = xmlBufUse(ctxt->input->buf->buffer);
  11224. for (;(unsigned int) base < use; base++) {
  11225. if (quote != 0) {
  11226. if (buf[base] == quote)
  11227. quote = 0;
  11228. continue;
  11229. }
  11230. if ((quote == 0) && (buf[base] == '<')) {
  11231. int found = 0;
  11232. /* special handling of comments */
  11233. if (((unsigned int) base + 4 < use) &&
  11234. (buf[base + 1] == '!') &&
  11235. (buf[base + 2] == '-') &&
  11236. (buf[base + 3] == '-')) {
  11237. for (;(unsigned int) base + 3 < use; base++) {
  11238. if ((buf[base] == '-') &&
  11239. (buf[base + 1] == '-') &&
  11240. (buf[base + 2] == '>')) {
  11241. found = 1;
  11242. base += 2;
  11243. break;
  11244. }
  11245. }
  11246. if (!found) {
  11247. #if 0
  11248. fprintf(stderr, "unfinished comment\n");
  11249. #endif
  11250. break; /* for */
  11251. }
  11252. continue;
  11253. }
  11254. }
  11255. if (buf[base] == '"') {
  11256. quote = '"';
  11257. continue;
  11258. }
  11259. if (buf[base] == '\'') {
  11260. quote = '\'';
  11261. continue;
  11262. }
  11263. if (buf[base] == ']') {
  11264. #if 0
  11265. fprintf(stderr, "%c%c%c%c: ", buf[base],
  11266. buf[base + 1], buf[base + 2], buf[base + 3]);
  11267. #endif
  11268. if ((unsigned int) base +1 >= use)
  11269. break;
  11270. if (buf[base + 1] == ']') {
  11271. /* conditional crap, skip both ']' ! */
  11272. base++;
  11273. continue;
  11274. }
  11275. for (i = 1; (unsigned int) base + i < use; i++) {
  11276. if (buf[base + i] == '>') {
  11277. #if 0
  11278. fprintf(stderr, "found\n");
  11279. #endif
  11280. goto found_end_int_subset;
  11281. }
  11282. if (!IS_BLANK_CH(buf[base + i])) {
  11283. #if 0
  11284. fprintf(stderr, "not found\n");
  11285. #endif
  11286. goto not_end_of_int_subset;
  11287. }
  11288. }
  11289. #if 0
  11290. fprintf(stderr, "end of stream\n");
  11291. #endif
  11292. break;
  11293. }
  11294. not_end_of_int_subset:
  11295. continue; /* for */
  11296. }
  11297. /*
  11298. * We didn't found the end of the Internal subset
  11299. */
  11300. if (quote == 0)
  11301. ctxt->checkIndex = base;
  11302. else
  11303. ctxt->checkIndex = 0;
  11304. #ifdef DEBUG_PUSH
  11305. if (next == 0)
  11306. xmlGenericError(xmlGenericErrorContext,
  11307. "PP: lookup of int subset end filed\n");
  11308. #endif
  11309. goto done;
  11310. found_end_int_subset:
  11311. ctxt->checkIndex = 0;
  11312. xmlParseInternalSubset(ctxt);
  11313. if (ctxt->instate == XML_PARSER_EOF)
  11314. goto done;
  11315. ctxt->inSubset = 2;
  11316. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  11317. (ctxt->sax->externalSubset != NULL))
  11318. ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
  11319. ctxt->extSubSystem, ctxt->extSubURI);
  11320. ctxt->inSubset = 0;
  11321. xmlCleanSpecialAttr(ctxt);
  11322. if (ctxt->instate == XML_PARSER_EOF)
  11323. goto done;
  11324. ctxt->instate = XML_PARSER_PROLOG;
  11325. ctxt->checkIndex = 0;
  11326. #ifdef DEBUG_PUSH
  11327. xmlGenericError(xmlGenericErrorContext,
  11328. "PP: entering PROLOG\n");
  11329. #endif
  11330. break;
  11331. }
  11332. case XML_PARSER_COMMENT:
  11333. xmlGenericError(xmlGenericErrorContext,
  11334. "PP: internal error, state == COMMENT\n");
  11335. ctxt->instate = XML_PARSER_CONTENT;
  11336. #ifdef DEBUG_PUSH
  11337. xmlGenericError(xmlGenericErrorContext,
  11338. "PP: entering CONTENT\n");
  11339. #endif
  11340. break;
  11341. case XML_PARSER_IGNORE:
  11342. xmlGenericError(xmlGenericErrorContext,
  11343. "PP: internal error, state == IGNORE");
  11344. ctxt->instate = XML_PARSER_DTD;
  11345. #ifdef DEBUG_PUSH
  11346. xmlGenericError(xmlGenericErrorContext,
  11347. "PP: entering DTD\n");
  11348. #endif
  11349. break;
  11350. case XML_PARSER_PI:
  11351. xmlGenericError(xmlGenericErrorContext,
  11352. "PP: internal error, state == PI\n");
  11353. ctxt->instate = XML_PARSER_CONTENT;
  11354. #ifdef DEBUG_PUSH
  11355. xmlGenericError(xmlGenericErrorContext,
  11356. "PP: entering CONTENT\n");
  11357. #endif
  11358. break;
  11359. case XML_PARSER_ENTITY_DECL:
  11360. xmlGenericError(xmlGenericErrorContext,
  11361. "PP: internal error, state == ENTITY_DECL\n");
  11362. ctxt->instate = XML_PARSER_DTD;
  11363. #ifdef DEBUG_PUSH
  11364. xmlGenericError(xmlGenericErrorContext,
  11365. "PP: entering DTD\n");
  11366. #endif
  11367. break;
  11368. case XML_PARSER_ENTITY_VALUE:
  11369. xmlGenericError(xmlGenericErrorContext,
  11370. "PP: internal error, state == ENTITY_VALUE\n");
  11371. ctxt->instate = XML_PARSER_CONTENT;
  11372. #ifdef DEBUG_PUSH
  11373. xmlGenericError(xmlGenericErrorContext,
  11374. "PP: entering DTD\n");
  11375. #endif
  11376. break;
  11377. case XML_PARSER_ATTRIBUTE_VALUE:
  11378. xmlGenericError(xmlGenericErrorContext,
  11379. "PP: internal error, state == ATTRIBUTE_VALUE\n");
  11380. ctxt->instate = XML_PARSER_START_TAG;
  11381. #ifdef DEBUG_PUSH
  11382. xmlGenericError(xmlGenericErrorContext,
  11383. "PP: entering START_TAG\n");
  11384. #endif
  11385. break;
  11386. case XML_PARSER_SYSTEM_LITERAL:
  11387. xmlGenericError(xmlGenericErrorContext,
  11388. "PP: internal error, state == SYSTEM_LITERAL\n");
  11389. ctxt->instate = XML_PARSER_START_TAG;
  11390. #ifdef DEBUG_PUSH
  11391. xmlGenericError(xmlGenericErrorContext,
  11392. "PP: entering START_TAG\n");
  11393. #endif
  11394. break;
  11395. case XML_PARSER_PUBLIC_LITERAL:
  11396. xmlGenericError(xmlGenericErrorContext,
  11397. "PP: internal error, state == PUBLIC_LITERAL\n");
  11398. ctxt->instate = XML_PARSER_START_TAG;
  11399. #ifdef DEBUG_PUSH
  11400. xmlGenericError(xmlGenericErrorContext,
  11401. "PP: entering START_TAG\n");
  11402. #endif
  11403. break;
  11404. }
  11405. }
  11406. done:
  11407. #ifdef DEBUG_PUSH
  11408. xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
  11409. #endif
  11410. return(ret);
  11411. encoding_error:
  11412. {
  11413. char buffer[150];
  11414. snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
  11415. ctxt->input->cur[0], ctxt->input->cur[1],
  11416. ctxt->input->cur[2], ctxt->input->cur[3]);
  11417. __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
  11418. "Input is not proper UTF-8, indicate encoding !\n%s",
  11419. BAD_CAST buffer, NULL);
  11420. }
  11421. return(0);
  11422. }
  11423. /**
  11424. * xmlParseCheckTransition:
  11425. * @ctxt: an XML parser context
  11426. * @chunk: a char array
  11427. * @size: the size in byte of the chunk
  11428. *
  11429. * Check depending on the current parser state if the chunk given must be
  11430. * processed immediately or one need more data to advance on parsing.
  11431. *
  11432. * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
  11433. */
  11434. static int
  11435. xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
  11436. if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
  11437. return(-1);
  11438. if (ctxt->instate == XML_PARSER_START_TAG) {
  11439. if (memchr(chunk, '>', size) != NULL)
  11440. return(1);
  11441. return(0);
  11442. }
  11443. if (ctxt->progressive == XML_PARSER_COMMENT) {
  11444. if (memchr(chunk, '>', size) != NULL)
  11445. return(1);
  11446. return(0);
  11447. }
  11448. if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
  11449. if (memchr(chunk, '>', size) != NULL)
  11450. return(1);
  11451. return(0);
  11452. }
  11453. if (ctxt->progressive == XML_PARSER_PI) {
  11454. if (memchr(chunk, '>', size) != NULL)
  11455. return(1);
  11456. return(0);
  11457. }
  11458. if (ctxt->instate == XML_PARSER_END_TAG) {
  11459. if (memchr(chunk, '>', size) != NULL)
  11460. return(1);
  11461. return(0);
  11462. }
  11463. if ((ctxt->progressive == XML_PARSER_DTD) ||
  11464. (ctxt->instate == XML_PARSER_DTD)) {
  11465. if (memchr(chunk, '>', size) != NULL)
  11466. return(1);
  11467. return(0);
  11468. }
  11469. return(1);
  11470. }
  11471. /**
  11472. * xmlParseChunk:
  11473. * @ctxt: an XML parser context
  11474. * @chunk: an char array
  11475. * @size: the size in byte of the chunk
  11476. * @terminate: last chunk indicator
  11477. *
  11478. * Parse a Chunk of memory
  11479. *
  11480. * Returns zero if no error, the xmlParserErrors otherwise.
  11481. */
  11482. int
  11483. xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
  11484. int terminate) {
  11485. int end_in_lf = 0;
  11486. int remain = 0;
  11487. size_t old_avail = 0;
  11488. size_t avail = 0;
  11489. if (ctxt == NULL)
  11490. return(XML_ERR_INTERNAL_ERROR);
  11491. if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
  11492. return(ctxt->errNo);
  11493. if (ctxt->instate == XML_PARSER_EOF)
  11494. return(-1);
  11495. if (ctxt->instate == XML_PARSER_START)
  11496. xmlDetectSAX2(ctxt);
  11497. if ((size > 0) && (chunk != NULL) && (!terminate) &&
  11498. (chunk[size - 1] == '\r')) {
  11499. end_in_lf = 1;
  11500. size--;
  11501. }
  11502. xmldecl_done:
  11503. if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
  11504. (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
  11505. size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
  11506. size_t cur = ctxt->input->cur - ctxt->input->base;
  11507. int res;
  11508. old_avail = xmlBufUse(ctxt->input->buf->buffer);
  11509. /*
  11510. * Specific handling if we autodetected an encoding, we should not
  11511. * push more than the first line ... which depend on the encoding
  11512. * And only push the rest once the final encoding was detected
  11513. */
  11514. if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
  11515. (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
  11516. unsigned int len = 45;
  11517. if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
  11518. BAD_CAST "UTF-16")) ||
  11519. (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
  11520. BAD_CAST "UTF16")))
  11521. len = 90;
  11522. else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
  11523. BAD_CAST "UCS-4")) ||
  11524. (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
  11525. BAD_CAST "UCS4")))
  11526. len = 180;
  11527. if (ctxt->input->buf->rawconsumed < len)
  11528. len -= ctxt->input->buf->rawconsumed;
  11529. /*
  11530. * Change size for reading the initial declaration only
  11531. * if size is greater than len. Otherwise, memmove in xmlBufferAdd
  11532. * will blindly copy extra bytes from memory.
  11533. */
  11534. if ((unsigned int) size > len) {
  11535. remain = size - len;
  11536. size = len;
  11537. } else {
  11538. remain = 0;
  11539. }
  11540. }
  11541. res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
  11542. xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
  11543. if (res < 0) {
  11544. ctxt->errNo = XML_PARSER_EOF;
  11545. xmlHaltParser(ctxt);
  11546. return (XML_PARSER_EOF);
  11547. }
  11548. #ifdef DEBUG_PUSH
  11549. xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
  11550. #endif
  11551. } else if (ctxt->instate != XML_PARSER_EOF) {
  11552. if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
  11553. xmlParserInputBufferPtr in = ctxt->input->buf;
  11554. if ((in->encoder != NULL) && (in->buffer != NULL) &&
  11555. (in->raw != NULL)) {
  11556. int nbchars;
  11557. size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
  11558. size_t current = ctxt->input->cur - ctxt->input->base;
  11559. nbchars = xmlCharEncInput(in, terminate);
  11560. xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
  11561. if (nbchars < 0) {
  11562. /* TODO 2.6.0 */
  11563. xmlGenericError(xmlGenericErrorContext,
  11564. "xmlParseChunk: encoder error\n");
  11565. xmlHaltParser(ctxt);
  11566. return(XML_ERR_INVALID_ENCODING);
  11567. }
  11568. }
  11569. }
  11570. }
  11571. if (remain != 0) {
  11572. xmlParseTryOrFinish(ctxt, 0);
  11573. } else {
  11574. if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
  11575. avail = xmlBufUse(ctxt->input->buf->buffer);
  11576. /*
  11577. * Depending on the current state it may not be such
  11578. * a good idea to try parsing if there is nothing in the chunk
  11579. * which would be worth doing a parser state transition and we
  11580. * need to wait for more data
  11581. */
  11582. if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
  11583. (old_avail == 0) || (avail == 0) ||
  11584. (xmlParseCheckTransition(ctxt,
  11585. (const char *)&ctxt->input->base[old_avail],
  11586. avail - old_avail)))
  11587. xmlParseTryOrFinish(ctxt, terminate);
  11588. }
  11589. if (ctxt->instate == XML_PARSER_EOF)
  11590. return(ctxt->errNo);
  11591. if ((ctxt->input != NULL) &&
  11592. (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
  11593. ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
  11594. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  11595. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
  11596. xmlHaltParser(ctxt);
  11597. }
  11598. if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
  11599. return(ctxt->errNo);
  11600. if (remain != 0) {
  11601. chunk += size;
  11602. size = remain;
  11603. remain = 0;
  11604. goto xmldecl_done;
  11605. }
  11606. if ((end_in_lf == 1) && (ctxt->input != NULL) &&
  11607. (ctxt->input->buf != NULL)) {
  11608. size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
  11609. ctxt->input);
  11610. size_t current = ctxt->input->cur - ctxt->input->base;
  11611. xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
  11612. xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
  11613. base, current);
  11614. }
  11615. if (terminate) {
  11616. /*
  11617. * Check for termination
  11618. */
  11619. int cur_avail = 0;
  11620. if (ctxt->input != NULL) {
  11621. if (ctxt->input->buf == NULL)
  11622. cur_avail = ctxt->input->length -
  11623. (ctxt->input->cur - ctxt->input->base);
  11624. else
  11625. cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
  11626. (ctxt->input->cur - ctxt->input->base);
  11627. }
  11628. if ((ctxt->instate != XML_PARSER_EOF) &&
  11629. (ctxt->instate != XML_PARSER_EPILOG)) {
  11630. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
  11631. }
  11632. if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
  11633. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
  11634. }
  11635. if (ctxt->instate != XML_PARSER_EOF) {
  11636. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  11637. ctxt->sax->endDocument(ctxt->userData);
  11638. }
  11639. ctxt->instate = XML_PARSER_EOF;
  11640. }
  11641. if (ctxt->wellFormed == 0)
  11642. return((xmlParserErrors) ctxt->errNo);
  11643. else
  11644. return(0);
  11645. }
  11646. /************************************************************************
  11647. * *
  11648. * I/O front end functions to the parser *
  11649. * *
  11650. ************************************************************************/
  11651. /**
  11652. * xmlCreatePushParserCtxt:
  11653. * @sax: a SAX handler
  11654. * @user_data: The user data returned on SAX callbacks
  11655. * @chunk: a pointer to an array of chars
  11656. * @size: number of chars in the array
  11657. * @filename: an optional file name or URI
  11658. *
  11659. * Create a parser context for using the XML parser in push mode.
  11660. * If @buffer and @size are non-NULL, the data is used to detect
  11661. * the encoding. The remaining characters will be parsed so they
  11662. * don't need to be fed in again through xmlParseChunk.
  11663. * To allow content encoding detection, @size should be >= 4
  11664. * The value of @filename is used for fetching external entities
  11665. * and error/warning reports.
  11666. *
  11667. * Returns the new parser context or NULL
  11668. */
  11669. xmlParserCtxtPtr
  11670. xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
  11671. const char *chunk, int size, const char *filename) {
  11672. xmlParserCtxtPtr ctxt;
  11673. xmlParserInputPtr inputStream;
  11674. xmlParserInputBufferPtr buf;
  11675. xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
  11676. /*
  11677. * plug some encoding conversion routines
  11678. */
  11679. if ((chunk != NULL) && (size >= 4))
  11680. enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
  11681. buf = xmlAllocParserInputBuffer(enc);
  11682. if (buf == NULL) return(NULL);
  11683. ctxt = xmlNewParserCtxt();
  11684. if (ctxt == NULL) {
  11685. xmlErrMemory(NULL, "creating parser: out of memory\n");
  11686. xmlFreeParserInputBuffer(buf);
  11687. return(NULL);
  11688. }
  11689. ctxt->dictNames = 1;
  11690. if (sax != NULL) {
  11691. #ifdef LIBXML_SAX1_ENABLED
  11692. if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
  11693. #endif /* LIBXML_SAX1_ENABLED */
  11694. xmlFree(ctxt->sax);
  11695. ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
  11696. if (ctxt->sax == NULL) {
  11697. xmlErrMemory(ctxt, NULL);
  11698. xmlFreeParserInputBuffer(buf);
  11699. xmlFreeParserCtxt(ctxt);
  11700. return(NULL);
  11701. }
  11702. memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
  11703. if (sax->initialized == XML_SAX2_MAGIC)
  11704. memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
  11705. else
  11706. memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
  11707. if (user_data != NULL)
  11708. ctxt->userData = user_data;
  11709. }
  11710. if (filename == NULL) {
  11711. ctxt->directory = NULL;
  11712. } else {
  11713. ctxt->directory = xmlParserGetDirectory(filename);
  11714. }
  11715. inputStream = xmlNewInputStream(ctxt);
  11716. if (inputStream == NULL) {
  11717. xmlFreeParserCtxt(ctxt);
  11718. xmlFreeParserInputBuffer(buf);
  11719. return(NULL);
  11720. }
  11721. if (filename == NULL)
  11722. inputStream->filename = NULL;
  11723. else {
  11724. inputStream->filename = (char *)
  11725. xmlCanonicPath((const xmlChar *) filename);
  11726. if (inputStream->filename == NULL) {
  11727. xmlFreeParserCtxt(ctxt);
  11728. xmlFreeParserInputBuffer(buf);
  11729. return(NULL);
  11730. }
  11731. }
  11732. inputStream->buf = buf;
  11733. xmlBufResetInput(inputStream->buf->buffer, inputStream);
  11734. inputPush(ctxt, inputStream);
  11735. /*
  11736. * If the caller didn't provide an initial 'chunk' for determining
  11737. * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
  11738. * that it can be automatically determined later
  11739. */
  11740. if ((size == 0) || (chunk == NULL)) {
  11741. ctxt->charset = XML_CHAR_ENCODING_NONE;
  11742. } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
  11743. size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
  11744. size_t cur = ctxt->input->cur - ctxt->input->base;
  11745. xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
  11746. xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
  11747. #ifdef DEBUG_PUSH
  11748. xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
  11749. #endif
  11750. }
  11751. if (enc != XML_CHAR_ENCODING_NONE) {
  11752. xmlSwitchEncoding(ctxt, enc);
  11753. }
  11754. return(ctxt);
  11755. }
  11756. #endif /* LIBXML_PUSH_ENABLED */
  11757. /**
  11758. * xmlHaltParser:
  11759. * @ctxt: an XML parser context
  11760. *
  11761. * Blocks further parser processing don't override error
  11762. * for internal use
  11763. */
  11764. static void
  11765. xmlHaltParser(xmlParserCtxtPtr ctxt) {
  11766. if (ctxt == NULL)
  11767. return;
  11768. ctxt->instate = XML_PARSER_EOF;
  11769. ctxt->disableSAX = 1;
  11770. while (ctxt->inputNr > 1)
  11771. xmlFreeInputStream(inputPop(ctxt));
  11772. if (ctxt->input != NULL) {
  11773. /*
  11774. * in case there was a specific allocation deallocate before
  11775. * overriding base
  11776. */
  11777. if (ctxt->input->free != NULL) {
  11778. ctxt->input->free((xmlChar *) ctxt->input->base);
  11779. ctxt->input->free = NULL;
  11780. }
  11781. if (ctxt->input->buf != NULL) {
  11782. xmlFreeParserInputBuffer(ctxt->input->buf);
  11783. ctxt->input->buf = NULL;
  11784. }
  11785. ctxt->input->cur = BAD_CAST"";
  11786. ctxt->input->length = 0;
  11787. ctxt->input->base = ctxt->input->cur;
  11788. ctxt->input->end = ctxt->input->cur;
  11789. }
  11790. }
  11791. /**
  11792. * xmlStopParser:
  11793. * @ctxt: an XML parser context
  11794. *
  11795. * Blocks further parser processing
  11796. */
  11797. void
  11798. xmlStopParser(xmlParserCtxtPtr ctxt) {
  11799. if (ctxt == NULL)
  11800. return;
  11801. xmlHaltParser(ctxt);
  11802. ctxt->errNo = XML_ERR_USER_STOP;
  11803. }
  11804. /**
  11805. * xmlCreateIOParserCtxt:
  11806. * @sax: a SAX handler
  11807. * @user_data: The user data returned on SAX callbacks
  11808. * @ioread: an I/O read function
  11809. * @ioclose: an I/O close function
  11810. * @ioctx: an I/O handler
  11811. * @enc: the charset encoding if known
  11812. *
  11813. * Create a parser context for using the XML parser with an existing
  11814. * I/O stream
  11815. *
  11816. * Returns the new parser context or NULL
  11817. */
  11818. xmlParserCtxtPtr
  11819. xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
  11820. xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
  11821. void *ioctx, xmlCharEncoding enc) {
  11822. xmlParserCtxtPtr ctxt;
  11823. xmlParserInputPtr inputStream;
  11824. xmlParserInputBufferPtr buf;
  11825. if (ioread == NULL) return(NULL);
  11826. buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
  11827. if (buf == NULL) {
  11828. if (ioclose != NULL)
  11829. ioclose(ioctx);
  11830. return (NULL);
  11831. }
  11832. ctxt = xmlNewParserCtxt();
  11833. if (ctxt == NULL) {
  11834. xmlFreeParserInputBuffer(buf);
  11835. return(NULL);
  11836. }
  11837. if (sax != NULL) {
  11838. #ifdef LIBXML_SAX1_ENABLED
  11839. if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
  11840. #endif /* LIBXML_SAX1_ENABLED */
  11841. xmlFree(ctxt->sax);
  11842. ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
  11843. if (ctxt->sax == NULL) {
  11844. xmlErrMemory(ctxt, NULL);
  11845. xmlFreeParserCtxt(ctxt);
  11846. return(NULL);
  11847. }
  11848. memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
  11849. if (sax->initialized == XML_SAX2_MAGIC)
  11850. memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
  11851. else
  11852. memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
  11853. if (user_data != NULL)
  11854. ctxt->userData = user_data;
  11855. }
  11856. inputStream = xmlNewIOInputStream(ctxt, buf, enc);
  11857. if (inputStream == NULL) {
  11858. xmlFreeParserCtxt(ctxt);
  11859. return(NULL);
  11860. }
  11861. inputPush(ctxt, inputStream);
  11862. return(ctxt);
  11863. }
  11864. #ifdef LIBXML_VALID_ENABLED
  11865. /************************************************************************
  11866. * *
  11867. * Front ends when parsing a DTD *
  11868. * *
  11869. ************************************************************************/
  11870. /**
  11871. * xmlIOParseDTD:
  11872. * @sax: the SAX handler block or NULL
  11873. * @input: an Input Buffer
  11874. * @enc: the charset encoding if known
  11875. *
  11876. * Load and parse a DTD
  11877. *
  11878. * Returns the resulting xmlDtdPtr or NULL in case of error.
  11879. * @input will be freed by the function in any case.
  11880. */
  11881. xmlDtdPtr
  11882. xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
  11883. xmlCharEncoding enc) {
  11884. xmlDtdPtr ret = NULL;
  11885. xmlParserCtxtPtr ctxt;
  11886. xmlParserInputPtr pinput = NULL;
  11887. xmlChar start[4];
  11888. if (input == NULL)
  11889. return(NULL);
  11890. ctxt = xmlNewParserCtxt();
  11891. if (ctxt == NULL) {
  11892. xmlFreeParserInputBuffer(input);
  11893. return(NULL);
  11894. }
  11895. /* We are loading a DTD */
  11896. ctxt->options |= XML_PARSE_DTDLOAD;
  11897. /*
  11898. * Set-up the SAX context
  11899. */
  11900. if (sax != NULL) {
  11901. if (ctxt->sax != NULL)
  11902. xmlFree(ctxt->sax);
  11903. ctxt->sax = sax;
  11904. ctxt->userData = ctxt;
  11905. }
  11906. xmlDetectSAX2(ctxt);
  11907. /*
  11908. * generate a parser input from the I/O handler
  11909. */
  11910. pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  11911. if (pinput == NULL) {
  11912. if (sax != NULL) ctxt->sax = NULL;
  11913. xmlFreeParserInputBuffer(input);
  11914. xmlFreeParserCtxt(ctxt);
  11915. return(NULL);
  11916. }
  11917. /*
  11918. * plug some encoding conversion routines here.
  11919. */
  11920. if (xmlPushInput(ctxt, pinput) < 0) {
  11921. if (sax != NULL) ctxt->sax = NULL;
  11922. xmlFreeParserCtxt(ctxt);
  11923. return(NULL);
  11924. }
  11925. if (enc != XML_CHAR_ENCODING_NONE) {
  11926. xmlSwitchEncoding(ctxt, enc);
  11927. }
  11928. pinput->filename = NULL;
  11929. pinput->line = 1;
  11930. pinput->col = 1;
  11931. pinput->base = ctxt->input->cur;
  11932. pinput->cur = ctxt->input->cur;
  11933. pinput->free = NULL;
  11934. /*
  11935. * let's parse that entity knowing it's an external subset.
  11936. */
  11937. ctxt->inSubset = 2;
  11938. ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
  11939. if (ctxt->myDoc == NULL) {
  11940. xmlErrMemory(ctxt, "New Doc failed");
  11941. return(NULL);
  11942. }
  11943. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  11944. ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
  11945. BAD_CAST "none", BAD_CAST "none");
  11946. if ((enc == XML_CHAR_ENCODING_NONE) &&
  11947. ((ctxt->input->end - ctxt->input->cur) >= 4)) {
  11948. /*
  11949. * Get the 4 first bytes and decode the charset
  11950. * if enc != XML_CHAR_ENCODING_NONE
  11951. * plug some encoding conversion routines.
  11952. */
  11953. start[0] = RAW;
  11954. start[1] = NXT(1);
  11955. start[2] = NXT(2);
  11956. start[3] = NXT(3);
  11957. enc = xmlDetectCharEncoding(start, 4);
  11958. if (enc != XML_CHAR_ENCODING_NONE) {
  11959. xmlSwitchEncoding(ctxt, enc);
  11960. }
  11961. }
  11962. xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
  11963. if (ctxt->myDoc != NULL) {
  11964. if (ctxt->wellFormed) {
  11965. ret = ctxt->myDoc->extSubset;
  11966. ctxt->myDoc->extSubset = NULL;
  11967. if (ret != NULL) {
  11968. xmlNodePtr tmp;
  11969. ret->doc = NULL;
  11970. tmp = ret->children;
  11971. while (tmp != NULL) {
  11972. tmp->doc = NULL;
  11973. tmp = tmp->next;
  11974. }
  11975. }
  11976. } else {
  11977. ret = NULL;
  11978. }
  11979. xmlFreeDoc(ctxt->myDoc);
  11980. ctxt->myDoc = NULL;
  11981. }
  11982. if (sax != NULL) ctxt->sax = NULL;
  11983. xmlFreeParserCtxt(ctxt);
  11984. return(ret);
  11985. }
  11986. /**
  11987. * xmlSAXParseDTD:
  11988. * @sax: the SAX handler block
  11989. * @ExternalID: a NAME* containing the External ID of the DTD
  11990. * @SystemID: a NAME* containing the URL to the DTD
  11991. *
  11992. * Load and parse an external subset.
  11993. *
  11994. * Returns the resulting xmlDtdPtr or NULL in case of error.
  11995. */
  11996. xmlDtdPtr
  11997. xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
  11998. const xmlChar *SystemID) {
  11999. xmlDtdPtr ret = NULL;
  12000. xmlParserCtxtPtr ctxt;
  12001. xmlParserInputPtr input = NULL;
  12002. xmlCharEncoding enc;
  12003. xmlChar* systemIdCanonic;
  12004. if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
  12005. ctxt = xmlNewParserCtxt();
  12006. if (ctxt == NULL) {
  12007. return(NULL);
  12008. }
  12009. /* We are loading a DTD */
  12010. ctxt->options |= XML_PARSE_DTDLOAD;
  12011. /*
  12012. * Set-up the SAX context
  12013. */
  12014. if (sax != NULL) {
  12015. if (ctxt->sax != NULL)
  12016. xmlFree(ctxt->sax);
  12017. ctxt->sax = sax;
  12018. ctxt->userData = ctxt;
  12019. }
  12020. /*
  12021. * Canonicalise the system ID
  12022. */
  12023. systemIdCanonic = xmlCanonicPath(SystemID);
  12024. if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
  12025. xmlFreeParserCtxt(ctxt);
  12026. return(NULL);
  12027. }
  12028. /*
  12029. * Ask the Entity resolver to load the damn thing
  12030. */
  12031. if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
  12032. input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
  12033. systemIdCanonic);
  12034. if (input == NULL) {
  12035. if (sax != NULL) ctxt->sax = NULL;
  12036. xmlFreeParserCtxt(ctxt);
  12037. if (systemIdCanonic != NULL)
  12038. xmlFree(systemIdCanonic);
  12039. return(NULL);
  12040. }
  12041. /*
  12042. * plug some encoding conversion routines here.
  12043. */
  12044. if (xmlPushInput(ctxt, input) < 0) {
  12045. if (sax != NULL) ctxt->sax = NULL;
  12046. xmlFreeParserCtxt(ctxt);
  12047. if (systemIdCanonic != NULL)
  12048. xmlFree(systemIdCanonic);
  12049. return(NULL);
  12050. }
  12051. if ((ctxt->input->end - ctxt->input->cur) >= 4) {
  12052. enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
  12053. xmlSwitchEncoding(ctxt, enc);
  12054. }
  12055. if (input->filename == NULL)
  12056. input->filename = (char *) systemIdCanonic;
  12057. else
  12058. xmlFree(systemIdCanonic);
  12059. input->line = 1;
  12060. input->col = 1;
  12061. input->base = ctxt->input->cur;
  12062. input->cur = ctxt->input->cur;
  12063. input->free = NULL;
  12064. /*
  12065. * let's parse that entity knowing it's an external subset.
  12066. */
  12067. ctxt->inSubset = 2;
  12068. ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
  12069. if (ctxt->myDoc == NULL) {
  12070. xmlErrMemory(ctxt, "New Doc failed");
  12071. if (sax != NULL) ctxt->sax = NULL;
  12072. xmlFreeParserCtxt(ctxt);
  12073. return(NULL);
  12074. }
  12075. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  12076. ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
  12077. ExternalID, SystemID);
  12078. xmlParseExternalSubset(ctxt, ExternalID, SystemID);
  12079. if (ctxt->myDoc != NULL) {
  12080. if (ctxt->wellFormed) {
  12081. ret = ctxt->myDoc->extSubset;
  12082. ctxt->myDoc->extSubset = NULL;
  12083. if (ret != NULL) {
  12084. xmlNodePtr tmp;
  12085. ret->doc = NULL;
  12086. tmp = ret->children;
  12087. while (tmp != NULL) {
  12088. tmp->doc = NULL;
  12089. tmp = tmp->next;
  12090. }
  12091. }
  12092. } else {
  12093. ret = NULL;
  12094. }
  12095. xmlFreeDoc(ctxt->myDoc);
  12096. ctxt->myDoc = NULL;
  12097. }
  12098. if (sax != NULL) ctxt->sax = NULL;
  12099. xmlFreeParserCtxt(ctxt);
  12100. return(ret);
  12101. }
  12102. /**
  12103. * xmlParseDTD:
  12104. * @ExternalID: a NAME* containing the External ID of the DTD
  12105. * @SystemID: a NAME* containing the URL to the DTD
  12106. *
  12107. * Load and parse an external subset.
  12108. *
  12109. * Returns the resulting xmlDtdPtr or NULL in case of error.
  12110. */
  12111. xmlDtdPtr
  12112. xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
  12113. return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
  12114. }
  12115. #endif /* LIBXML_VALID_ENABLED */
  12116. /************************************************************************
  12117. * *
  12118. * Front ends when parsing an Entity *
  12119. * *
  12120. ************************************************************************/
  12121. /**
  12122. * xmlParseCtxtExternalEntity:
  12123. * @ctx: the existing parsing context
  12124. * @URL: the URL for the entity to load
  12125. * @ID: the System ID for the entity to load
  12126. * @lst: the return value for the set of parsed nodes
  12127. *
  12128. * Parse an external general entity within an existing parsing context
  12129. * An external general parsed entity is well-formed if it matches the
  12130. * production labeled extParsedEnt.
  12131. *
  12132. * [78] extParsedEnt ::= TextDecl? content
  12133. *
  12134. * Returns 0 if the entity is well formed, -1 in case of args problem and
  12135. * the parser error code otherwise
  12136. */
  12137. int
  12138. xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
  12139. const xmlChar *ID, xmlNodePtr *lst) {
  12140. void *userData;
  12141. if (ctx == NULL) return(-1);
  12142. /*
  12143. * If the user provided their own SAX callbacks, then reuse the
  12144. * userData callback field, otherwise the expected setup in a
  12145. * DOM builder is to have userData == ctxt
  12146. */
  12147. if (ctx->userData == ctx)
  12148. userData = NULL;
  12149. else
  12150. userData = ctx->userData;
  12151. return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
  12152. userData, ctx->depth + 1,
  12153. URL, ID, lst);
  12154. }
  12155. /**
  12156. * xmlParseExternalEntityPrivate:
  12157. * @doc: the document the chunk pertains to
  12158. * @oldctxt: the previous parser context if available
  12159. * @sax: the SAX handler block (possibly NULL)
  12160. * @user_data: The user data returned on SAX callbacks (possibly NULL)
  12161. * @depth: Used for loop detection, use 0
  12162. * @URL: the URL for the entity to load
  12163. * @ID: the System ID for the entity to load
  12164. * @list: the return value for the set of parsed nodes
  12165. *
  12166. * Private version of xmlParseExternalEntity()
  12167. *
  12168. * Returns 0 if the entity is well formed, -1 in case of args problem and
  12169. * the parser error code otherwise
  12170. */
  12171. static xmlParserErrors
  12172. xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
  12173. xmlSAXHandlerPtr sax,
  12174. void *user_data, int depth, const xmlChar *URL,
  12175. const xmlChar *ID, xmlNodePtr *list) {
  12176. xmlParserCtxtPtr ctxt;
  12177. xmlDocPtr newDoc;
  12178. xmlNodePtr newRoot;
  12179. xmlSAXHandlerPtr oldsax = NULL;
  12180. xmlParserErrors ret = XML_ERR_OK;
  12181. xmlChar start[4];
  12182. xmlCharEncoding enc;
  12183. if (((depth > 40) &&
  12184. ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
  12185. (depth > 1024)) {
  12186. return(XML_ERR_ENTITY_LOOP);
  12187. }
  12188. if (list != NULL)
  12189. *list = NULL;
  12190. if ((URL == NULL) && (ID == NULL))
  12191. return(XML_ERR_INTERNAL_ERROR);
  12192. if (doc == NULL)
  12193. return(XML_ERR_INTERNAL_ERROR);
  12194. ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
  12195. if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
  12196. ctxt->userData = ctxt;
  12197. if (sax != NULL) {
  12198. oldsax = ctxt->sax;
  12199. ctxt->sax = sax;
  12200. if (user_data != NULL)
  12201. ctxt->userData = user_data;
  12202. }
  12203. xmlDetectSAX2(ctxt);
  12204. newDoc = xmlNewDoc(BAD_CAST "1.0");
  12205. if (newDoc == NULL) {
  12206. xmlFreeParserCtxt(ctxt);
  12207. return(XML_ERR_INTERNAL_ERROR);
  12208. }
  12209. newDoc->properties = XML_DOC_INTERNAL;
  12210. if (doc) {
  12211. newDoc->intSubset = doc->intSubset;
  12212. newDoc->extSubset = doc->extSubset;
  12213. if (doc->dict) {
  12214. newDoc->dict = doc->dict;
  12215. xmlDictReference(newDoc->dict);
  12216. }
  12217. if (doc->URL != NULL) {
  12218. newDoc->URL = xmlStrdup(doc->URL);
  12219. }
  12220. }
  12221. newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
  12222. if (newRoot == NULL) {
  12223. if (sax != NULL)
  12224. ctxt->sax = oldsax;
  12225. xmlFreeParserCtxt(ctxt);
  12226. newDoc->intSubset = NULL;
  12227. newDoc->extSubset = NULL;
  12228. xmlFreeDoc(newDoc);
  12229. return(XML_ERR_INTERNAL_ERROR);
  12230. }
  12231. xmlAddChild((xmlNodePtr) newDoc, newRoot);
  12232. nodePush(ctxt, newDoc->children);
  12233. if (doc == NULL) {
  12234. ctxt->myDoc = newDoc;
  12235. } else {
  12236. ctxt->myDoc = doc;
  12237. newRoot->doc = doc;
  12238. }
  12239. /*
  12240. * Get the 4 first bytes and decode the charset
  12241. * if enc != XML_CHAR_ENCODING_NONE
  12242. * plug some encoding conversion routines.
  12243. */
  12244. GROW;
  12245. if ((ctxt->input->end - ctxt->input->cur) >= 4) {
  12246. start[0] = RAW;
  12247. start[1] = NXT(1);
  12248. start[2] = NXT(2);
  12249. start[3] = NXT(3);
  12250. enc = xmlDetectCharEncoding(start, 4);
  12251. if (enc != XML_CHAR_ENCODING_NONE) {
  12252. xmlSwitchEncoding(ctxt, enc);
  12253. }
  12254. }
  12255. /*
  12256. * Parse a possible text declaration first
  12257. */
  12258. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  12259. xmlParseTextDecl(ctxt);
  12260. /*
  12261. * An XML-1.0 document can't reference an entity not XML-1.0
  12262. */
  12263. if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
  12264. (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
  12265. xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
  12266. "Version mismatch between document and entity\n");
  12267. }
  12268. }
  12269. ctxt->instate = XML_PARSER_CONTENT;
  12270. ctxt->depth = depth;
  12271. if (oldctxt != NULL) {
  12272. ctxt->_private = oldctxt->_private;
  12273. ctxt->loadsubset = oldctxt->loadsubset;
  12274. ctxt->validate = oldctxt->validate;
  12275. ctxt->valid = oldctxt->valid;
  12276. ctxt->replaceEntities = oldctxt->replaceEntities;
  12277. if (oldctxt->validate) {
  12278. ctxt->vctxt.error = oldctxt->vctxt.error;
  12279. ctxt->vctxt.warning = oldctxt->vctxt.warning;
  12280. ctxt->vctxt.userData = oldctxt->vctxt.userData;
  12281. }
  12282. ctxt->external = oldctxt->external;
  12283. if (ctxt->dict) xmlDictFree(ctxt->dict);
  12284. ctxt->dict = oldctxt->dict;
  12285. ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
  12286. ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
  12287. ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
  12288. ctxt->dictNames = oldctxt->dictNames;
  12289. ctxt->attsDefault = oldctxt->attsDefault;
  12290. ctxt->attsSpecial = oldctxt->attsSpecial;
  12291. ctxt->linenumbers = oldctxt->linenumbers;
  12292. ctxt->record_info = oldctxt->record_info;
  12293. ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
  12294. ctxt->node_seq.length = oldctxt->node_seq.length;
  12295. ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
  12296. } else {
  12297. /*
  12298. * Doing validity checking on chunk without context
  12299. * doesn't make sense
  12300. */
  12301. ctxt->_private = NULL;
  12302. ctxt->validate = 0;
  12303. ctxt->external = 2;
  12304. ctxt->loadsubset = 0;
  12305. }
  12306. xmlParseContent(ctxt);
  12307. if ((RAW == '<') && (NXT(1) == '/')) {
  12308. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12309. } else if (RAW != 0) {
  12310. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  12311. }
  12312. if (ctxt->node != newDoc->children) {
  12313. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12314. }
  12315. if (!ctxt->wellFormed) {
  12316. if (ctxt->errNo == 0)
  12317. ret = XML_ERR_INTERNAL_ERROR;
  12318. else
  12319. ret = (xmlParserErrors)ctxt->errNo;
  12320. } else {
  12321. if (list != NULL) {
  12322. xmlNodePtr cur;
  12323. /*
  12324. * Return the newly created nodeset after unlinking it from
  12325. * they pseudo parent.
  12326. */
  12327. cur = newDoc->children->children;
  12328. *list = cur;
  12329. while (cur != NULL) {
  12330. cur->parent = NULL;
  12331. cur = cur->next;
  12332. }
  12333. newDoc->children->children = NULL;
  12334. }
  12335. ret = XML_ERR_OK;
  12336. }
  12337. /*
  12338. * Record in the parent context the number of entities replacement
  12339. * done when parsing that reference.
  12340. */
  12341. if (oldctxt != NULL)
  12342. oldctxt->nbentities += ctxt->nbentities;
  12343. /*
  12344. * Also record the size of the entity parsed
  12345. */
  12346. if (ctxt->input != NULL && oldctxt != NULL) {
  12347. oldctxt->sizeentities += ctxt->input->consumed;
  12348. oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
  12349. }
  12350. /*
  12351. * And record the last error if any
  12352. */
  12353. if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
  12354. xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
  12355. if (sax != NULL)
  12356. ctxt->sax = oldsax;
  12357. if (oldctxt != NULL) {
  12358. ctxt->dict = NULL;
  12359. ctxt->attsDefault = NULL;
  12360. ctxt->attsSpecial = NULL;
  12361. oldctxt->validate = ctxt->validate;
  12362. oldctxt->valid = ctxt->valid;
  12363. oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
  12364. oldctxt->node_seq.length = ctxt->node_seq.length;
  12365. oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
  12366. }
  12367. ctxt->node_seq.maximum = 0;
  12368. ctxt->node_seq.length = 0;
  12369. ctxt->node_seq.buffer = NULL;
  12370. xmlFreeParserCtxt(ctxt);
  12371. newDoc->intSubset = NULL;
  12372. newDoc->extSubset = NULL;
  12373. xmlFreeDoc(newDoc);
  12374. return(ret);
  12375. }
  12376. #ifdef LIBXML_SAX1_ENABLED
  12377. /**
  12378. * xmlParseExternalEntity:
  12379. * @doc: the document the chunk pertains to
  12380. * @sax: the SAX handler block (possibly NULL)
  12381. * @user_data: The user data returned on SAX callbacks (possibly NULL)
  12382. * @depth: Used for loop detection, use 0
  12383. * @URL: the URL for the entity to load
  12384. * @ID: the System ID for the entity to load
  12385. * @lst: the return value for the set of parsed nodes
  12386. *
  12387. * Parse an external general entity
  12388. * An external general parsed entity is well-formed if it matches the
  12389. * production labeled extParsedEnt.
  12390. *
  12391. * [78] extParsedEnt ::= TextDecl? content
  12392. *
  12393. * Returns 0 if the entity is well formed, -1 in case of args problem and
  12394. * the parser error code otherwise
  12395. */
  12396. int
  12397. xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
  12398. int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
  12399. return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
  12400. ID, lst));
  12401. }
  12402. /**
  12403. * xmlParseBalancedChunkMemory:
  12404. * @doc: the document the chunk pertains to (must not be NULL)
  12405. * @sax: the SAX handler block (possibly NULL)
  12406. * @user_data: The user data returned on SAX callbacks (possibly NULL)
  12407. * @depth: Used for loop detection, use 0
  12408. * @string: the input string in UTF8 or ISO-Latin (zero terminated)
  12409. * @lst: the return value for the set of parsed nodes
  12410. *
  12411. * Parse a well-balanced chunk of an XML document
  12412. * called by the parser
  12413. * The allowed sequence for the Well Balanced Chunk is the one defined by
  12414. * the content production in the XML grammar:
  12415. *
  12416. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  12417. *
  12418. * Returns 0 if the chunk is well balanced, -1 in case of args problem and
  12419. * the parser error code otherwise
  12420. */
  12421. int
  12422. xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
  12423. void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
  12424. return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
  12425. depth, string, lst, 0 );
  12426. }
  12427. #endif /* LIBXML_SAX1_ENABLED */
  12428. /**
  12429. * xmlParseBalancedChunkMemoryInternal:
  12430. * @oldctxt: the existing parsing context
  12431. * @string: the input string in UTF8 or ISO-Latin (zero terminated)
  12432. * @user_data: the user data field for the parser context
  12433. * @lst: the return value for the set of parsed nodes
  12434. *
  12435. *
  12436. * Parse a well-balanced chunk of an XML document
  12437. * called by the parser
  12438. * The allowed sequence for the Well Balanced Chunk is the one defined by
  12439. * the content production in the XML grammar:
  12440. *
  12441. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  12442. *
  12443. * Returns XML_ERR_OK if the chunk is well balanced, and the parser
  12444. * error code otherwise
  12445. *
  12446. * In case recover is set to 1, the nodelist will not be empty even if
  12447. * the parsed chunk is not well balanced.
  12448. */
  12449. static xmlParserErrors
  12450. xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
  12451. const xmlChar *string, void *user_data, xmlNodePtr *lst) {
  12452. xmlParserCtxtPtr ctxt;
  12453. xmlDocPtr newDoc = NULL;
  12454. xmlNodePtr newRoot;
  12455. xmlSAXHandlerPtr oldsax = NULL;
  12456. xmlNodePtr content = NULL;
  12457. xmlNodePtr last = NULL;
  12458. int size;
  12459. xmlParserErrors ret = XML_ERR_OK;
  12460. #ifdef SAX2
  12461. int i;
  12462. #endif
  12463. if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
  12464. (oldctxt->depth > 1024)) {
  12465. return(XML_ERR_ENTITY_LOOP);
  12466. }
  12467. if (lst != NULL)
  12468. *lst = NULL;
  12469. if (string == NULL)
  12470. return(XML_ERR_INTERNAL_ERROR);
  12471. size = xmlStrlen(string);
  12472. ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
  12473. if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
  12474. if (user_data != NULL)
  12475. ctxt->userData = user_data;
  12476. else
  12477. ctxt->userData = ctxt;
  12478. if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
  12479. ctxt->dict = oldctxt->dict;
  12480. ctxt->input_id = oldctxt->input_id + 1;
  12481. ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
  12482. ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
  12483. ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
  12484. #ifdef SAX2
  12485. /* propagate namespaces down the entity */
  12486. for (i = 0;i < oldctxt->nsNr;i += 2) {
  12487. nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
  12488. }
  12489. #endif
  12490. oldsax = ctxt->sax;
  12491. ctxt->sax = oldctxt->sax;
  12492. xmlDetectSAX2(ctxt);
  12493. ctxt->replaceEntities = oldctxt->replaceEntities;
  12494. ctxt->options = oldctxt->options;
  12495. ctxt->_private = oldctxt->_private;
  12496. if (oldctxt->myDoc == NULL) {
  12497. newDoc = xmlNewDoc(BAD_CAST "1.0");
  12498. if (newDoc == NULL) {
  12499. ctxt->sax = oldsax;
  12500. ctxt->dict = NULL;
  12501. xmlFreeParserCtxt(ctxt);
  12502. return(XML_ERR_INTERNAL_ERROR);
  12503. }
  12504. newDoc->properties = XML_DOC_INTERNAL;
  12505. newDoc->dict = ctxt->dict;
  12506. xmlDictReference(newDoc->dict);
  12507. ctxt->myDoc = newDoc;
  12508. } else {
  12509. ctxt->myDoc = oldctxt->myDoc;
  12510. content = ctxt->myDoc->children;
  12511. last = ctxt->myDoc->last;
  12512. }
  12513. newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
  12514. if (newRoot == NULL) {
  12515. ctxt->sax = oldsax;
  12516. ctxt->dict = NULL;
  12517. xmlFreeParserCtxt(ctxt);
  12518. if (newDoc != NULL) {
  12519. xmlFreeDoc(newDoc);
  12520. }
  12521. return(XML_ERR_INTERNAL_ERROR);
  12522. }
  12523. ctxt->myDoc->children = NULL;
  12524. ctxt->myDoc->last = NULL;
  12525. xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
  12526. nodePush(ctxt, ctxt->myDoc->children);
  12527. ctxt->instate = XML_PARSER_CONTENT;
  12528. ctxt->depth = oldctxt->depth + 1;
  12529. ctxt->validate = 0;
  12530. ctxt->loadsubset = oldctxt->loadsubset;
  12531. if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
  12532. /*
  12533. * ID/IDREF registration will be done in xmlValidateElement below
  12534. */
  12535. ctxt->loadsubset |= XML_SKIP_IDS;
  12536. }
  12537. ctxt->dictNames = oldctxt->dictNames;
  12538. ctxt->attsDefault = oldctxt->attsDefault;
  12539. ctxt->attsSpecial = oldctxt->attsSpecial;
  12540. xmlParseContent(ctxt);
  12541. if ((RAW == '<') && (NXT(1) == '/')) {
  12542. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12543. } else if (RAW != 0) {
  12544. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  12545. }
  12546. if (ctxt->node != ctxt->myDoc->children) {
  12547. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12548. }
  12549. if (!ctxt->wellFormed) {
  12550. if (ctxt->errNo == 0)
  12551. ret = XML_ERR_INTERNAL_ERROR;
  12552. else
  12553. ret = (xmlParserErrors)ctxt->errNo;
  12554. } else {
  12555. ret = XML_ERR_OK;
  12556. }
  12557. if ((lst != NULL) && (ret == XML_ERR_OK)) {
  12558. xmlNodePtr cur;
  12559. /*
  12560. * Return the newly created nodeset after unlinking it from
  12561. * they pseudo parent.
  12562. */
  12563. cur = ctxt->myDoc->children->children;
  12564. *lst = cur;
  12565. while (cur != NULL) {
  12566. #ifdef LIBXML_VALID_ENABLED
  12567. if ((oldctxt->validate) && (oldctxt->wellFormed) &&
  12568. (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
  12569. (cur->type == XML_ELEMENT_NODE)) {
  12570. oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
  12571. oldctxt->myDoc, cur);
  12572. }
  12573. #endif /* LIBXML_VALID_ENABLED */
  12574. cur->parent = NULL;
  12575. cur = cur->next;
  12576. }
  12577. ctxt->myDoc->children->children = NULL;
  12578. }
  12579. if (ctxt->myDoc != NULL) {
  12580. xmlFreeNode(ctxt->myDoc->children);
  12581. ctxt->myDoc->children = content;
  12582. ctxt->myDoc->last = last;
  12583. }
  12584. /*
  12585. * Record in the parent context the number of entities replacement
  12586. * done when parsing that reference.
  12587. */
  12588. if (oldctxt != NULL)
  12589. oldctxt->nbentities += ctxt->nbentities;
  12590. /*
  12591. * Also record the last error if any
  12592. */
  12593. if (ctxt->lastError.code != XML_ERR_OK)
  12594. xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
  12595. ctxt->sax = oldsax;
  12596. ctxt->dict = NULL;
  12597. ctxt->attsDefault = NULL;
  12598. ctxt->attsSpecial = NULL;
  12599. xmlFreeParserCtxt(ctxt);
  12600. if (newDoc != NULL) {
  12601. xmlFreeDoc(newDoc);
  12602. }
  12603. return(ret);
  12604. }
  12605. /**
  12606. * xmlParseInNodeContext:
  12607. * @node: the context node
  12608. * @data: the input string
  12609. * @datalen: the input string length in bytes
  12610. * @options: a combination of xmlParserOption
  12611. * @lst: the return value for the set of parsed nodes
  12612. *
  12613. * Parse a well-balanced chunk of an XML document
  12614. * within the context (DTD, namespaces, etc ...) of the given node.
  12615. *
  12616. * The allowed sequence for the data is a Well Balanced Chunk defined by
  12617. * the content production in the XML grammar:
  12618. *
  12619. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  12620. *
  12621. * Returns XML_ERR_OK if the chunk is well balanced, and the parser
  12622. * error code otherwise
  12623. */
  12624. xmlParserErrors
  12625. xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
  12626. int options, xmlNodePtr *lst) {
  12627. #ifdef SAX2
  12628. xmlParserCtxtPtr ctxt;
  12629. xmlDocPtr doc = NULL;
  12630. xmlNodePtr fake, cur;
  12631. int nsnr = 0;
  12632. xmlParserErrors ret = XML_ERR_OK;
  12633. /*
  12634. * check all input parameters, grab the document
  12635. */
  12636. if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
  12637. return(XML_ERR_INTERNAL_ERROR);
  12638. switch (node->type) {
  12639. case XML_ELEMENT_NODE:
  12640. case XML_ATTRIBUTE_NODE:
  12641. case XML_TEXT_NODE:
  12642. case XML_CDATA_SECTION_NODE:
  12643. case XML_ENTITY_REF_NODE:
  12644. case XML_PI_NODE:
  12645. case XML_COMMENT_NODE:
  12646. case XML_DOCUMENT_NODE:
  12647. case XML_HTML_DOCUMENT_NODE:
  12648. break;
  12649. default:
  12650. return(XML_ERR_INTERNAL_ERROR);
  12651. }
  12652. while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
  12653. (node->type != XML_DOCUMENT_NODE) &&
  12654. (node->type != XML_HTML_DOCUMENT_NODE))
  12655. node = node->parent;
  12656. if (node == NULL)
  12657. return(XML_ERR_INTERNAL_ERROR);
  12658. if (node->type == XML_ELEMENT_NODE)
  12659. doc = node->doc;
  12660. else
  12661. doc = (xmlDocPtr) node;
  12662. if (doc == NULL)
  12663. return(XML_ERR_INTERNAL_ERROR);
  12664. /*
  12665. * allocate a context and set-up everything not related to the
  12666. * node position in the tree
  12667. */
  12668. if (doc->type == XML_DOCUMENT_NODE)
  12669. ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
  12670. #ifdef LIBXML_HTML_ENABLED
  12671. else if (doc->type == XML_HTML_DOCUMENT_NODE) {
  12672. ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
  12673. /*
  12674. * When parsing in context, it makes no sense to add implied
  12675. * elements like html/body/etc...
  12676. */
  12677. options |= HTML_PARSE_NOIMPLIED;
  12678. }
  12679. #endif
  12680. else
  12681. return(XML_ERR_INTERNAL_ERROR);
  12682. if (ctxt == NULL)
  12683. return(XML_ERR_NO_MEMORY);
  12684. /*
  12685. * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
  12686. * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
  12687. * we must wait until the last moment to free the original one.
  12688. */
  12689. if (doc->dict != NULL) {
  12690. if (ctxt->dict != NULL)
  12691. xmlDictFree(ctxt->dict);
  12692. ctxt->dict = doc->dict;
  12693. } else
  12694. options |= XML_PARSE_NODICT;
  12695. if (doc->encoding != NULL) {
  12696. xmlCharEncodingHandlerPtr hdlr;
  12697. if (ctxt->encoding != NULL)
  12698. xmlFree((xmlChar *) ctxt->encoding);
  12699. ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
  12700. hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
  12701. if (hdlr != NULL) {
  12702. xmlSwitchToEncoding(ctxt, hdlr);
  12703. } else {
  12704. return(XML_ERR_UNSUPPORTED_ENCODING);
  12705. }
  12706. }
  12707. xmlCtxtUseOptionsInternal(ctxt, options, NULL);
  12708. xmlDetectSAX2(ctxt);
  12709. ctxt->myDoc = doc;
  12710. /* parsing in context, i.e. as within existing content */
  12711. ctxt->input_id = 2;
  12712. ctxt->instate = XML_PARSER_CONTENT;
  12713. fake = xmlNewComment(NULL);
  12714. if (fake == NULL) {
  12715. xmlFreeParserCtxt(ctxt);
  12716. return(XML_ERR_NO_MEMORY);
  12717. }
  12718. xmlAddChild(node, fake);
  12719. if (node->type == XML_ELEMENT_NODE) {
  12720. nodePush(ctxt, node);
  12721. /*
  12722. * initialize the SAX2 namespaces stack
  12723. */
  12724. cur = node;
  12725. while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
  12726. xmlNsPtr ns = cur->nsDef;
  12727. const xmlChar *iprefix, *ihref;
  12728. while (ns != NULL) {
  12729. if (ctxt->dict) {
  12730. iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
  12731. ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
  12732. } else {
  12733. iprefix = ns->prefix;
  12734. ihref = ns->href;
  12735. }
  12736. if (xmlGetNamespace(ctxt, iprefix) == NULL) {
  12737. nsPush(ctxt, iprefix, ihref);
  12738. nsnr++;
  12739. }
  12740. ns = ns->next;
  12741. }
  12742. cur = cur->parent;
  12743. }
  12744. }
  12745. if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
  12746. /*
  12747. * ID/IDREF registration will be done in xmlValidateElement below
  12748. */
  12749. ctxt->loadsubset |= XML_SKIP_IDS;
  12750. }
  12751. #ifdef LIBXML_HTML_ENABLED
  12752. if (doc->type == XML_HTML_DOCUMENT_NODE)
  12753. __htmlParseContent(ctxt);
  12754. else
  12755. #endif
  12756. xmlParseContent(ctxt);
  12757. nsPop(ctxt, nsnr);
  12758. if ((RAW == '<') && (NXT(1) == '/')) {
  12759. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12760. } else if (RAW != 0) {
  12761. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  12762. }
  12763. if ((ctxt->node != NULL) && (ctxt->node != node)) {
  12764. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12765. ctxt->wellFormed = 0;
  12766. }
  12767. if (!ctxt->wellFormed) {
  12768. if (ctxt->errNo == 0)
  12769. ret = XML_ERR_INTERNAL_ERROR;
  12770. else
  12771. ret = (xmlParserErrors)ctxt->errNo;
  12772. } else {
  12773. ret = XML_ERR_OK;
  12774. }
  12775. /*
  12776. * Return the newly created nodeset after unlinking it from
  12777. * the pseudo sibling.
  12778. */
  12779. cur = fake->next;
  12780. fake->next = NULL;
  12781. node->last = fake;
  12782. if (cur != NULL) {
  12783. cur->prev = NULL;
  12784. }
  12785. *lst = cur;
  12786. while (cur != NULL) {
  12787. cur->parent = NULL;
  12788. cur = cur->next;
  12789. }
  12790. xmlUnlinkNode(fake);
  12791. xmlFreeNode(fake);
  12792. if (ret != XML_ERR_OK) {
  12793. xmlFreeNodeList(*lst);
  12794. *lst = NULL;
  12795. }
  12796. if (doc->dict != NULL)
  12797. ctxt->dict = NULL;
  12798. xmlFreeParserCtxt(ctxt);
  12799. return(ret);
  12800. #else /* !SAX2 */
  12801. return(XML_ERR_INTERNAL_ERROR);
  12802. #endif
  12803. }
  12804. #ifdef LIBXML_SAX1_ENABLED
  12805. /**
  12806. * xmlParseBalancedChunkMemoryRecover:
  12807. * @doc: the document the chunk pertains to (must not be NULL)
  12808. * @sax: the SAX handler block (possibly NULL)
  12809. * @user_data: The user data returned on SAX callbacks (possibly NULL)
  12810. * @depth: Used for loop detection, use 0
  12811. * @string: the input string in UTF8 or ISO-Latin (zero terminated)
  12812. * @lst: the return value for the set of parsed nodes
  12813. * @recover: return nodes even if the data is broken (use 0)
  12814. *
  12815. *
  12816. * Parse a well-balanced chunk of an XML document
  12817. * called by the parser
  12818. * The allowed sequence for the Well Balanced Chunk is the one defined by
  12819. * the content production in the XML grammar:
  12820. *
  12821. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  12822. *
  12823. * Returns 0 if the chunk is well balanced, -1 in case of args problem and
  12824. * the parser error code otherwise
  12825. *
  12826. * In case recover is set to 1, the nodelist will not be empty even if
  12827. * the parsed chunk is not well balanced, assuming the parsing succeeded to
  12828. * some extent.
  12829. */
  12830. int
  12831. xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
  12832. void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
  12833. int recover) {
  12834. xmlParserCtxtPtr ctxt;
  12835. xmlDocPtr newDoc;
  12836. xmlSAXHandlerPtr oldsax = NULL;
  12837. xmlNodePtr content, newRoot;
  12838. int size;
  12839. int ret = 0;
  12840. if (depth > 40) {
  12841. return(XML_ERR_ENTITY_LOOP);
  12842. }
  12843. if (lst != NULL)
  12844. *lst = NULL;
  12845. if (string == NULL)
  12846. return(-1);
  12847. size = xmlStrlen(string);
  12848. ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
  12849. if (ctxt == NULL) return(-1);
  12850. ctxt->userData = ctxt;
  12851. if (sax != NULL) {
  12852. oldsax = ctxt->sax;
  12853. ctxt->sax = sax;
  12854. if (user_data != NULL)
  12855. ctxt->userData = user_data;
  12856. }
  12857. newDoc = xmlNewDoc(BAD_CAST "1.0");
  12858. if (newDoc == NULL) {
  12859. xmlFreeParserCtxt(ctxt);
  12860. return(-1);
  12861. }
  12862. newDoc->properties = XML_DOC_INTERNAL;
  12863. if ((doc != NULL) && (doc->dict != NULL)) {
  12864. xmlDictFree(ctxt->dict);
  12865. ctxt->dict = doc->dict;
  12866. xmlDictReference(ctxt->dict);
  12867. ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
  12868. ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
  12869. ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
  12870. ctxt->dictNames = 1;
  12871. } else {
  12872. xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
  12873. }
  12874. /* doc == NULL is only supported for historic reasons */
  12875. if (doc != NULL) {
  12876. newDoc->intSubset = doc->intSubset;
  12877. newDoc->extSubset = doc->extSubset;
  12878. }
  12879. newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
  12880. if (newRoot == NULL) {
  12881. if (sax != NULL)
  12882. ctxt->sax = oldsax;
  12883. xmlFreeParserCtxt(ctxt);
  12884. newDoc->intSubset = NULL;
  12885. newDoc->extSubset = NULL;
  12886. xmlFreeDoc(newDoc);
  12887. return(-1);
  12888. }
  12889. xmlAddChild((xmlNodePtr) newDoc, newRoot);
  12890. nodePush(ctxt, newRoot);
  12891. /* doc == NULL is only supported for historic reasons */
  12892. if (doc == NULL) {
  12893. ctxt->myDoc = newDoc;
  12894. } else {
  12895. ctxt->myDoc = newDoc;
  12896. newDoc->children->doc = doc;
  12897. /* Ensure that doc has XML spec namespace */
  12898. xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
  12899. newDoc->oldNs = doc->oldNs;
  12900. }
  12901. ctxt->instate = XML_PARSER_CONTENT;
  12902. ctxt->input_id = 2;
  12903. ctxt->depth = depth;
  12904. /*
  12905. * Doing validity checking on chunk doesn't make sense
  12906. */
  12907. ctxt->validate = 0;
  12908. ctxt->loadsubset = 0;
  12909. xmlDetectSAX2(ctxt);
  12910. if ( doc != NULL ){
  12911. content = doc->children;
  12912. doc->children = NULL;
  12913. xmlParseContent(ctxt);
  12914. doc->children = content;
  12915. }
  12916. else {
  12917. xmlParseContent(ctxt);
  12918. }
  12919. if ((RAW == '<') && (NXT(1) == '/')) {
  12920. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12921. } else if (RAW != 0) {
  12922. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  12923. }
  12924. if (ctxt->node != newDoc->children) {
  12925. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12926. }
  12927. if (!ctxt->wellFormed) {
  12928. if (ctxt->errNo == 0)
  12929. ret = 1;
  12930. else
  12931. ret = ctxt->errNo;
  12932. } else {
  12933. ret = 0;
  12934. }
  12935. if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
  12936. xmlNodePtr cur;
  12937. /*
  12938. * Return the newly created nodeset after unlinking it from
  12939. * they pseudo parent.
  12940. */
  12941. cur = newDoc->children->children;
  12942. *lst = cur;
  12943. while (cur != NULL) {
  12944. xmlSetTreeDoc(cur, doc);
  12945. cur->parent = NULL;
  12946. cur = cur->next;
  12947. }
  12948. newDoc->children->children = NULL;
  12949. }
  12950. if (sax != NULL)
  12951. ctxt->sax = oldsax;
  12952. xmlFreeParserCtxt(ctxt);
  12953. newDoc->intSubset = NULL;
  12954. newDoc->extSubset = NULL;
  12955. /* This leaks the namespace list if doc == NULL */
  12956. newDoc->oldNs = NULL;
  12957. xmlFreeDoc(newDoc);
  12958. return(ret);
  12959. }
  12960. /**
  12961. * xmlSAXParseEntity:
  12962. * @sax: the SAX handler block
  12963. * @filename: the filename
  12964. *
  12965. * parse an XML external entity out of context and build a tree.
  12966. * It use the given SAX function block to handle the parsing callback.
  12967. * If sax is NULL, fallback to the default DOM tree building routines.
  12968. *
  12969. * [78] extParsedEnt ::= TextDecl? content
  12970. *
  12971. * This correspond to a "Well Balanced" chunk
  12972. *
  12973. * Returns the resulting document tree
  12974. */
  12975. xmlDocPtr
  12976. xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
  12977. xmlDocPtr ret;
  12978. xmlParserCtxtPtr ctxt;
  12979. ctxt = xmlCreateFileParserCtxt(filename);
  12980. if (ctxt == NULL) {
  12981. return(NULL);
  12982. }
  12983. if (sax != NULL) {
  12984. if (ctxt->sax != NULL)
  12985. xmlFree(ctxt->sax);
  12986. ctxt->sax = sax;
  12987. ctxt->userData = NULL;
  12988. }
  12989. xmlParseExtParsedEnt(ctxt);
  12990. if (ctxt->wellFormed)
  12991. ret = ctxt->myDoc;
  12992. else {
  12993. ret = NULL;
  12994. xmlFreeDoc(ctxt->myDoc);
  12995. ctxt->myDoc = NULL;
  12996. }
  12997. if (sax != NULL)
  12998. ctxt->sax = NULL;
  12999. xmlFreeParserCtxt(ctxt);
  13000. return(ret);
  13001. }
  13002. /**
  13003. * xmlParseEntity:
  13004. * @filename: the filename
  13005. *
  13006. * parse an XML external entity out of context and build a tree.
  13007. *
  13008. * [78] extParsedEnt ::= TextDecl? content
  13009. *
  13010. * This correspond to a "Well Balanced" chunk
  13011. *
  13012. * Returns the resulting document tree
  13013. */
  13014. xmlDocPtr
  13015. xmlParseEntity(const char *filename) {
  13016. return(xmlSAXParseEntity(NULL, filename));
  13017. }
  13018. #endif /* LIBXML_SAX1_ENABLED */
  13019. /**
  13020. * xmlCreateEntityParserCtxtInternal:
  13021. * @URL: the entity URL
  13022. * @ID: the entity PUBLIC ID
  13023. * @base: a possible base for the target URI
  13024. * @pctx: parser context used to set options on new context
  13025. *
  13026. * Create a parser context for an external entity
  13027. * Automatic support for ZLIB/Compress compressed document is provided
  13028. * by default if found at compile-time.
  13029. *
  13030. * Returns the new parser context or NULL
  13031. */
  13032. static xmlParserCtxtPtr
  13033. xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
  13034. const xmlChar *base, xmlParserCtxtPtr pctx) {
  13035. xmlParserCtxtPtr ctxt;
  13036. xmlParserInputPtr inputStream;
  13037. char *directory = NULL;
  13038. xmlChar *uri;
  13039. ctxt = xmlNewParserCtxt();
  13040. if (ctxt == NULL) {
  13041. return(NULL);
  13042. }
  13043. if (pctx != NULL) {
  13044. ctxt->options = pctx->options;
  13045. ctxt->_private = pctx->_private;
  13046. /*
  13047. * this is a subparser of pctx, so the input_id should be
  13048. * incremented to distinguish from main entity
  13049. */
  13050. ctxt->input_id = pctx->input_id + 1;
  13051. }
  13052. /* Don't read from stdin. */
  13053. if (xmlStrcmp(URL, BAD_CAST "-") == 0)
  13054. URL = BAD_CAST "./-";
  13055. uri = xmlBuildURI(URL, base);
  13056. if (uri == NULL) {
  13057. inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
  13058. if (inputStream == NULL) {
  13059. xmlFreeParserCtxt(ctxt);
  13060. return(NULL);
  13061. }
  13062. inputPush(ctxt, inputStream);
  13063. if ((ctxt->directory == NULL) && (directory == NULL))
  13064. directory = xmlParserGetDirectory((char *)URL);
  13065. if ((ctxt->directory == NULL) && (directory != NULL))
  13066. ctxt->directory = directory;
  13067. } else {
  13068. inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
  13069. if (inputStream == NULL) {
  13070. xmlFree(uri);
  13071. xmlFreeParserCtxt(ctxt);
  13072. return(NULL);
  13073. }
  13074. inputPush(ctxt, inputStream);
  13075. if ((ctxt->directory == NULL) && (directory == NULL))
  13076. directory = xmlParserGetDirectory((char *)uri);
  13077. if ((ctxt->directory == NULL) && (directory != NULL))
  13078. ctxt->directory = directory;
  13079. xmlFree(uri);
  13080. }
  13081. return(ctxt);
  13082. }
  13083. /**
  13084. * xmlCreateEntityParserCtxt:
  13085. * @URL: the entity URL
  13086. * @ID: the entity PUBLIC ID
  13087. * @base: a possible base for the target URI
  13088. *
  13089. * Create a parser context for an external entity
  13090. * Automatic support for ZLIB/Compress compressed document is provided
  13091. * by default if found at compile-time.
  13092. *
  13093. * Returns the new parser context or NULL
  13094. */
  13095. xmlParserCtxtPtr
  13096. xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
  13097. const xmlChar *base) {
  13098. return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
  13099. }
  13100. /************************************************************************
  13101. * *
  13102. * Front ends when parsing from a file *
  13103. * *
  13104. ************************************************************************/
  13105. /**
  13106. * xmlCreateURLParserCtxt:
  13107. * @filename: the filename or URL
  13108. * @options: a combination of xmlParserOption
  13109. *
  13110. * Create a parser context for a file or URL content.
  13111. * Automatic support for ZLIB/Compress compressed document is provided
  13112. * by default if found at compile-time and for file accesses
  13113. *
  13114. * Returns the new parser context or NULL
  13115. */
  13116. xmlParserCtxtPtr
  13117. xmlCreateURLParserCtxt(const char *filename, int options)
  13118. {
  13119. xmlParserCtxtPtr ctxt;
  13120. xmlParserInputPtr inputStream;
  13121. char *directory = NULL;
  13122. ctxt = xmlNewParserCtxt();
  13123. if (ctxt == NULL) {
  13124. xmlErrMemory(NULL, "cannot allocate parser context");
  13125. return(NULL);
  13126. }
  13127. if (options)
  13128. xmlCtxtUseOptionsInternal(ctxt, options, NULL);
  13129. ctxt->linenumbers = 1;
  13130. inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
  13131. if (inputStream == NULL) {
  13132. xmlFreeParserCtxt(ctxt);
  13133. return(NULL);
  13134. }
  13135. inputPush(ctxt, inputStream);
  13136. if ((ctxt->directory == NULL) && (directory == NULL))
  13137. directory = xmlParserGetDirectory(filename);
  13138. if ((ctxt->directory == NULL) && (directory != NULL))
  13139. ctxt->directory = directory;
  13140. return(ctxt);
  13141. }
  13142. /**
  13143. * xmlCreateFileParserCtxt:
  13144. * @filename: the filename
  13145. *
  13146. * Create a parser context for a file content.
  13147. * Automatic support for ZLIB/Compress compressed document is provided
  13148. * by default if found at compile-time.
  13149. *
  13150. * Returns the new parser context or NULL
  13151. */
  13152. xmlParserCtxtPtr
  13153. xmlCreateFileParserCtxt(const char *filename)
  13154. {
  13155. return(xmlCreateURLParserCtxt(filename, 0));
  13156. }
  13157. #ifdef LIBXML_SAX1_ENABLED
  13158. /**
  13159. * xmlSAXParseFileWithData:
  13160. * @sax: the SAX handler block
  13161. * @filename: the filename
  13162. * @recovery: work in recovery mode, i.e. tries to read no Well Formed
  13163. * documents
  13164. * @data: the userdata
  13165. *
  13166. * parse an XML file and build a tree. Automatic support for ZLIB/Compress
  13167. * compressed document is provided by default if found at compile-time.
  13168. * It use the given SAX function block to handle the parsing callback.
  13169. * If sax is NULL, fallback to the default DOM tree building routines.
  13170. *
  13171. * User data (void *) is stored within the parser context in the
  13172. * context's _private member, so it is available nearly everywhere in libxml
  13173. *
  13174. * Returns the resulting document tree
  13175. */
  13176. xmlDocPtr
  13177. xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
  13178. int recovery, void *data) {
  13179. xmlDocPtr ret;
  13180. xmlParserCtxtPtr ctxt;
  13181. xmlInitParser();
  13182. ctxt = xmlCreateFileParserCtxt(filename);
  13183. if (ctxt == NULL) {
  13184. return(NULL);
  13185. }
  13186. if (sax != NULL) {
  13187. if (ctxt->sax != NULL)
  13188. xmlFree(ctxt->sax);
  13189. ctxt->sax = sax;
  13190. }
  13191. xmlDetectSAX2(ctxt);
  13192. if (data!=NULL) {
  13193. ctxt->_private = data;
  13194. }
  13195. if (ctxt->directory == NULL)
  13196. ctxt->directory = xmlParserGetDirectory(filename);
  13197. ctxt->recovery = recovery;
  13198. xmlParseDocument(ctxt);
  13199. if ((ctxt->wellFormed) || recovery) {
  13200. ret = ctxt->myDoc;
  13201. if ((ret != NULL) && (ctxt->input->buf != NULL)) {
  13202. if (ctxt->input->buf->compressed > 0)
  13203. ret->compression = 9;
  13204. else
  13205. ret->compression = ctxt->input->buf->compressed;
  13206. }
  13207. }
  13208. else {
  13209. ret = NULL;
  13210. xmlFreeDoc(ctxt->myDoc);
  13211. ctxt->myDoc = NULL;
  13212. }
  13213. if (sax != NULL)
  13214. ctxt->sax = NULL;
  13215. xmlFreeParserCtxt(ctxt);
  13216. return(ret);
  13217. }
  13218. /**
  13219. * xmlSAXParseFile:
  13220. * @sax: the SAX handler block
  13221. * @filename: the filename
  13222. * @recovery: work in recovery mode, i.e. tries to read no Well Formed
  13223. * documents
  13224. *
  13225. * parse an XML file and build a tree. Automatic support for ZLIB/Compress
  13226. * compressed document is provided by default if found at compile-time.
  13227. * It use the given SAX function block to handle the parsing callback.
  13228. * If sax is NULL, fallback to the default DOM tree building routines.
  13229. *
  13230. * Returns the resulting document tree
  13231. */
  13232. xmlDocPtr
  13233. xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
  13234. int recovery) {
  13235. return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
  13236. }
  13237. /**
  13238. * xmlRecoverDoc:
  13239. * @cur: a pointer to an array of xmlChar
  13240. *
  13241. * parse an XML in-memory document and build a tree.
  13242. * In the case the document is not Well Formed, a attempt to build a
  13243. * tree is tried anyway
  13244. *
  13245. * Returns the resulting document tree or NULL in case of failure
  13246. */
  13247. xmlDocPtr
  13248. xmlRecoverDoc(const xmlChar *cur) {
  13249. return(xmlSAXParseDoc(NULL, cur, 1));
  13250. }
  13251. /**
  13252. * xmlParseFile:
  13253. * @filename: the filename
  13254. *
  13255. * parse an XML file and build a tree. Automatic support for ZLIB/Compress
  13256. * compressed document is provided by default if found at compile-time.
  13257. *
  13258. * Returns the resulting document tree if the file was wellformed,
  13259. * NULL otherwise.
  13260. */
  13261. xmlDocPtr
  13262. xmlParseFile(const char *filename) {
  13263. return(xmlSAXParseFile(NULL, filename, 0));
  13264. }
  13265. /**
  13266. * xmlRecoverFile:
  13267. * @filename: the filename
  13268. *
  13269. * parse an XML file and build a tree. Automatic support for ZLIB/Compress
  13270. * compressed document is provided by default if found at compile-time.
  13271. * In the case the document is not Well Formed, it attempts to build
  13272. * a tree anyway
  13273. *
  13274. * Returns the resulting document tree or NULL in case of failure
  13275. */
  13276. xmlDocPtr
  13277. xmlRecoverFile(const char *filename) {
  13278. return(xmlSAXParseFile(NULL, filename, 1));
  13279. }
  13280. /**
  13281. * xmlSetupParserForBuffer:
  13282. * @ctxt: an XML parser context
  13283. * @buffer: a xmlChar * buffer
  13284. * @filename: a file name
  13285. *
  13286. * Setup the parser context to parse a new buffer; Clears any prior
  13287. * contents from the parser context. The buffer parameter must not be
  13288. * NULL, but the filename parameter can be
  13289. */
  13290. void
  13291. xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
  13292. const char* filename)
  13293. {
  13294. xmlParserInputPtr input;
  13295. if ((ctxt == NULL) || (buffer == NULL))
  13296. return;
  13297. input = xmlNewInputStream(ctxt);
  13298. if (input == NULL) {
  13299. xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
  13300. xmlClearParserCtxt(ctxt);
  13301. return;
  13302. }
  13303. xmlClearParserCtxt(ctxt);
  13304. if (filename != NULL)
  13305. input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
  13306. input->base = buffer;
  13307. input->cur = buffer;
  13308. input->end = &buffer[xmlStrlen(buffer)];
  13309. inputPush(ctxt, input);
  13310. }
  13311. /**
  13312. * xmlSAXUserParseFile:
  13313. * @sax: a SAX handler
  13314. * @user_data: The user data returned on SAX callbacks
  13315. * @filename: a file name
  13316. *
  13317. * parse an XML file and call the given SAX handler routines.
  13318. * Automatic support for ZLIB/Compress compressed document is provided
  13319. *
  13320. * Returns 0 in case of success or a error number otherwise
  13321. */
  13322. int
  13323. xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
  13324. const char *filename) {
  13325. int ret = 0;
  13326. xmlParserCtxtPtr ctxt;
  13327. ctxt = xmlCreateFileParserCtxt(filename);
  13328. if (ctxt == NULL) return -1;
  13329. if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
  13330. xmlFree(ctxt->sax);
  13331. ctxt->sax = sax;
  13332. xmlDetectSAX2(ctxt);
  13333. if (user_data != NULL)
  13334. ctxt->userData = user_data;
  13335. xmlParseDocument(ctxt);
  13336. if (ctxt->wellFormed)
  13337. ret = 0;
  13338. else {
  13339. if (ctxt->errNo != 0)
  13340. ret = ctxt->errNo;
  13341. else
  13342. ret = -1;
  13343. }
  13344. if (sax != NULL)
  13345. ctxt->sax = NULL;
  13346. if (ctxt->myDoc != NULL) {
  13347. xmlFreeDoc(ctxt->myDoc);
  13348. ctxt->myDoc = NULL;
  13349. }
  13350. xmlFreeParserCtxt(ctxt);
  13351. return ret;
  13352. }
  13353. #endif /* LIBXML_SAX1_ENABLED */
  13354. /************************************************************************
  13355. * *
  13356. * Front ends when parsing from memory *
  13357. * *
  13358. ************************************************************************/
  13359. /**
  13360. * xmlCreateMemoryParserCtxt:
  13361. * @buffer: a pointer to a char array
  13362. * @size: the size of the array
  13363. *
  13364. * Create a parser context for an XML in-memory document.
  13365. *
  13366. * Returns the new parser context or NULL
  13367. */
  13368. xmlParserCtxtPtr
  13369. xmlCreateMemoryParserCtxt(const char *buffer, int size) {
  13370. xmlParserCtxtPtr ctxt;
  13371. xmlParserInputPtr input;
  13372. xmlParserInputBufferPtr buf;
  13373. if (buffer == NULL)
  13374. return(NULL);
  13375. if (size <= 0)
  13376. return(NULL);
  13377. ctxt = xmlNewParserCtxt();
  13378. if (ctxt == NULL)
  13379. return(NULL);
  13380. /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
  13381. buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
  13382. if (buf == NULL) {
  13383. xmlFreeParserCtxt(ctxt);
  13384. return(NULL);
  13385. }
  13386. input = xmlNewInputStream(ctxt);
  13387. if (input == NULL) {
  13388. xmlFreeParserInputBuffer(buf);
  13389. xmlFreeParserCtxt(ctxt);
  13390. return(NULL);
  13391. }
  13392. input->filename = NULL;
  13393. input->buf = buf;
  13394. xmlBufResetInput(input->buf->buffer, input);
  13395. inputPush(ctxt, input);
  13396. return(ctxt);
  13397. }
  13398. #ifdef LIBXML_SAX1_ENABLED
  13399. /**
  13400. * xmlSAXParseMemoryWithData:
  13401. * @sax: the SAX handler block
  13402. * @buffer: an pointer to a char array
  13403. * @size: the size of the array
  13404. * @recovery: work in recovery mode, i.e. tries to read no Well Formed
  13405. * documents
  13406. * @data: the userdata
  13407. *
  13408. * parse an XML in-memory block and use the given SAX function block
  13409. * to handle the parsing callback. If sax is NULL, fallback to the default
  13410. * DOM tree building routines.
  13411. *
  13412. * User data (void *) is stored within the parser context in the
  13413. * context's _private member, so it is available nearly everywhere in libxml
  13414. *
  13415. * Returns the resulting document tree
  13416. */
  13417. xmlDocPtr
  13418. xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
  13419. int size, int recovery, void *data) {
  13420. xmlDocPtr ret;
  13421. xmlParserCtxtPtr ctxt;
  13422. xmlInitParser();
  13423. ctxt = xmlCreateMemoryParserCtxt(buffer, size);
  13424. if (ctxt == NULL) return(NULL);
  13425. if (sax != NULL) {
  13426. if (ctxt->sax != NULL)
  13427. xmlFree(ctxt->sax);
  13428. ctxt->sax = sax;
  13429. }
  13430. xmlDetectSAX2(ctxt);
  13431. if (data!=NULL) {
  13432. ctxt->_private=data;
  13433. }
  13434. ctxt->recovery = recovery;
  13435. xmlParseDocument(ctxt);
  13436. if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
  13437. else {
  13438. ret = NULL;
  13439. xmlFreeDoc(ctxt->myDoc);
  13440. ctxt->myDoc = NULL;
  13441. }
  13442. if (sax != NULL)
  13443. ctxt->sax = NULL;
  13444. xmlFreeParserCtxt(ctxt);
  13445. return(ret);
  13446. }
  13447. /**
  13448. * xmlSAXParseMemory:
  13449. * @sax: the SAX handler block
  13450. * @buffer: an pointer to a char array
  13451. * @size: the size of the array
  13452. * @recovery: work in recovery mode, i.e. tries to read not Well Formed
  13453. * documents
  13454. *
  13455. * parse an XML in-memory block and use the given SAX function block
  13456. * to handle the parsing callback. If sax is NULL, fallback to the default
  13457. * DOM tree building routines.
  13458. *
  13459. * Returns the resulting document tree
  13460. */
  13461. xmlDocPtr
  13462. xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
  13463. int size, int recovery) {
  13464. return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
  13465. }
  13466. /**
  13467. * xmlParseMemory:
  13468. * @buffer: an pointer to a char array
  13469. * @size: the size of the array
  13470. *
  13471. * parse an XML in-memory block and build a tree.
  13472. *
  13473. * Returns the resulting document tree
  13474. */
  13475. xmlDocPtr xmlParseMemory(const char *buffer, int size) {
  13476. return(xmlSAXParseMemory(NULL, buffer, size, 0));
  13477. }
  13478. /**
  13479. * xmlRecoverMemory:
  13480. * @buffer: an pointer to a char array
  13481. * @size: the size of the array
  13482. *
  13483. * parse an XML in-memory block and build a tree.
  13484. * In the case the document is not Well Formed, an attempt to
  13485. * build a tree is tried anyway
  13486. *
  13487. * Returns the resulting document tree or NULL in case of error
  13488. */
  13489. xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
  13490. return(xmlSAXParseMemory(NULL, buffer, size, 1));
  13491. }
  13492. /**
  13493. * xmlSAXUserParseMemory:
  13494. * @sax: a SAX handler
  13495. * @user_data: The user data returned on SAX callbacks
  13496. * @buffer: an in-memory XML document input
  13497. * @size: the length of the XML document in bytes
  13498. *
  13499. * A better SAX parsing routine.
  13500. * parse an XML in-memory buffer and call the given SAX handler routines.
  13501. *
  13502. * Returns 0 in case of success or a error number otherwise
  13503. */
  13504. int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
  13505. const char *buffer, int size) {
  13506. int ret = 0;
  13507. xmlParserCtxtPtr ctxt;
  13508. xmlInitParser();
  13509. ctxt = xmlCreateMemoryParserCtxt(buffer, size);
  13510. if (ctxt == NULL) return -1;
  13511. if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
  13512. xmlFree(ctxt->sax);
  13513. ctxt->sax = sax;
  13514. xmlDetectSAX2(ctxt);
  13515. if (user_data != NULL)
  13516. ctxt->userData = user_data;
  13517. xmlParseDocument(ctxt);
  13518. if (ctxt->wellFormed)
  13519. ret = 0;
  13520. else {
  13521. if (ctxt->errNo != 0)
  13522. ret = ctxt->errNo;
  13523. else
  13524. ret = -1;
  13525. }
  13526. if (sax != NULL)
  13527. ctxt->sax = NULL;
  13528. if (ctxt->myDoc != NULL) {
  13529. xmlFreeDoc(ctxt->myDoc);
  13530. ctxt->myDoc = NULL;
  13531. }
  13532. xmlFreeParserCtxt(ctxt);
  13533. return ret;
  13534. }
  13535. #endif /* LIBXML_SAX1_ENABLED */
  13536. /**
  13537. * xmlCreateDocParserCtxt:
  13538. * @cur: a pointer to an array of xmlChar
  13539. *
  13540. * Creates a parser context for an XML in-memory document.
  13541. *
  13542. * Returns the new parser context or NULL
  13543. */
  13544. xmlParserCtxtPtr
  13545. xmlCreateDocParserCtxt(const xmlChar *cur) {
  13546. int len;
  13547. if (cur == NULL)
  13548. return(NULL);
  13549. len = xmlStrlen(cur);
  13550. return(xmlCreateMemoryParserCtxt((const char *)cur, len));
  13551. }
  13552. #ifdef LIBXML_SAX1_ENABLED
  13553. /**
  13554. * xmlSAXParseDoc:
  13555. * @sax: the SAX handler block
  13556. * @cur: a pointer to an array of xmlChar
  13557. * @recovery: work in recovery mode, i.e. tries to read no Well Formed
  13558. * documents
  13559. *
  13560. * parse an XML in-memory document and build a tree.
  13561. * It use the given SAX function block to handle the parsing callback.
  13562. * If sax is NULL, fallback to the default DOM tree building routines.
  13563. *
  13564. * Returns the resulting document tree
  13565. */
  13566. xmlDocPtr
  13567. xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
  13568. xmlDocPtr ret;
  13569. xmlParserCtxtPtr ctxt;
  13570. xmlSAXHandlerPtr oldsax = NULL;
  13571. if (cur == NULL) return(NULL);
  13572. ctxt = xmlCreateDocParserCtxt(cur);
  13573. if (ctxt == NULL) return(NULL);
  13574. if (sax != NULL) {
  13575. oldsax = ctxt->sax;
  13576. ctxt->sax = sax;
  13577. ctxt->userData = NULL;
  13578. }
  13579. xmlDetectSAX2(ctxt);
  13580. xmlParseDocument(ctxt);
  13581. if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
  13582. else {
  13583. ret = NULL;
  13584. xmlFreeDoc(ctxt->myDoc);
  13585. ctxt->myDoc = NULL;
  13586. }
  13587. if (sax != NULL)
  13588. ctxt->sax = oldsax;
  13589. xmlFreeParserCtxt(ctxt);
  13590. return(ret);
  13591. }
  13592. /**
  13593. * xmlParseDoc:
  13594. * @cur: a pointer to an array of xmlChar
  13595. *
  13596. * parse an XML in-memory document and build a tree.
  13597. *
  13598. * Returns the resulting document tree
  13599. */
  13600. xmlDocPtr
  13601. xmlParseDoc(const xmlChar *cur) {
  13602. return(xmlSAXParseDoc(NULL, cur, 0));
  13603. }
  13604. #endif /* LIBXML_SAX1_ENABLED */
  13605. #ifdef LIBXML_LEGACY_ENABLED
  13606. /************************************************************************
  13607. * *
  13608. * Specific function to keep track of entities references *
  13609. * and used by the XSLT debugger *
  13610. * *
  13611. ************************************************************************/
  13612. static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
  13613. /**
  13614. * xmlAddEntityReference:
  13615. * @ent : A valid entity
  13616. * @firstNode : A valid first node for children of entity
  13617. * @lastNode : A valid last node of children entity
  13618. *
  13619. * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
  13620. */
  13621. static void
  13622. xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
  13623. xmlNodePtr lastNode)
  13624. {
  13625. if (xmlEntityRefFunc != NULL) {
  13626. (*xmlEntityRefFunc) (ent, firstNode, lastNode);
  13627. }
  13628. }
  13629. /**
  13630. * xmlSetEntityReferenceFunc:
  13631. * @func: A valid function
  13632. *
  13633. * Set the function to call call back when a xml reference has been made
  13634. */
  13635. void
  13636. xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
  13637. {
  13638. xmlEntityRefFunc = func;
  13639. }
  13640. #endif /* LIBXML_LEGACY_ENABLED */
  13641. /************************************************************************
  13642. * *
  13643. * Miscellaneous *
  13644. * *
  13645. ************************************************************************/
  13646. #ifdef LIBXML_XPATH_ENABLED
  13647. #include <libxml/xpath.h>
  13648. #endif
  13649. extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
  13650. static int xmlParserInitialized = 0;
  13651. /**
  13652. * xmlInitParser:
  13653. *
  13654. * Initialization function for the XML parser.
  13655. * This is not reentrant. Call once before processing in case of
  13656. * use in multithreaded programs.
  13657. */
  13658. void
  13659. xmlInitParser(void) {
  13660. if (xmlParserInitialized != 0)
  13661. return;
  13662. #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
  13663. atexit(xmlCleanupParser);
  13664. #endif
  13665. #ifdef LIBXML_THREAD_ENABLED
  13666. __xmlGlobalInitMutexLock();
  13667. if (xmlParserInitialized == 0) {
  13668. #endif
  13669. xmlInitThreads();
  13670. xmlInitGlobals();
  13671. if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
  13672. (xmlGenericError == NULL))
  13673. initGenericErrorDefaultFunc(NULL);
  13674. xmlInitMemory();
  13675. xmlInitializeDict();
  13676. xmlInitCharEncodingHandlers();
  13677. xmlDefaultSAXHandlerInit();
  13678. xmlRegisterDefaultInputCallbacks();
  13679. #ifdef LIBXML_OUTPUT_ENABLED
  13680. xmlRegisterDefaultOutputCallbacks();
  13681. #endif /* LIBXML_OUTPUT_ENABLED */
  13682. #ifdef LIBXML_HTML_ENABLED
  13683. htmlInitAutoClose();
  13684. htmlDefaultSAXHandlerInit();
  13685. #endif
  13686. #ifdef LIBXML_XPATH_ENABLED
  13687. xmlXPathInit();
  13688. #endif
  13689. xmlParserInitialized = 1;
  13690. #ifdef LIBXML_THREAD_ENABLED
  13691. }
  13692. __xmlGlobalInitMutexUnlock();
  13693. #endif
  13694. }
  13695. /**
  13696. * xmlCleanupParser:
  13697. *
  13698. * This function name is somewhat misleading. It does not clean up
  13699. * parser state, it cleans up memory allocated by the library itself.
  13700. * It is a cleanup function for the XML library. It tries to reclaim all
  13701. * related global memory allocated for the library processing.
  13702. * It doesn't deallocate any document related memory. One should
  13703. * call xmlCleanupParser() only when the process has finished using
  13704. * the library and all XML/HTML documents built with it.
  13705. * See also xmlInitParser() which has the opposite function of preparing
  13706. * the library for operations.
  13707. *
  13708. * WARNING: if your application is multithreaded or has plugin support
  13709. * calling this may crash the application if another thread or
  13710. * a plugin is still using libxml2. It's sometimes very hard to
  13711. * guess if libxml2 is in use in the application, some libraries
  13712. * or plugins may use it without notice. In case of doubt abstain
  13713. * from calling this function or do it just before calling exit()
  13714. * to avoid leak reports from valgrind !
  13715. */
  13716. void
  13717. xmlCleanupParser(void) {
  13718. if (!xmlParserInitialized)
  13719. return;
  13720. xmlCleanupCharEncodingHandlers();
  13721. #ifdef LIBXML_CATALOG_ENABLED
  13722. xmlCatalogCleanup();
  13723. #endif
  13724. xmlDictCleanup();
  13725. xmlCleanupInputCallbacks();
  13726. #ifdef LIBXML_OUTPUT_ENABLED
  13727. xmlCleanupOutputCallbacks();
  13728. #endif
  13729. #ifdef LIBXML_SCHEMAS_ENABLED
  13730. xmlSchemaCleanupTypes();
  13731. xmlRelaxNGCleanupTypes();
  13732. #endif
  13733. xmlResetLastError();
  13734. xmlCleanupGlobals();
  13735. xmlCleanupThreads(); /* must be last if called not from the main thread */
  13736. xmlCleanupMemory();
  13737. xmlParserInitialized = 0;
  13738. }
  #if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
      !defined(_WIN32)
  /*
   * xmlDestructor:
   *
   * Function tagged with ATTRIBUTE_DESTRUCTOR that runs the library
   * cleanup, but only while xmlFree is still the C library free().
   */
  static void
  ATTRIBUTE_DESTRUCTOR
  xmlDestructor(void) {
      /*
       * Calling custom deallocation functions in a destructor can cause
       * problems, for example with Nokogiri.
       */
      if (xmlFree != free)
          return;

      xmlCleanupParser();
  }
  #endif
  13752. /************************************************************************
  13753. * *
  13754. * New set (2.6.0) of simpler and more flexible APIs *
  13755. * *
  13756. ************************************************************************/
  /**
   * DICT_FREE:
   * @str:  a string
   *
   * Free a string if it is not owned by the "dict" dictionary in the
   * current scope. A variable named dict must be visible where the macro
   * is used. NULL strings are ignored. @str is evaluated more than once.
   *
   * The body is wrapped in do { } while (0) so that the macro expands to
   * a single statement and can be used safely inside if/else constructs;
   * the caller supplies the terminating semicolon.
   */
  #define DICT_FREE(str)						\
      do {								\
          if ((str) && ((!dict) ||					\
              (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
              xmlFree((char *)(str));					\
      } while (0)
  13768. /**
  13769. * xmlCtxtReset:
  13770. * @ctxt: an XML parser context
  13771. *
  13772. * Reset a parser context
  13773. */
  13774. void
  13775. xmlCtxtReset(xmlParserCtxtPtr ctxt)
  13776. {
  13777. xmlParserInputPtr input;
  13778. xmlDictPtr dict;
  13779. if (ctxt == NULL)
  13780. return;
  13781. dict = ctxt->dict;
  13782. while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
  13783. xmlFreeInputStream(input);
  13784. }
  13785. ctxt->inputNr = 0;
  13786. ctxt->input = NULL;
  13787. ctxt->spaceNr = 0;
  13788. if (ctxt->spaceTab != NULL) {
  13789. ctxt->spaceTab[0] = -1;
  13790. ctxt->space = &ctxt->spaceTab[0];
  13791. } else {
  13792. ctxt->space = NULL;
  13793. }
  13794. ctxt->nodeNr = 0;
  13795. ctxt->node = NULL;
  13796. ctxt->nameNr = 0;
  13797. ctxt->name = NULL;
  13798. DICT_FREE(ctxt->version);
  13799. ctxt->version = NULL;
  13800. DICT_FREE(ctxt->encoding);
  13801. ctxt->encoding = NULL;
  13802. DICT_FREE(ctxt->directory);
  13803. ctxt->directory = NULL;
  13804. DICT_FREE(ctxt->extSubURI);
  13805. ctxt->extSubURI = NULL;
  13806. DICT_FREE(ctxt->extSubSystem);
  13807. ctxt->extSubSystem = NULL;
  13808. if (ctxt->myDoc != NULL)
  13809. xmlFreeDoc(ctxt->myDoc);
  13810. ctxt->myDoc = NULL;
  13811. ctxt->standalone = -1;
  13812. ctxt->hasExternalSubset = 0;
  13813. ctxt->hasPErefs = 0;
  13814. ctxt->html = 0;
  13815. ctxt->external = 0;
  13816. ctxt->instate = XML_PARSER_START;
  13817. ctxt->token = 0;
  13818. ctxt->wellFormed = 1;
  13819. ctxt->nsWellFormed = 1;
  13820. ctxt->disableSAX = 0;
  13821. ctxt->valid = 1;
  13822. #if 0
  13823. ctxt->vctxt.userData = ctxt;
  13824. ctxt->vctxt.error = xmlParserValidityError;
  13825. ctxt->vctxt.warning = xmlParserValidityWarning;
  13826. #endif
  13827. ctxt->record_info = 0;
  13828. ctxt->checkIndex = 0;
  13829. ctxt->inSubset = 0;
  13830. ctxt->errNo = XML_ERR_OK;
  13831. ctxt->depth = 0;
  13832. ctxt->charset = XML_CHAR_ENCODING_UTF8;
  13833. ctxt->catalogs = NULL;
  13834. ctxt->nbentities = 0;
  13835. ctxt->sizeentities = 0;
  13836. ctxt->sizeentcopy = 0;
  13837. xmlInitNodeInfoSeq(&ctxt->node_seq);
  13838. if (ctxt->attsDefault != NULL) {
  13839. xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
  13840. ctxt->attsDefault = NULL;
  13841. }
  13842. if (ctxt->attsSpecial != NULL) {
  13843. xmlHashFree(ctxt->attsSpecial, NULL);
  13844. ctxt->attsSpecial = NULL;
  13845. }
  13846. #ifdef LIBXML_CATALOG_ENABLED
  13847. if (ctxt->catalogs != NULL)
  13848. xmlCatalogFreeLocal(ctxt->catalogs);
  13849. #endif
  13850. if (ctxt->lastError.code != XML_ERR_OK)
  13851. xmlResetError(&ctxt->lastError);
  13852. }
  13853. /**
  13854. * xmlCtxtResetPush:
  13855. * @ctxt: an XML parser context
  13856. * @chunk: a pointer to an array of chars
  13857. * @size: number of chars in the array
  13858. * @filename: an optional file name or URI
  13859. * @encoding: the document encoding, or NULL
  13860. *
  13861. * Reset a push parser context
  13862. *
  13863. * Returns 0 in case of success and 1 in case of error
  13864. */
  13865. int
  13866. xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
  13867. int size, const char *filename, const char *encoding)
  13868. {
  13869. xmlParserInputPtr inputStream;
  13870. xmlParserInputBufferPtr buf;
  13871. xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
  13872. if (ctxt == NULL)
  13873. return(1);
  13874. if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
  13875. enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
  13876. buf = xmlAllocParserInputBuffer(enc);
  13877. if (buf == NULL)
  13878. return(1);
  13879. if (ctxt == NULL) {
  13880. xmlFreeParserInputBuffer(buf);
  13881. return(1);
  13882. }
  13883. xmlCtxtReset(ctxt);
  13884. if (filename == NULL) {
  13885. ctxt->directory = NULL;
  13886. } else {
  13887. ctxt->directory = xmlParserGetDirectory(filename);
  13888. }
  13889. inputStream = xmlNewInputStream(ctxt);
  13890. if (inputStream == NULL) {
  13891. xmlFreeParserInputBuffer(buf);
  13892. return(1);
  13893. }
  13894. if (filename == NULL)
  13895. inputStream->filename = NULL;
  13896. else
  13897. inputStream->filename = (char *)
  13898. xmlCanonicPath((const xmlChar *) filename);
  13899. inputStream->buf = buf;
  13900. xmlBufResetInput(buf->buffer, inputStream);
  13901. inputPush(ctxt, inputStream);
  13902. if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
  13903. (ctxt->input->buf != NULL)) {
  13904. size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
  13905. size_t cur = ctxt->input->cur - ctxt->input->base;
  13906. xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
  13907. xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
  13908. #ifdef DEBUG_PUSH
  13909. xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
  13910. #endif
  13911. }
  13912. if (encoding != NULL) {
  13913. xmlCharEncodingHandlerPtr hdlr;
  13914. if (ctxt->encoding != NULL)
  13915. xmlFree((xmlChar *) ctxt->encoding);
  13916. ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
  13917. hdlr = xmlFindCharEncodingHandler(encoding);
  13918. if (hdlr != NULL) {
  13919. xmlSwitchToEncoding(ctxt, hdlr);
  13920. } else {
  13921. xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
  13922. "Unsupported encoding %s\n", BAD_CAST encoding);
  13923. }
  13924. } else if (enc != XML_CHAR_ENCODING_NONE) {
  13925. xmlSwitchEncoding(ctxt, enc);
  13926. }
  13927. return(0);
  13928. }
  13929. /**
  13930. * xmlCtxtUseOptionsInternal:
  13931. * @ctxt: an XML parser context
  13932. * @options: a combination of xmlParserOption
  13933. * @encoding: the user provided encoding to use
  13934. *
  13935. * Applies the options to the parser context
  13936. *
  13937. * Returns 0 in case of success, the set of unknown or unimplemented options
  13938. * in case of error.
  13939. */
  13940. static int
  13941. xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
  13942. {
  13943. if (ctxt == NULL)
  13944. return(-1);
  13945. if (encoding != NULL) {
  13946. if (ctxt->encoding != NULL)
  13947. xmlFree((xmlChar *) ctxt->encoding);
  13948. ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
  13949. }
  13950. if (options & XML_PARSE_RECOVER) {
  13951. ctxt->recovery = 1;
  13952. options -= XML_PARSE_RECOVER;
  13953. ctxt->options |= XML_PARSE_RECOVER;
  13954. } else
  13955. ctxt->recovery = 0;
  13956. if (options & XML_PARSE_DTDLOAD) {
  13957. ctxt->loadsubset = XML_DETECT_IDS;
  13958. options -= XML_PARSE_DTDLOAD;
  13959. ctxt->options |= XML_PARSE_DTDLOAD;
  13960. } else
  13961. ctxt->loadsubset = 0;
  13962. if (options & XML_PARSE_DTDATTR) {
  13963. ctxt->loadsubset |= XML_COMPLETE_ATTRS;
  13964. options -= XML_PARSE_DTDATTR;
  13965. ctxt->options |= XML_PARSE_DTDATTR;
  13966. }
  13967. if (options & XML_PARSE_NOENT) {
  13968. ctxt->replaceEntities = 1;
  13969. /* ctxt->loadsubset |= XML_DETECT_IDS; */
  13970. options -= XML_PARSE_NOENT;
  13971. ctxt->options |= XML_PARSE_NOENT;
  13972. } else
  13973. ctxt->replaceEntities = 0;
  13974. if (options & XML_PARSE_PEDANTIC) {
  13975. ctxt->pedantic = 1;
  13976. options -= XML_PARSE_PEDANTIC;
  13977. ctxt->options |= XML_PARSE_PEDANTIC;
  13978. } else
  13979. ctxt->pedantic = 0;
  13980. if (options & XML_PARSE_NOBLANKS) {
  13981. ctxt->keepBlanks = 0;
  13982. ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
  13983. options -= XML_PARSE_NOBLANKS;
  13984. ctxt->options |= XML_PARSE_NOBLANKS;
  13985. } else
  13986. ctxt->keepBlanks = 1;
  13987. if (options & XML_PARSE_DTDVALID) {
  13988. ctxt->validate = 1;
  13989. if (options & XML_PARSE_NOWARNING)
  13990. ctxt->vctxt.warning = NULL;
  13991. if (options & XML_PARSE_NOERROR)
  13992. ctxt->vctxt.error = NULL;
  13993. options -= XML_PARSE_DTDVALID;
  13994. ctxt->options |= XML_PARSE_DTDVALID;
  13995. } else
  13996. ctxt->validate = 0;
  13997. if (options & XML_PARSE_NOWARNING) {
  13998. ctxt->sax->warning = NULL;
  13999. options -= XML_PARSE_NOWARNING;
  14000. }
  14001. if (options & XML_PARSE_NOERROR) {
  14002. ctxt->sax->error = NULL;
  14003. ctxt->sax->fatalError = NULL;
  14004. options -= XML_PARSE_NOERROR;
  14005. }
  14006. #ifdef LIBXML_SAX1_ENABLED
  14007. if (options & XML_PARSE_SAX1) {
  14008. ctxt->sax->startElement = xmlSAX2StartElement;
  14009. ctxt->sax->endElement = xmlSAX2EndElement;
  14010. ctxt->sax->startElementNs = NULL;
  14011. ctxt->sax->endElementNs = NULL;
  14012. ctxt->sax->initialized = 1;
  14013. options -= XML_PARSE_SAX1;
  14014. ctxt->options |= XML_PARSE_SAX1;
  14015. }
  14016. #endif /* LIBXML_SAX1_ENABLED */
  14017. if (options & XML_PARSE_NODICT) {
  14018. ctxt->dictNames = 0;
  14019. options -= XML_PARSE_NODICT;
  14020. ctxt->options |= XML_PARSE_NODICT;
  14021. } else {
  14022. ctxt->dictNames = 1;
  14023. }
  14024. if (options & XML_PARSE_NOCDATA) {
  14025. ctxt->sax->cdataBlock = NULL;
  14026. options -= XML_PARSE_NOCDATA;
  14027. ctxt->options |= XML_PARSE_NOCDATA;
  14028. }
  14029. if (options & XML_PARSE_NSCLEAN) {
  14030. ctxt->options |= XML_PARSE_NSCLEAN;
  14031. options -= XML_PARSE_NSCLEAN;
  14032. }
  14033. if (options & XML_PARSE_NONET) {
  14034. ctxt->options |= XML_PARSE_NONET;
  14035. options -= XML_PARSE_NONET;
  14036. }
  14037. if (options & XML_PARSE_COMPACT) {
  14038. ctxt->options |= XML_PARSE_COMPACT;
  14039. options -= XML_PARSE_COMPACT;
  14040. }
  14041. if (options & XML_PARSE_OLD10) {
  14042. ctxt->options |= XML_PARSE_OLD10;
  14043. options -= XML_PARSE_OLD10;
  14044. }
  14045. if (options & XML_PARSE_NOBASEFIX) {
  14046. ctxt->options |= XML_PARSE_NOBASEFIX;
  14047. options -= XML_PARSE_NOBASEFIX;
  14048. }
  14049. if (options & XML_PARSE_HUGE) {
  14050. ctxt->options |= XML_PARSE_HUGE;
  14051. options -= XML_PARSE_HUGE;
  14052. if (ctxt->dict != NULL)
  14053. xmlDictSetLimit(ctxt->dict, 0);
  14054. }
  14055. if (options & XML_PARSE_OLDSAX) {
  14056. ctxt->options |= XML_PARSE_OLDSAX;
  14057. options -= XML_PARSE_OLDSAX;
  14058. }
  14059. if (options & XML_PARSE_IGNORE_ENC) {
  14060. ctxt->options |= XML_PARSE_IGNORE_ENC;
  14061. options -= XML_PARSE_IGNORE_ENC;
  14062. }
  14063. if (options & XML_PARSE_BIG_LINES) {
  14064. ctxt->options |= XML_PARSE_BIG_LINES;
  14065. options -= XML_PARSE_BIG_LINES;
  14066. }
  14067. ctxt->linenumbers = 1;
  14068. return (options);
  14069. }
  14070. /**
  14071. * xmlCtxtUseOptions:
  14072. * @ctxt: an XML parser context
  14073. * @options: a combination of xmlParserOption
  14074. *
  14075. * Applies the options to the parser context
  14076. *
  14077. * Returns 0 in case of success, the set of unknown or unimplemented options
  14078. * in case of error.
  14079. */
  14080. int
  14081. xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
  14082. {
  14083. return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
  14084. }
  14085. /**
  14086. * xmlDoRead:
  14087. * @ctxt: an XML parser context
  14088. * @URL: the base URL to use for the document
  14089. * @encoding: the document encoding, or NULL
  14090. * @options: a combination of xmlParserOption
  14091. * @reuse: keep the context for reuse
  14092. *
  14093. * Common front-end for the xmlRead functions
  14094. *
  14095. * Returns the resulting document tree or NULL
  14096. */
  14097. static xmlDocPtr
  14098. xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
  14099. int options, int reuse)
  14100. {
  14101. xmlDocPtr ret;
  14102. xmlCtxtUseOptionsInternal(ctxt, options, encoding);
  14103. if (encoding != NULL) {
  14104. xmlCharEncodingHandlerPtr hdlr;
  14105. hdlr = xmlFindCharEncodingHandler(encoding);
  14106. if (hdlr != NULL)
  14107. xmlSwitchToEncoding(ctxt, hdlr);
  14108. }
  14109. if ((URL != NULL) && (ctxt->input != NULL) &&
  14110. (ctxt->input->filename == NULL))
  14111. ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
  14112. xmlParseDocument(ctxt);
  14113. if ((ctxt->wellFormed) || ctxt->recovery)
  14114. ret = ctxt->myDoc;
  14115. else {
  14116. ret = NULL;
  14117. if (ctxt->myDoc != NULL) {
  14118. xmlFreeDoc(ctxt->myDoc);
  14119. }
  14120. }
  14121. ctxt->myDoc = NULL;
  14122. if (!reuse) {
  14123. xmlFreeParserCtxt(ctxt);
  14124. }
  14125. return (ret);
  14126. }
  14127. /**
  14128. * xmlReadDoc:
  14129. * @cur: a pointer to a zero terminated string
  14130. * @URL: the base URL to use for the document
  14131. * @encoding: the document encoding, or NULL
  14132. * @options: a combination of xmlParserOption
  14133. *
  14134. * parse an XML in-memory document and build a tree.
  14135. *
  14136. * Returns the resulting document tree
  14137. */
  14138. xmlDocPtr
  14139. xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
  14140. {
  14141. xmlParserCtxtPtr ctxt;
  14142. if (cur == NULL)
  14143. return (NULL);
  14144. xmlInitParser();
  14145. ctxt = xmlCreateDocParserCtxt(cur);
  14146. if (ctxt == NULL)
  14147. return (NULL);
  14148. return (xmlDoRead(ctxt, URL, encoding, options, 0));
  14149. }
  14150. /**
  14151. * xmlReadFile:
  14152. * @filename: a file or URL
  14153. * @encoding: the document encoding, or NULL
  14154. * @options: a combination of xmlParserOption
  14155. *
  14156. * parse an XML file from the filesystem or the network.
  14157. *
  14158. * Returns the resulting document tree
  14159. */
  14160. xmlDocPtr
  14161. xmlReadFile(const char *filename, const char *encoding, int options)
  14162. {
  14163. xmlParserCtxtPtr ctxt;
  14164. xmlInitParser();
  14165. ctxt = xmlCreateURLParserCtxt(filename, options);
  14166. if (ctxt == NULL)
  14167. return (NULL);
  14168. return (xmlDoRead(ctxt, NULL, encoding, options, 0));
  14169. }
  14170. /**
  14171. * xmlReadMemory:
  14172. * @buffer: a pointer to a char array
  14173. * @size: the size of the array
  14174. * @URL: the base URL to use for the document
  14175. * @encoding: the document encoding, or NULL
  14176. * @options: a combination of xmlParserOption
  14177. *
  14178. * parse an XML in-memory document and build a tree.
  14179. *
  14180. * Returns the resulting document tree
  14181. */
  14182. xmlDocPtr
  14183. xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
  14184. {
  14185. xmlParserCtxtPtr ctxt;
  14186. xmlInitParser();
  14187. ctxt = xmlCreateMemoryParserCtxt(buffer, size);
  14188. if (ctxt == NULL)
  14189. return (NULL);
  14190. return (xmlDoRead(ctxt, URL, encoding, options, 0));
  14191. }
  14192. /**
  14193. * xmlReadFd:
  14194. * @fd: an open file descriptor
  14195. * @URL: the base URL to use for the document
  14196. * @encoding: the document encoding, or NULL
  14197. * @options: a combination of xmlParserOption
  14198. *
  14199. * parse an XML from a file descriptor and build a tree.
  14200. * NOTE that the file descriptor will not be closed when the
  14201. * reader is closed or reset.
  14202. *
  14203. * Returns the resulting document tree
  14204. */
  14205. xmlDocPtr
  14206. xmlReadFd(int fd, const char *URL, const char *encoding, int options)
  14207. {
  14208. xmlParserCtxtPtr ctxt;
  14209. xmlParserInputBufferPtr input;
  14210. xmlParserInputPtr stream;
  14211. if (fd < 0)
  14212. return (NULL);
  14213. xmlInitParser();
  14214. input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
  14215. if (input == NULL)
  14216. return (NULL);
  14217. input->closecallback = NULL;
  14218. ctxt = xmlNewParserCtxt();
  14219. if (ctxt == NULL) {
  14220. xmlFreeParserInputBuffer(input);
  14221. return (NULL);
  14222. }
  14223. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  14224. if (stream == NULL) {
  14225. xmlFreeParserInputBuffer(input);
  14226. xmlFreeParserCtxt(ctxt);
  14227. return (NULL);
  14228. }
  14229. inputPush(ctxt, stream);
  14230. return (xmlDoRead(ctxt, URL, encoding, options, 0));
  14231. }
  14232. /**
  14233. * xmlReadIO:
  14234. * @ioread: an I/O read function
  14235. * @ioclose: an I/O close function
  14236. * @ioctx: an I/O handler
  14237. * @URL: the base URL to use for the document
  14238. * @encoding: the document encoding, or NULL
  14239. * @options: a combination of xmlParserOption
  14240. *
  14241. * parse an XML document from I/O functions and source and build a tree.
  14242. *
  14243. * Returns the resulting document tree
  14244. */
  14245. xmlDocPtr
  14246. xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
  14247. void *ioctx, const char *URL, const char *encoding, int options)
  14248. {
  14249. xmlParserCtxtPtr ctxt;
  14250. xmlParserInputBufferPtr input;
  14251. xmlParserInputPtr stream;
  14252. if (ioread == NULL)
  14253. return (NULL);
  14254. xmlInitParser();
  14255. input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
  14256. XML_CHAR_ENCODING_NONE);
  14257. if (input == NULL) {
  14258. if (ioclose != NULL)
  14259. ioclose(ioctx);
  14260. return (NULL);
  14261. }
  14262. ctxt = xmlNewParserCtxt();
  14263. if (ctxt == NULL) {
  14264. xmlFreeParserInputBuffer(input);
  14265. return (NULL);
  14266. }
  14267. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  14268. if (stream == NULL) {
  14269. xmlFreeParserInputBuffer(input);
  14270. xmlFreeParserCtxt(ctxt);
  14271. return (NULL);
  14272. }
  14273. inputPush(ctxt, stream);
  14274. return (xmlDoRead(ctxt, URL, encoding, options, 0));
  14275. }
  14276. /**
  14277. * xmlCtxtReadDoc:
  14278. * @ctxt: an XML parser context
  14279. * @cur: a pointer to a zero terminated string
  14280. * @URL: the base URL to use for the document
  14281. * @encoding: the document encoding, or NULL
  14282. * @options: a combination of xmlParserOption
  14283. *
  14284. * parse an XML in-memory document and build a tree.
  14285. * This reuses the existing @ctxt parser context
  14286. *
  14287. * Returns the resulting document tree
  14288. */
  14289. xmlDocPtr
  14290. xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
  14291. const char *URL, const char *encoding, int options)
  14292. {
  14293. xmlParserInputPtr stream;
  14294. if (cur == NULL)
  14295. return (NULL);
  14296. if (ctxt == NULL)
  14297. return (NULL);
  14298. xmlInitParser();
  14299. xmlCtxtReset(ctxt);
  14300. stream = xmlNewStringInputStream(ctxt, cur);
  14301. if (stream == NULL) {
  14302. return (NULL);
  14303. }
  14304. inputPush(ctxt, stream);
  14305. return (xmlDoRead(ctxt, URL, encoding, options, 1));
  14306. }
  14307. /**
  14308. * xmlCtxtReadFile:
  14309. * @ctxt: an XML parser context
  14310. * @filename: a file or URL
  14311. * @encoding: the document encoding, or NULL
  14312. * @options: a combination of xmlParserOption
  14313. *
  14314. * parse an XML file from the filesystem or the network.
  14315. * This reuses the existing @ctxt parser context
  14316. *
  14317. * Returns the resulting document tree
  14318. */
  14319. xmlDocPtr
  14320. xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
  14321. const char *encoding, int options)
  14322. {
  14323. xmlParserInputPtr stream;
  14324. if (filename == NULL)
  14325. return (NULL);
  14326. if (ctxt == NULL)
  14327. return (NULL);
  14328. xmlInitParser();
  14329. xmlCtxtReset(ctxt);
  14330. stream = xmlLoadExternalEntity(filename, NULL, ctxt);
  14331. if (stream == NULL) {
  14332. return (NULL);
  14333. }
  14334. inputPush(ctxt, stream);
  14335. return (xmlDoRead(ctxt, NULL, encoding, options, 1));
  14336. }
  14337. /**
  14338. * xmlCtxtReadMemory:
  14339. * @ctxt: an XML parser context
  14340. * @buffer: a pointer to a char array
  14341. * @size: the size of the array
  14342. * @URL: the base URL to use for the document
  14343. * @encoding: the document encoding, or NULL
  14344. * @options: a combination of xmlParserOption
  14345. *
  14346. * parse an XML in-memory document and build a tree.
  14347. * This reuses the existing @ctxt parser context
  14348. *
  14349. * Returns the resulting document tree
  14350. */
  14351. xmlDocPtr
  14352. xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
  14353. const char *URL, const char *encoding, int options)
  14354. {
  14355. xmlParserInputBufferPtr input;
  14356. xmlParserInputPtr stream;
  14357. if (ctxt == NULL)
  14358. return (NULL);
  14359. if (buffer == NULL)
  14360. return (NULL);
  14361. xmlInitParser();
  14362. xmlCtxtReset(ctxt);
  14363. input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
  14364. if (input == NULL) {
  14365. return(NULL);
  14366. }
  14367. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  14368. if (stream == NULL) {
  14369. xmlFreeParserInputBuffer(input);
  14370. return(NULL);
  14371. }
  14372. inputPush(ctxt, stream);
  14373. return (xmlDoRead(ctxt, URL, encoding, options, 1));
  14374. }
  14375. /**
  14376. * xmlCtxtReadFd:
  14377. * @ctxt: an XML parser context
  14378. * @fd: an open file descriptor
  14379. * @URL: the base URL to use for the document
  14380. * @encoding: the document encoding, or NULL
  14381. * @options: a combination of xmlParserOption
  14382. *
  14383. * parse an XML from a file descriptor and build a tree.
  14384. * This reuses the existing @ctxt parser context
  14385. * NOTE that the file descriptor will not be closed when the
  14386. * reader is closed or reset.
  14387. *
  14388. * Returns the resulting document tree
  14389. */
  14390. xmlDocPtr
  14391. xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
  14392. const char *URL, const char *encoding, int options)
  14393. {
  14394. xmlParserInputBufferPtr input;
  14395. xmlParserInputPtr stream;
  14396. if (fd < 0)
  14397. return (NULL);
  14398. if (ctxt == NULL)
  14399. return (NULL);
  14400. xmlInitParser();
  14401. xmlCtxtReset(ctxt);
  14402. input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
  14403. if (input == NULL)
  14404. return (NULL);
  14405. input->closecallback = NULL;
  14406. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  14407. if (stream == NULL) {
  14408. xmlFreeParserInputBuffer(input);
  14409. return (NULL);
  14410. }
  14411. inputPush(ctxt, stream);
  14412. return (xmlDoRead(ctxt, URL, encoding, options, 1));
  14413. }
  14414. /**
  14415. * xmlCtxtReadIO:
  14416. * @ctxt: an XML parser context
  14417. * @ioread: an I/O read function
  14418. * @ioclose: an I/O close function
  14419. * @ioctx: an I/O handler
  14420. * @URL: the base URL to use for the document
  14421. * @encoding: the document encoding, or NULL
  14422. * @options: a combination of xmlParserOption
  14423. *
  14424. * parse an XML document from I/O functions and source and build a tree.
  14425. * This reuses the existing @ctxt parser context
  14426. *
  14427. * Returns the resulting document tree
  14428. */
  14429. xmlDocPtr
  14430. xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
  14431. xmlInputCloseCallback ioclose, void *ioctx,
  14432. const char *URL,
  14433. const char *encoding, int options)
  14434. {
  14435. xmlParserInputBufferPtr input;
  14436. xmlParserInputPtr stream;
  14437. if (ioread == NULL)
  14438. return (NULL);
  14439. if (ctxt == NULL)
  14440. return (NULL);
  14441. xmlInitParser();
  14442. xmlCtxtReset(ctxt);
  14443. input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
  14444. XML_CHAR_ENCODING_NONE);
  14445. if (input == NULL) {
  14446. if (ioclose != NULL)
  14447. ioclose(ioctx);
  14448. return (NULL);
  14449. }
  14450. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  14451. if (stream == NULL) {
  14452. xmlFreeParserInputBuffer(input);
  14453. return (NULL);
  14454. }
  14455. inputPush(ctxt, stream);
  14456. return (xmlDoRead(ctxt, URL, encoding, options, 1));
  14457. }
  14458. #define bottom_parser
  14459. #include "elfgcchack.h"