apr_xml.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976
  1. /* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
  2. * applicable.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "apr.h"
  17. #include "apr_strings.h"
  18. #define APR_WANT_STDIO /* for sprintf() */
  19. #define APR_WANT_STRFUNC
  20. #include "apr_want.h"
  21. #include "apr_xml.h"
  22. #include "apu_config.h"
  23. #ifdef APR_HAVE_OLD_EXPAT
  24. #include "xmlparse.h"
  25. #else
  26. #include "expat.h"
  27. #endif
  28. #define DEBUG_CR "\r\n"
  29. static const char APR_KW_xmlns[] = { 0x78, 0x6D, 0x6C, 0x6E, 0x73, '\0' };
  30. static const char APR_KW_xmlns_lang[] = { 0x78, 0x6D, 0x6C, 0x3A, 0x6C, 0x61, 0x6E, 0x67, '\0' };
  31. static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };
  32. /* errors related to namespace processing */
  33. #define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)
  34. #define APR_XML_NS_ERROR_INVALID_DECL (-1001)
  35. /* test for a namespace prefix that begins with [Xx][Mm][Ll] */
  36. #define APR_XML_NS_IS_RESERVED(name) \
  37. ( (name[0] == 0x58 || name[0] == 0x78) && \
  38. (name[1] == 0x4D || name[1] == 0x6D) && \
  39. (name[2] == 0x4C || name[2] == 0x6C) )
  40. /* the real (internal) definition of the parser context */
  41. struct apr_xml_parser {
  42. apr_xml_doc *doc; /* the doc we're parsing */
  43. apr_pool_t *p; /* the pool we allocate from */
  44. apr_xml_elem *cur_elem; /* current element */
  45. int error; /* an error has occurred */
  46. #define APR_XML_ERROR_EXPAT 1
  47. #define APR_XML_ERROR_PARSE_DONE 2
  48. /* also: public APR_XML_NS_ERROR_* values (if any) */
  49. XML_Parser xp; /* the actual (Expat) XML parser */
  50. enum XML_Error xp_err; /* stored Expat error code */
  51. };
  52. /* struct for scoping namespace declarations */
  53. typedef struct apr_xml_ns_scope {
  54. const char *prefix; /* prefix used for this ns */
  55. int ns; /* index into namespace table */
  56. int emptyURI; /* the namespace URI is the empty string */
  57. struct apr_xml_ns_scope *next; /* next scoped namespace */
  58. } apr_xml_ns_scope;
  59. /* return namespace table index for a given prefix */
  60. static int find_prefix(apr_xml_parser *parser, const char *prefix)
  61. {
  62. apr_xml_elem *elem = parser->cur_elem;
  63. /*
  64. ** Walk up the tree, looking for a namespace scope that defines this
  65. ** prefix.
  66. */
  67. for (; elem; elem = elem->parent) {
  68. apr_xml_ns_scope *ns_scope;
  69. for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) {
  70. if (strcmp(prefix, ns_scope->prefix) == 0) {
  71. if (ns_scope->emptyURI) {
  72. /*
  73. ** It is possible to set the default namespace to an
  74. ** empty URI string; this resets the default namespace
  75. ** to mean "no namespace." We just found the prefix
  76. ** refers to an empty URI, so return "no namespace."
  77. */
  78. return APR_XML_NS_NONE;
  79. }
  80. return ns_scope->ns;
  81. }
  82. }
  83. }
  84. /*
  85. * If the prefix is empty (""), this means that a prefix was not
  86. * specified in the element/attribute. The search that was performed
  87. * just above did not locate a default namespace URI (which is stored
  88. * into ns_scope with an empty prefix). This means the element/attribute
  89. * has "no namespace". We have a reserved value for this.
  90. */
  91. if (*prefix == '\0') {
  92. return APR_XML_NS_NONE;
  93. }
  94. /* not found */
  95. return APR_XML_NS_ERROR_UNKNOWN_PREFIX;
  96. }
  97. static void start_handler(void *userdata, const char *name, const char **attrs)
  98. {
  99. apr_xml_parser *parser = userdata;
  100. apr_xml_elem *elem;
  101. apr_xml_attr *attr;
  102. apr_xml_attr *prev;
  103. char *colon;
  104. const char *quoted;
  105. char *elem_name;
  106. /* punt once we find an error */
  107. if (parser->error)
  108. return;
  109. elem = apr_pcalloc(parser->p, sizeof(*elem));
  110. /* prep the element */
  111. elem->name = elem_name = apr_pstrdup(parser->p, name);
  112. /* fill in the attributes (note: ends up in reverse order) */
  113. while (*attrs) {
  114. attr = apr_palloc(parser->p, sizeof(*attr));
  115. attr->name = apr_pstrdup(parser->p, *attrs++);
  116. attr->value = apr_pstrdup(parser->p, *attrs++);
  117. attr->next = elem->attr;
  118. elem->attr = attr;
  119. }
  120. /* hook the element into the tree */
  121. if (parser->cur_elem == NULL) {
  122. /* no current element; this also becomes the root */
  123. parser->cur_elem = parser->doc->root = elem;
  124. }
  125. else {
  126. /* this element appeared within the current elem */
  127. elem->parent = parser->cur_elem;
  128. /* set up the child/sibling links */
  129. if (elem->parent->last_child == NULL) {
  130. /* no first child either */
  131. elem->parent->first_child = elem->parent->last_child = elem;
  132. }
  133. else {
  134. /* hook onto the end of the parent's children */
  135. elem->parent->last_child->next = elem;
  136. elem->parent->last_child = elem;
  137. }
  138. /* this element is now the current element */
  139. parser->cur_elem = elem;
  140. }
  141. /* scan the attributes for namespace declarations */
  142. for (prev = NULL, attr = elem->attr;
  143. attr;
  144. attr = attr->next) {
  145. if (strncmp(attr->name, APR_KW_xmlns, 5) == 0) {
  146. const char *prefix = &attr->name[5];
  147. apr_xml_ns_scope *ns_scope;
  148. /* test for xmlns:foo= form and xmlns= form */
  149. if (*prefix == 0x3A) {
  150. /* a namespace prefix declaration must have a
  151. non-empty value. */
  152. if (attr->value[0] == '\0') {
  153. parser->error = APR_XML_NS_ERROR_INVALID_DECL;
  154. return;
  155. }
  156. ++prefix;
  157. }
  158. else if (*prefix != '\0') {
  159. /* advance "prev" since "attr" is still present */
  160. prev = attr;
  161. continue;
  162. }
  163. /* quote the URI before we ever start working with it */
  164. quoted = apr_xml_quote_string(parser->p, attr->value, 1);
  165. /* build and insert the new scope */
  166. ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope));
  167. ns_scope->prefix = prefix;
  168. ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted);
  169. ns_scope->emptyURI = *quoted == '\0';
  170. ns_scope->next = elem->ns_scope;
  171. elem->ns_scope = ns_scope;
  172. /* remove this attribute from the element */
  173. if (prev == NULL)
  174. elem->attr = attr->next;
  175. else
  176. prev->next = attr->next;
  177. /* Note: prev will not be advanced since we just removed "attr" */
  178. }
  179. else if (strcmp(attr->name, APR_KW_xmlns_lang) == 0) {
  180. /* save away the language (in quoted form) */
  181. elem->lang = apr_xml_quote_string(parser->p, attr->value, 1);
  182. /* remove this attribute from the element */
  183. if (prev == NULL)
  184. elem->attr = attr->next;
  185. else
  186. prev->next = attr->next;
  187. /* Note: prev will not be advanced since we just removed "attr" */
  188. }
  189. else {
  190. /* advance "prev" since "attr" is still present */
  191. prev = attr;
  192. }
  193. }
  194. /*
  195. ** If an xml:lang attribute didn't exist (lang==NULL), then copy the
  196. ** language from the parent element (if present).
  197. **
  198. ** NOTE: elem_size() *depends* upon this pointer equality.
  199. */
  200. if (elem->lang == NULL && elem->parent != NULL)
  201. elem->lang = elem->parent->lang;
  202. /* adjust the element's namespace */
  203. colon = strchr(elem_name, 0x3A);
  204. if (colon == NULL) {
  205. /*
  206. * The element is using the default namespace, which will always
  207. * be found. Either it will be "no namespace", or a default
  208. * namespace URI has been specified at some point.
  209. */
  210. elem->ns = find_prefix(parser, "");
  211. }
  212. else if (APR_XML_NS_IS_RESERVED(elem->name)) {
  213. elem->ns = APR_XML_NS_NONE;
  214. }
  215. else {
  216. *colon = '\0';
  217. elem->ns = find_prefix(parser, elem->name);
  218. elem->name = colon + 1;
  219. if (APR_XML_NS_IS_ERROR(elem->ns)) {
  220. parser->error = elem->ns;
  221. return;
  222. }
  223. }
  224. /* adjust all remaining attributes' namespaces */
  225. for (attr = elem->attr; attr; attr = attr->next) {
  226. /*
  227. * apr_xml_attr defines this as "const" but we dup'd it, so we
  228. * know that we can change it. a bit hacky, but the existing
  229. * structure def is best.
  230. */
  231. char *attr_name = (char *)attr->name;
  232. colon = strchr(attr_name, 0x3A);
  233. if (colon == NULL) {
  234. /*
  235. * Attributes do NOT use the default namespace. Therefore,
  236. * we place them into the "no namespace" category.
  237. */
  238. attr->ns = APR_XML_NS_NONE;
  239. }
  240. else if (APR_XML_NS_IS_RESERVED(attr->name)) {
  241. attr->ns = APR_XML_NS_NONE;
  242. }
  243. else {
  244. *colon = '\0';
  245. attr->ns = find_prefix(parser, attr->name);
  246. attr->name = colon + 1;
  247. if (APR_XML_NS_IS_ERROR(attr->ns)) {
  248. parser->error = attr->ns;
  249. return;
  250. }
  251. }
  252. }
  253. }
  254. static void end_handler(void *userdata, const char *name)
  255. {
  256. apr_xml_parser *parser = userdata;
  257. /* punt once we find an error */
  258. if (parser->error)
  259. return;
  260. /* pop up one level */
  261. parser->cur_elem = parser->cur_elem->parent;
  262. }
  263. static void cdata_handler(void *userdata, const char *data, int len)
  264. {
  265. apr_xml_parser *parser = userdata;
  266. apr_xml_elem *elem;
  267. apr_text_header *hdr;
  268. const char *s;
  269. /* punt once we find an error */
  270. if (parser->error)
  271. return;
  272. elem = parser->cur_elem;
  273. s = apr_pstrndup(parser->p, data, len);
  274. if (elem->last_child == NULL) {
  275. /* no children yet. this cdata follows the start tag */
  276. hdr = &elem->first_cdata;
  277. }
  278. else {
  279. /* child elements exist. this cdata follows the last child. */
  280. hdr = &elem->last_child->following_cdata;
  281. }
  282. apr_text_append(parser->p, hdr, s);
  283. }
  284. static apr_status_t cleanup_parser(void *ctx)
  285. {
  286. apr_xml_parser *parser = ctx;
  287. XML_ParserFree(parser->xp);
  288. parser->xp = NULL;
  289. return APR_SUCCESS;
  290. }
  291. APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool)
  292. {
  293. apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
  294. parser->p = pool;
  295. parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
  296. parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
  297. /* ### is there a way to avoid hard-coding this? */
  298. apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
  299. parser->xp = XML_ParserCreate(NULL);
  300. if (parser->xp == NULL) {
  301. (*apr_pool_abort_get(pool))(APR_ENOMEM);
  302. return NULL;
  303. }
  304. apr_pool_cleanup_register(pool, parser, cleanup_parser,
  305. apr_pool_cleanup_null);
  306. XML_SetUserData(parser->xp, parser);
  307. XML_SetElementHandler(parser->xp, start_handler, end_handler);
  308. XML_SetCharacterDataHandler(parser->xp, cdata_handler);
  309. return parser;
  310. }
  311. static apr_status_t do_parse(apr_xml_parser *parser,
  312. const char *data, apr_size_t len,
  313. int is_final)
  314. {
  315. if (parser->xp == NULL) {
  316. parser->error = APR_XML_ERROR_PARSE_DONE;
  317. }
  318. else {
  319. int rv = XML_Parse(parser->xp, data, len, is_final);
  320. if (rv == 0) {
  321. parser->error = APR_XML_ERROR_EXPAT;
  322. parser->xp_err = XML_GetErrorCode(parser->xp);
  323. }
  324. }
  325. /* ### better error code? */
  326. return parser->error ? APR_EGENERAL : APR_SUCCESS;
  327. }
  328. APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
  329. const char *data,
  330. apr_size_t len)
  331. {
  332. return do_parse(parser, data, len, 0 /* is_final */);
  333. }
  334. APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
  335. apr_xml_doc **pdoc)
  336. {
  337. char end;
  338. apr_status_t status = do_parse(parser, &end, 0, 1 /* is_final */);
  339. /* get rid of the parser */
  340. (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser);
  341. if (status)
  342. return status;
  343. if (pdoc != NULL)
  344. *pdoc = parser->doc;
  345. return APR_SUCCESS;
  346. }
  347. APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
  348. char *errbuf,
  349. apr_size_t errbufsize)
  350. {
  351. int error = parser->error;
  352. const char *msg;
  353. /* clear our record of an error */
  354. parser->error = 0;
  355. switch (error) {
  356. case 0:
  357. msg = "No error.";
  358. break;
  359. case APR_XML_NS_ERROR_UNKNOWN_PREFIX:
  360. msg = "An undefined namespace prefix was used.";
  361. break;
  362. case APR_XML_NS_ERROR_INVALID_DECL:
  363. msg = "A namespace prefix was defined with an empty URI.";
  364. break;
  365. case APR_XML_ERROR_EXPAT:
  366. (void) apr_snprintf(errbuf, errbufsize,
  367. "XML parser error code: %s (%d)",
  368. XML_ErrorString(parser->xp_err), parser->xp_err);
  369. return errbuf;
  370. case APR_XML_ERROR_PARSE_DONE:
  371. msg = "The parser is not active.";
  372. break;
  373. default:
  374. msg = "There was an unknown error within the XML body.";
  375. break;
  376. }
  377. (void) apr_cpystrn(errbuf, msg, errbufsize);
  378. return errbuf;
  379. }
  380. APU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p,
  381. apr_xml_parser **parser,
  382. apr_xml_doc **ppdoc,
  383. apr_file_t *xmlfd,
  384. apr_size_t buffer_length)
  385. {
  386. apr_status_t rv;
  387. char *buffer;
  388. apr_size_t length;
  389. *parser = apr_xml_parser_create(p);
  390. if (*parser == NULL) {
  391. /* FIXME: returning an error code would be nice,
  392. * but we dont get one ;( */
  393. return APR_EGENERAL;
  394. }
  395. buffer = apr_palloc(p, buffer_length);
  396. length = buffer_length;
  397. rv = apr_file_read(xmlfd, buffer, &length);
  398. while (rv == APR_SUCCESS) {
  399. rv = apr_xml_parser_feed(*parser, buffer, length);
  400. if (rv != APR_SUCCESS) {
  401. return rv;
  402. }
  403. length = buffer_length;
  404. rv = apr_file_read(xmlfd, buffer, &length);
  405. }
  406. if (rv != APR_EOF) {
  407. return rv;
  408. }
  409. rv = apr_xml_parser_done(*parser, ppdoc);
  410. *parser = NULL;
  411. return rv;
  412. }
  413. APU_DECLARE(void) apr_text_append(apr_pool_t * p, apr_text_header *hdr,
  414. const char *text)
  415. {
  416. apr_text *t = apr_palloc(p, sizeof(*t));
  417. t->text = text;
  418. t->next = NULL;
  419. if (hdr->first == NULL) {
  420. /* no text elements yet */
  421. hdr->first = hdr->last = t;
  422. }
  423. else {
  424. /* append to the last text element */
  425. hdr->last->next = t;
  426. hdr->last = t;
  427. }
  428. }
  429. /* ---------------------------------------------------------------
  430. **
  431. ** XML UTILITY FUNCTIONS
  432. */
  433. /*
  434. ** apr_xml_quote_string: quote an XML string
  435. **
  436. ** Replace '<', '>', and '&' with '&lt;', '&gt;', and '&amp;'.
  437. ** If quotes is true, then replace '"' with '&quot;'.
  438. **
  439. ** quotes is typically set to true for XML strings that will occur within
  440. ** double quotes -- attribute values.
  441. */
  442. APU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s,
  443. int quotes)
  444. {
  445. const char *scan;
  446. apr_size_t len = 0;
  447. apr_size_t extra = 0;
  448. char *qstr;
  449. char *qscan;
  450. char c;
  451. for (scan = s; (c = *scan) != '\0'; ++scan, ++len) {
  452. if (c == '<' || c == '>')
  453. extra += 3; /* &lt; or &gt; */
  454. else if (c == '&')
  455. extra += 4; /* &amp; */
  456. else if (quotes && c == '"')
  457. extra += 5; /* &quot; */
  458. }
  459. /* nothing to do? */
  460. if (extra == 0)
  461. return s;
  462. qstr = apr_palloc(p, len + extra + 1);
  463. for (scan = s, qscan = qstr; (c = *scan) != '\0'; ++scan) {
  464. if (c == '<') {
  465. *qscan++ = '&';
  466. *qscan++ = 'l';
  467. *qscan++ = 't';
  468. *qscan++ = ';';
  469. }
  470. else if (c == '>') {
  471. *qscan++ = '&';
  472. *qscan++ = 'g';
  473. *qscan++ = 't';
  474. *qscan++ = ';';
  475. }
  476. else if (c == '&') {
  477. *qscan++ = '&';
  478. *qscan++ = 'a';
  479. *qscan++ = 'm';
  480. *qscan++ = 'p';
  481. *qscan++ = ';';
  482. }
  483. else if (quotes && c == '"') {
  484. *qscan++ = '&';
  485. *qscan++ = 'q';
  486. *qscan++ = 'u';
  487. *qscan++ = 'o';
  488. *qscan++ = 't';
  489. *qscan++ = ';';
  490. }
  491. else {
  492. *qscan++ = c;
  493. }
  494. }
  495. *qscan = '\0';
  496. return qstr;
  497. }
  498. /* how many characters for the given integer? */
  499. #define APR_XML_NS_LEN(ns) ((ns) < 10 ? 1 : (ns) < 100 ? 2 : (ns) < 1000 ? 3 : \
  500. (ns) < 10000 ? 4 : (ns) < 100000 ? 5 : \
  501. (ns) < 1000000 ? 6 : (ns) < 10000000 ? 7 : \
  502. (ns) < 100000000 ? 8 : (ns) < 1000000000 ? 9 : 10)
  503. static apr_size_t text_size(const apr_text *t)
  504. {
  505. apr_size_t size = 0;
  506. for (; t; t = t->next)
  507. size += strlen(t->text);
  508. return size;
  509. }
  510. static apr_size_t elem_size(const apr_xml_elem *elem, int style,
  511. apr_array_header_t *namespaces, int *ns_map)
  512. {
  513. apr_size_t size;
  514. if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
  515. const apr_xml_attr *attr;
  516. size = 0;
  517. if (style == APR_XML_X2T_FULL_NS_LANG) {
  518. int i;
  519. /*
  520. ** The outer element will contain xmlns:ns%d="%s" attributes
  521. ** and an xml:lang attribute, if applicable.
  522. */
  523. for (i = namespaces->nelts; i--;) {
  524. /* compute size of: ' xmlns:ns%d="%s"' */
  525. size += (9 + APR_XML_NS_LEN(i) + 2 +
  526. strlen(APR_XML_GET_URI_ITEM(namespaces, i)) + 1);
  527. }
  528. if (elem->lang != NULL) {
  529. /* compute size of: ' xml:lang="%s"' */
  530. size += 11 + strlen(elem->lang) + 1;
  531. }
  532. }
  533. if (elem->ns == APR_XML_NS_NONE) {
  534. /* compute size of: <%s> */
  535. size += 1 + strlen(elem->name) + 1;
  536. }
  537. else {
  538. int ns = ns_map ? ns_map[elem->ns] : elem->ns;
  539. /* compute size of: <ns%d:%s> */
  540. size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(elem->name) + 1;
  541. }
  542. if (APR_XML_ELEM_IS_EMPTY(elem)) {
  543. /* insert a closing "/" */
  544. size += 1;
  545. }
  546. else {
  547. /*
  548. * two of above plus "/":
  549. * <ns%d:%s> ... </ns%d:%s>
  550. * OR <%s> ... </%s>
  551. */
  552. size = 2 * size + 1;
  553. }
  554. for (attr = elem->attr; attr; attr = attr->next) {
  555. if (attr->ns == APR_XML_NS_NONE) {
  556. /* compute size of: ' %s="%s"' */
  557. size += 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
  558. }
  559. else {
  560. /* compute size of: ' ns%d:%s="%s"' */
  561. size += 3 + APR_XML_NS_LEN(attr->ns) + 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
  562. }
  563. }
  564. /*
  565. ** If the element has an xml:lang value that is *different* from
  566. ** its parent, then add the thing in: ' xml:lang="%s"'.
  567. **
  568. ** NOTE: we take advantage of the pointer equality established by
  569. ** the parsing for "inheriting" the xml:lang values from parents.
  570. */
  571. if (elem->lang != NULL &&
  572. (elem->parent == NULL || elem->lang != elem->parent->lang)) {
  573. size += 11 + strlen(elem->lang) + 1;
  574. }
  575. }
  576. else if (style == APR_XML_X2T_LANG_INNER) {
  577. /*
  578. * This style prepends the xml:lang value plus a null terminator.
  579. * If a lang value is not present, then we insert a null term.
  580. */
  581. size = elem->lang ? strlen(elem->lang) + 1 : 1;
  582. }
  583. else
  584. size = 0;
  585. size += text_size(elem->first_cdata.first);
  586. for (elem = elem->first_child; elem; elem = elem->next) {
  587. /* the size of the child element plus the CDATA that follows it */
  588. size += (elem_size(elem, APR_XML_X2T_FULL, NULL, ns_map) +
  589. text_size(elem->following_cdata.first));
  590. }
  591. return size;
  592. }
  593. static char *write_text(char *s, const apr_text *t)
  594. {
  595. for (; t; t = t->next) {
  596. apr_size_t len = strlen(t->text);
  597. memcpy(s, t->text, len);
  598. s += len;
  599. }
  600. return s;
  601. }
  602. static char *write_elem(char *s, const apr_xml_elem *elem, int style,
  603. apr_array_header_t *namespaces, int *ns_map)
  604. {
  605. const apr_xml_elem *child;
  606. apr_size_t len;
  607. int ns;
  608. if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
  609. int empty = APR_XML_ELEM_IS_EMPTY(elem);
  610. const apr_xml_attr *attr;
  611. if (elem->ns == APR_XML_NS_NONE) {
  612. len = sprintf(s, "<%s", elem->name);
  613. }
  614. else {
  615. ns = ns_map ? ns_map[elem->ns] : elem->ns;
  616. len = sprintf(s, "<ns%d:%s", ns, elem->name);
  617. }
  618. s += len;
  619. for (attr = elem->attr; attr; attr = attr->next) {
  620. if (attr->ns == APR_XML_NS_NONE)
  621. len = sprintf(s, " %s=\"%s\"", attr->name, attr->value);
  622. else
  623. len = sprintf(s, " ns%d:%s=\"%s\"", attr->ns, attr->name, attr->value);
  624. s += len;
  625. }
  626. /* add the xml:lang value if necessary */
  627. if (elem->lang != NULL &&
  628. (style == APR_XML_X2T_FULL_NS_LANG ||
  629. elem->parent == NULL ||
  630. elem->lang != elem->parent->lang)) {
  631. len = sprintf(s, " xml:lang=\"%s\"", elem->lang);
  632. s += len;
  633. }
  634. /* add namespace definitions, if required */
  635. if (style == APR_XML_X2T_FULL_NS_LANG) {
  636. int i;
  637. for (i = namespaces->nelts; i--;) {
  638. len = sprintf(s, " xmlns:ns%d=\"%s\"", i,
  639. APR_XML_GET_URI_ITEM(namespaces, i));
  640. s += len;
  641. }
  642. }
  643. /* no more to do. close it up and go. */
  644. if (empty) {
  645. *s++ = '/';
  646. *s++ = '>';
  647. return s;
  648. }
  649. /* just close it */
  650. *s++ = '>';
  651. }
  652. else if (style == APR_XML_X2T_LANG_INNER) {
  653. /* prepend the xml:lang value */
  654. if (elem->lang != NULL) {
  655. len = strlen(elem->lang);
  656. memcpy(s, elem->lang, len);
  657. s += len;
  658. }
  659. *s++ = '\0';
  660. }
  661. s = write_text(s, elem->first_cdata.first);
  662. for (child = elem->first_child; child; child = child->next) {
  663. s = write_elem(s, child, APR_XML_X2T_FULL, NULL, ns_map);
  664. s = write_text(s, child->following_cdata.first);
  665. }
  666. if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
  667. if (elem->ns == APR_XML_NS_NONE) {
  668. len = sprintf(s, "</%s>", elem->name);
  669. }
  670. else {
  671. ns = ns_map ? ns_map[elem->ns] : elem->ns;
  672. len = sprintf(s, "</ns%d:%s>", ns, elem->name);
  673. }
  674. s += len;
  675. }
  676. return s;
  677. }
  678. APU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem)
  679. {
  680. apr_text *scan_txt;
  681. apr_xml_attr *scan_attr;
  682. apr_xml_elem *scan_elem;
  683. /* convert the element's text */
  684. for (scan_txt = elem->first_cdata.first;
  685. scan_txt != NULL;
  686. scan_txt = scan_txt->next) {
  687. scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
  688. }
  689. for (scan_txt = elem->following_cdata.first;
  690. scan_txt != NULL;
  691. scan_txt = scan_txt->next) {
  692. scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
  693. }
  694. /* convert the attribute values */
  695. for (scan_attr = elem->attr;
  696. scan_attr != NULL;
  697. scan_attr = scan_attr->next) {
  698. scan_attr->value = apr_xml_quote_string(p, scan_attr->value, 1);
  699. }
  700. /* convert the child elements */
  701. for (scan_elem = elem->first_child;
  702. scan_elem != NULL;
  703. scan_elem = scan_elem->next) {
  704. apr_xml_quote_elem(p, scan_elem);
  705. }
  706. }
  707. /* convert an element to a text string */
  708. APU_DECLARE(void) apr_xml_to_text(apr_pool_t * p, const apr_xml_elem *elem,
  709. int style, apr_array_header_t *namespaces,
  710. int *ns_map, const char **pbuf,
  711. apr_size_t *psize)
  712. {
  713. /* get the exact size, plus a null terminator */
  714. apr_size_t size = elem_size(elem, style, namespaces, ns_map) + 1;
  715. char *s = apr_palloc(p, size);
  716. (void) write_elem(s, elem, style, namespaces, ns_map);
  717. s[size - 1] = '\0';
  718. *pbuf = s;
  719. if (psize)
  720. *psize = size;
  721. }
  722. APU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t * p,
  723. const apr_xml_elem *elem)
  724. {
  725. if (elem->ns == APR_XML_NS_NONE) {
  726. /*
  727. * The prefix (xml...) is already within the prop name, or
  728. * the element simply has no prefix.
  729. */
  730. return apr_psprintf(p, "<%s/>" DEBUG_CR, elem->name);
  731. }
  732. return apr_psprintf(p, "<ns%d:%s/>" DEBUG_CR, elem->ns, elem->name);
  733. }
  734. /* return the URI's (existing) index, or insert it and return a new index */
  735. APU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array,
  736. const char *uri)
  737. {
  738. int i;
  739. const char **pelt;
  740. /* never insert an empty URI; this index is always APR_XML_NS_NONE */
  741. if (*uri == '\0')
  742. return APR_XML_NS_NONE;
  743. for (i = uri_array->nelts; i--;) {
  744. if (strcmp(uri, APR_XML_GET_URI_ITEM(uri_array, i)) == 0)
  745. return i;
  746. }
  747. pelt = apr_array_push(uri_array);
  748. *pelt = uri; /* assume uri is const or in a pool */
  749. return uri_array->nelts - 1;
  750. }
  751. /* convert the element to EBCDIC */
  752. #if APR_CHARSET_EBCDIC
  753. static apr_status_t apr_xml_parser_convert_elem(apr_xml_elem *e,
  754. apr_xlate_t *convset)
  755. {
  756. apr_xml_attr *a;
  757. apr_xml_elem *ec;
  758. apr_text *t;
  759. apr_size_t inbytes_left, outbytes_left;
  760. apr_status_t status;
  761. inbytes_left = outbytes_left = strlen(e->name);
  762. status = apr_xlate_conv_buffer(convset, e->name, &inbytes_left, (char *) e->name, &outbytes_left);
  763. if (status) {
  764. return status;
  765. }
  766. for (t = e->first_cdata.first; t != NULL; t = t->next) {
  767. inbytes_left = outbytes_left = strlen(t->text);
  768. status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
  769. if (status) {
  770. return status;
  771. }
  772. }
  773. for (t = e->following_cdata.first; t != NULL; t = t->next) {
  774. inbytes_left = outbytes_left = strlen(t->text);
  775. status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
  776. if (status) {
  777. return status;
  778. }
  779. }
  780. for (a = e->attr; a != NULL; a = a->next) {
  781. inbytes_left = outbytes_left = strlen(a->name);
  782. status = apr_xlate_conv_buffer(convset, a->name, &inbytes_left, (char *) a->name, &outbytes_left);
  783. if (status) {
  784. return status;
  785. }
  786. inbytes_left = outbytes_left = strlen(a->value);
  787. status = apr_xlate_conv_buffer(convset, a->value, &inbytes_left, (char *) a->value, &outbytes_left);
  788. if (status) {
  789. return status;
  790. }
  791. }
  792. for (ec = e->first_child; ec != NULL; ec = ec->next) {
  793. status = apr_xml_parser_convert_elem(ec, convset);
  794. if (status) {
  795. return status;
  796. }
  797. }
  798. return APR_SUCCESS;
  799. }
  800. /* convert the whole document to EBCDIC */
  801. APU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *pool,
  802. apr_xml_doc *pdoc,
  803. apr_xlate_t *convset)
  804. {
  805. apr_status_t status;
  806. /* Don't convert the namespaces: they are constant! */
  807. if (pdoc->namespaces != NULL) {
  808. int i;
  809. apr_array_header_t *namespaces;
  810. namespaces = apr_array_make(pool, pdoc->namespaces->nelts, sizeof(const char *));
  811. if (namespaces == NULL)
  812. return APR_ENOMEM;
  813. for (i = 0; i < pdoc->namespaces->nelts; i++) {
  814. apr_size_t inbytes_left, outbytes_left;
  815. char *ptr = (char *) APR_XML_GET_URI_ITEM(pdoc->namespaces, i);
  816. ptr = apr_pstrdup(pool, ptr);
  817. if ( ptr == NULL)
  818. return APR_ENOMEM;
  819. inbytes_left = outbytes_left = strlen(ptr);
  820. status = apr_xlate_conv_buffer(convset, ptr, &inbytes_left, ptr, &outbytes_left);
  821. if (status) {
  822. return status;
  823. }
  824. apr_xml_insert_uri(namespaces, ptr);
  825. }
  826. pdoc->namespaces = namespaces;
  827. }
  828. return apr_xml_parser_convert_elem(pdoc->root, convset);
  829. }
  830. #endif