apr_xml.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. /* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
  2. * applicable.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /**
  17. * @file apr_xml.h
  18. * @brief APR-UTIL XML Library
  19. */
  20. #ifndef APR_XML_H
  21. #define APR_XML_H
  22. /**
  23. * @defgroup APR_Util_XML XML
  24. * @ingroup APR_Util
  25. * @{
  26. */
  27. #include "apr_pools.h"
  28. #include "apr_tables.h"
  29. #include "apr_file_io.h"
  30. #include "apu.h"
  31. #if APR_CHARSET_EBCDIC
  32. #include "apr_xlate.h"
  33. #endif
  34. #ifdef __cplusplus
  35. extern "C" {
  36. #endif
  37. /**
  38. * @package Apache XML library
  39. */
  40. /* -------------------------------------------------------------------- */
  41. /* ### these will need to move at some point to a more logical spot */
  /** @see apr_text */
  typedef struct apr_text apr_text;

  /** Structure to keep a linked list of pieces of text */
  struct apr_text {
      /** The piece of text held by this node */
      const char *text;
      /** A pointer to the next piece of text in the list */
      struct apr_text *next;
  };
  51. /** @see apr_text_header */
  52. typedef struct apr_text_header apr_text_header;
  53. /** A list of pieces of text */
  54. struct apr_text_header {
  55. /** The first piece of text in the list */
  56. apr_text *first;
  57. /** The last piece of text in the list */
  58. apr_text *last;
  59. };
  60. /**
  61. * Append a piece of text to the end of a list
  62. * @param p The pool to allocate out of
  63. * @param hdr The text header to append to
  64. * @param text The new text to append
  65. */
  66. APU_DECLARE(void) apr_text_append(apr_pool_t *p, apr_text_header *hdr,
  67. const char *text);
  68. /* --------------------------------------------------------------------
  69. **
  70. ** XML PARSING
  71. */
  72. /*
  73. ** Qualified namespace values
  74. **
  75. ** APR_XML_NS_DAV_ID
  76. ** We always insert the "DAV:" namespace URI at the head of the
  77. ** namespace array. This means that it will always be at ID==0,
  78. ** making it much easier to test for.
  79. **
  80. ** APR_XML_NS_NONE
  81. ** This special ID is used for two situations:
  82. **
  83. ** 1) The namespace prefix begins with "xml" (and we do not know
  84. ** what it means). Namespace prefixes with "xml" (any case) as
  85. ** their first three characters are reserved by the XML Namespaces
  86. ** specification for future use. mod_dav will pass these through
  87. ** unchanged. When this identifier is used, the prefix is LEFT in
  88. ** the element/attribute name. Downstream processing should not
  89. ** prepend another prefix.
  90. **
  91. ** 2) The element/attribute does not have a namespace.
  92. **
  93. ** a) No prefix was used, and a default namespace has not been
  94. ** defined.
  95. ** b) No prefix was used, and the default namespace was specified
  96. ** to mean "no namespace". This is done with a namespace
  97. ** declaration of: xmlns=""
  98. ** (this declaration is typically used to override a previous
  99. ** specification for the default namespace)
  100. **
  101. ** In these cases, we need to record that the elem/attr has no
  102. ** namespace so that we will not attempt to prepend a prefix.
  103. ** All namespaces that are used will have a prefix assigned to
  104. ** them -- mod_dav will never set or use the default namespace
  105. ** when generating XML. This means that "no prefix" will always
  106. ** mean "no namespace".
  107. **
  108. ** In both cases, the XML generation will avoid prepending a prefix.
  109. ** For the first case, this means the original prefix/name will be
  110. ** inserted into the output stream. For the latter case, it means
  111. ** the name will have no prefix, and since we never define a default
  112. ** namespace, this means it will have no namespace.
  113. **
  114. ** Note: currently, mod_dav understands the "xmlns" prefix and the
  115. ** "xml:lang" attribute. These are handled specially (they aren't
  116. ** left within the XML tree), so the APR_XML_NS_NONE value won't ever
  117. ** really apply to these values.
  118. */
  /*
   * Negative values are parenthesized so the macros expand as a single
   * operand wherever they are used.
   */
  #define APR_XML_NS_DAV_ID       0       /**< namespace ID for "DAV:" */
  #define APR_XML_NS_NONE         (-10)   /**< no namespace for this elem/attr */
  #define APR_XML_NS_ERROR_BASE   (-100)  /**< used only during processing */

  /** Is this namespace an error? True for any ID at or below the error base. */
  #define APR_XML_NS_IS_ERROR(e)  ((e) <= APR_XML_NS_ERROR_BASE)
  /** @see apr_xml_attr */
  typedef struct apr_xml_attr apr_xml_attr;

  /** @see apr_xml_elem */
  typedef struct apr_xml_elem apr_xml_elem;

  /** @see apr_xml_doc */
  typedef struct apr_xml_doc apr_xml_doc;
  /** apr_xml_attr: holds a parsed XML attribute */
  struct apr_xml_attr {
      /** attribute name */
      const char *name;
      /** index into namespace array */
      int ns;

      /** attribute value */
      const char *value;

      /** next attribute */
      struct apr_xml_attr *next;
  };
  141. /** apr_xml_elem: holds a parsed XML element */
  142. struct apr_xml_elem {
  143. /** element name */
  144. const char *name;
  145. /** index into namespace array */
  146. int ns;
  147. /** xml:lang for attrs/contents */
  148. const char *lang;
  149. /** cdata right after start tag */
  150. apr_text_header first_cdata;
  151. /** cdata after MY end tag */
  152. apr_text_header following_cdata;
  153. /** parent element */
  154. struct apr_xml_elem *parent;
  155. /** next (sibling) element */
  156. struct apr_xml_elem *next;
  157. /** first child element */
  158. struct apr_xml_elem *first_child;
  159. /** first attribute */
  160. struct apr_xml_attr *attr;
  161. /* used only during parsing */
  162. /** last child element */
  163. struct apr_xml_elem *last_child;
  164. /** namespaces scoped by this elem */
  165. struct apr_xml_ns_scope *ns_scope;
  166. /* used by modules during request processing */
  167. /** Place for modules to store private data */
  168. void *priv;
  169. };
  /**
   * Is this XML element empty?
   * True when the element has neither a first child nor any cdata right
   * after its start tag. Note that the argument is evaluated twice.
   */
  #define APR_XML_ELEM_IS_EMPTY(e) ((e)->first_child == NULL && \
                                    (e)->first_cdata.first == NULL)
  173. /** apr_xml_doc: holds a parsed XML document */
  174. struct apr_xml_doc {
  175. /** root element */
  176. apr_xml_elem *root;
  177. /** array of namespaces used */
  178. apr_array_header_t *namespaces;
  179. };
  /** Opaque XML parser structure; only handled through pointers here */
  typedef struct apr_xml_parser apr_xml_parser;
  182. /**
  183. * Create an XML parser
  184. * @param pool The pool for allocating the parser and the parse results.
  185. * @return The new parser.
  186. */
  187. APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool);
  188. /**
  189. * Parse a File, producing a xml_doc
  190. * @param p The pool for allocating the parse results.
  191. * @param parser A pointer to *parser (needed so calling function can get
  192. * errors), will be set to NULL on successfull completion.
  193. * @param ppdoc A pointer to *apr_xml_doc (which has the parsed results in it)
  194. * @param xmlfd A file to read from.
  195. * @param buffer_length Buffer length which would be suitable
  196. * @return Any errors found during parsing.
  197. */
  198. APU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p,
  199. apr_xml_parser **parser,
  200. apr_xml_doc **ppdoc,
  201. apr_file_t *xmlfd,
  202. apr_size_t buffer_length);
  203. /**
  204. * Feed input into the parser
  205. * @param parser The XML parser for parsing this data.
  206. * @param data The data to parse.
  207. * @param len The length of the data.
  208. * @return Any errors found during parsing.
  209. * @remark Use apr_xml_parser_geterror() to get more error information.
  210. */
  211. APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
  212. const char *data,
  213. apr_size_t len);
  214. /**
  215. * Terminate the parsing and return the result
  216. * @param parser The XML parser for parsing this data.
  217. * @param pdoc The resulting parse information. May be NULL to simply
  218. * terminate the parsing without fetching the info.
  219. * @return Any errors found during the final stage of parsing.
  220. * @remark Use apr_xml_parser_geterror() to get more error information.
  221. */
  222. APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
  223. apr_xml_doc **pdoc);
  224. /**
  225. * Fetch additional error information from the parser.
  226. * @param parser The XML parser to query for errors.
  227. * @param errbuf A buffer for storing error text.
  228. * @param errbufsize The length of the error text buffer.
  229. * @return The error buffer
  230. */
  231. APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
  232. char *errbuf,
  233. apr_size_t errbufsize);
  234. /**
  235. * Converts an XML element tree to flat text
  236. * @param p The pool to allocate out of
  237. * @param elem The XML element to convert
  238. * @param style How to covert the XML. One of:
  239. * <PRE>
  240. * APR_XML_X2T_FULL start tag, contents, end tag
  241. * APR_XML_X2T_INNER contents only
  242. * APR_XML_X2T_LANG_INNER xml:lang + inner contents
  243. * APR_XML_X2T_FULL_NS_LANG FULL + ns defns + xml:lang
  244. * </PRE>
  245. * @param namespaces The namespace of the current XML element
  246. * @param ns_map Namespace mapping
  247. * @param pbuf Buffer to put the converted text into
  248. * @param psize Size of the converted text
  249. */
  250. APU_DECLARE(void) apr_xml_to_text(apr_pool_t *p, const apr_xml_elem *elem,
  251. int style, apr_array_header_t *namespaces,
  252. int *ns_map, const char **pbuf,
  253. apr_size_t *psize);
  /* style argument values for apr_xml_to_text(): */
  #define APR_XML_X2T_FULL         0  /**< start tag, contents, end tag */
  #define APR_XML_X2T_INNER        1  /**< contents only */
  #define APR_XML_X2T_LANG_INNER   2  /**< xml:lang + inner contents */
  #define APR_XML_X2T_FULL_NS_LANG 3  /**< FULL + ns defns + xml:lang */
  259. /**
  260. * empty XML element
  261. * @param p The pool to allocate out of
  262. * @param elem The XML element to empty
  263. * @return the string that was stored in the XML element
  264. */
  265. APU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t *p,
  266. const apr_xml_elem *elem);
  267. /**
  268. * quote an XML string
  269. * Replace '<', '>', and '&' with '&lt;', '&gt;', and '&amp;'.
  270. * @param p The pool to allocate out of
  271. * @param s The string to quote
  272. * @param quotes If quotes is true, then replace '"' with '&quot;'.
  273. * @return The quoted string
  274. * @note If the string does not contain special characters, it is not
  275. * duplicated into the pool and the original string is returned.
  276. */
  277. APU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s,
  278. int quotes);
  279. /**
  280. * Quote an XML element
  281. * @param p The pool to allocate out of
  282. * @param elem The element to quote
  283. */
  284. APU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem);
  285. /* manage an array of unique URIs: apr_xml_insert_uri() and APR_XML_URI_ITEM() */
  286. /**
  287. * return the URI's (existing) index, or insert it and return a new index
  288. * @param uri_array array to insert into
  289. * @param uri The uri to insert
  290. * @return int The uri's index
  291. */
  292. APU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array,
  293. const char *uri);
  /**
   * Get the URI item at index i of a URI array.
   * Reads (ary)->elts as an array of const char * and returns element i;
   * no bounds check is performed.
   */
  #define APR_XML_GET_URI_ITEM(ary, i) (((const char * const *)(ary)->elts)[i])
  #if APR_CHARSET_EBCDIC
  /**
   * Convert parsed tree in EBCDIC
   * @param p The pool to allocate out of
   * @param pdoc The apr_xml_doc to convert.
   * @param convset The translation handle to use.
   * @return Any errors found during conversion.
   */
  APU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *p,
                                                       apr_xml_doc *pdoc,
                                                       apr_xlate_t *convset);
  #endif
  308. #ifdef __cplusplus
  309. }
  310. #endif
  311. /** @} */
  312. #endif /* APR_XML_H */