ikslint.c 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. /* iksemel (XML parser for Jabber)
  2. ** Copyright (C) 2000-2003 Gurer Ozen <madcat@e-kolay.net>
  3. ** This code is free software; you can redistribute it and/or
  4. ** modify it under the terms of GNU Lesser General Public License.
  5. */
  6. #include "common.h"
  7. #include "iksemel.h"
  /* Opaque handle for the tag-name hash table. The structure and the
  ** functions below are defined outside this section; only the parts
  ** this tool calls are declared here.
  */
  typedef struct hash_s hash;

  /* Create a table. Callers in this file treat a NULL result as failure. */
  hash *hash_new (unsigned int table_size);

  /* Insert name into table and return a string pointer for it.
  ** Callers in this file treat a NULL result as failure.
  */
  char *hash_insert (hash *table, const char *name);

  /* Print the table contents using the two printf-style formats given:
  ** title_fmt for the heading and line_fmt for each entry.
  */
  void hash_print (hash *h, char *title_fmt, char *line_fmt);

  /* Destroy a table obtained from hash_new(). */
  void hash_delete (hash *table);
  14. #include <sys/stat.h>
  15. #ifdef HAVE_GETOPT_LONG
  16. #include <getopt.h>
  17. #endif
  #ifdef HAVE_GETOPT_LONG
  /* Long spellings of the command-line options. Each entry takes no
  ** argument and maps onto the short option letter in its last field;
  ** the all-zero entry terminates the table.
  */
  static struct option longopts[] = {
      { "stats",     no_argument, NULL, 's' },
      { "histogram", no_argument, NULL, 't' },
      { "help",      no_argument, NULL, 'h' },
      { "version",   no_argument, NULL, 'V' },
      { NULL,        0,           NULL, 0   }
  };
  #endif

  /* Short option letters; none of them takes an argument. */
  static char *shortopts = "sthV";
  /* Write the command-line help text to standard output.
  ** puts() supplies the final newline.
  */
  static void
  print_usage (void)
  {
      static const char usage_text[] =
          "Usage: ikslint [OPTIONS] FILE\n"
          "This tool checks the well-formedness of an XML document.\n"
          " -s, --stats Print statistics.\n"
          " -t, --histogram Print tag histogram.\n"
          " -h, --help Print this text and exit.\n"
          " -V, --version Print version and exit.\n"
  #ifndef HAVE_GETOPT_LONG
          "(long options are not supported on your system)\n"
  #endif
          "Report bugs to <iksemel-dev@jabberstudio.org>.";

      puts (usage_text);
  }
  42. /* calculate and print statistics */
  43. int lint_pr_stats = 0;
  44. /* print tag histogram */
  45. int lint_pr_hist = 0;
  46. hash *tag_table;
  47. char **tag_list;
  48. int tag_size, tag_pos;
  49. void
  50. tag_push (const char *name)
  51. {
  52. if (!tag_list) {
  53. tag_size = 128;
  54. tag_list = malloc (sizeof (char *) * tag_size);
  55. if (!tag_list) exit (2);
  56. }
  57. tag_list[tag_pos] = hash_insert (tag_table, name);
  58. if (!tag_list[tag_pos]) exit (2);
  59. tag_pos++;
  60. if (tag_pos == tag_size) {
  61. char **tmp;
  62. tmp = malloc (sizeof (char *) * tag_size * 2);
  63. if (!tmp) exit (2);
  64. memcpy (tmp, tag_list, sizeof (char *) * tag_size);
  65. free (tag_list);
  66. tag_list = tmp;
  67. tag_size *= 2;
  68. }
  69. }
  70. char *
  71. tag_pull (void)
  72. {
  73. tag_pos--;
  74. return tag_list[tag_pos];
  75. }
  /* Per-document counters filled in by the parser hooks. */
  struct stats {
      /* Number of tags currently open. */
      unsigned level;
      /* Largest value level has reached. */
      unsigned max_depth;
      /* Number of closing tags that matched their opening tag. */
      unsigned nr_tags;
      /* Number of single (self-contained) tags. */
      unsigned nr_stags;
      /* Sum of the lengths reported to the character data hook. */
      unsigned cdata_size;
  };
  83. int
  84. tagHook (void *udata, char *name, char **atts, int type)
  85. {
  86. struct stats *st = (struct stats *) udata;
  87. char *tmp;
  88. switch (type) {
  89. case IKS_OPEN:
  90. tag_push (name);
  91. st->level++;
  92. if (st->level > st->max_depth) st->max_depth = st->level;
  93. break;
  94. case IKS_CLOSE:
  95. tmp = tag_pull ();
  96. if (iks_strcmp (tmp, name) != 0) {
  97. fprintf (stderr, "Tag mismatch, expecting '%s', got '%s'.\n",
  98. tmp, name);
  99. return IKS_HOOK;
  100. }
  101. st->level--;
  102. st->nr_tags++;
  103. break;
  104. case IKS_SINGLE:
  105. if (NULL == hash_insert (tag_table, name)) exit (2);
  106. st->nr_stags++;
  107. break;
  108. }
  109. return IKS_OK;
  110. }
  111. int
  112. cdataHook (void *udata, char *data, size_t len)
  113. {
  114. struct stats *st = (struct stats *) udata;
  115. st->cdata_size += len;
  116. return IKS_OK;
  117. }
  118. void
  119. check_file (char *fname)
  120. {
  121. iksparser *prs;
  122. struct stats st;
  123. FILE *f;
  124. char *buf;
  125. struct stat fs;
  126. size_t sz, blk, ret, pos;
  127. enum ikserror err;
  128. int done;
  129. memset (&st, 0, sizeof (struct stats));
  130. prs = iks_sax_new (&st, tagHook, cdataHook);
  131. if (NULL == prs) exit (2);
  132. if (fname) {
  133. if (stat (fname, &fs) != 0) {
  134. fprintf (stderr, "Cannot access file '%s'.\n", fname);
  135. exit (1);
  136. }
  137. sz = fs.st_size;
  138. #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
  139. blk = fs.st_blksize;
  140. #else
  141. blk = 4096;
  142. #endif
  143. f = fopen (fname, "r");
  144. if (!f) {
  145. fprintf (stderr, "Cannot open file '%s'.\n", fname);
  146. exit (1);
  147. }
  148. buf = malloc (blk);
  149. if (!buf) {
  150. fclose (f);
  151. fprintf (stderr, "Cannot allocate %d bytes.\n", blk);
  152. exit (2);
  153. }
  154. } else {
  155. f = stdin;
  156. blk = 4096;
  157. sz = 0;
  158. buf = malloc (blk);
  159. if (!buf) exit (2);
  160. }
  161. tag_table = hash_new (367);
  162. if (!tag_table) exit (2);
  163. pos = 0;
  164. done = 0;
  165. while (0 == done) {
  166. ret = fread (buf, 1, blk, f);
  167. pos += ret;
  168. if (feof (f)) {
  169. done = 1;
  170. } else {
  171. if (ret != blk) {
  172. if (fname)
  173. fprintf (stderr, "Read error in file '%s'.\n", fname);
  174. else
  175. fprintf (stderr, "Read error in stream.\n");
  176. exit (1);
  177. }
  178. }
  179. err = iks_parse (prs, buf, ret, done);
  180. switch (err) {
  181. case IKS_OK:
  182. break;
  183. case IKS_NOMEM:
  184. exit (2);
  185. case IKS_BADXML:
  186. if (fname)
  187. fprintf (stderr, "Invalid xml at byte %ld, line %ld in file '%s'.\n",
  188. iks_nr_bytes (prs), iks_nr_lines (prs), fname);
  189. else
  190. fprintf (stderr, "Invalid xml at byte %ld, line %ld in stream.\n",
  191. iks_nr_bytes (prs), iks_nr_lines (prs));
  192. exit (1);
  193. case IKS_HOOK:
  194. if (fname)
  195. fprintf (stderr, "Byte %ld, line %ld in file '%s'.\n",
  196. iks_nr_bytes (prs), iks_nr_lines (prs), fname);
  197. else
  198. fprintf (stderr, "Byte %ld, line %ld in stream.\n",
  199. iks_nr_bytes (prs), iks_nr_lines (prs));
  200. exit (1);
  201. }
  202. }
  203. free (buf);
  204. if (fname) fclose (f);
  205. if (fname && (lint_pr_stats || lint_pr_hist)) {
  206. printf ("File '%s' (%d bytes):\n", fname, sz);
  207. }
  208. if (lint_pr_stats) {
  209. printf ("Tags: %d pairs, %d single, %d max depth.\n", st.nr_tags, st.nr_stags, st.max_depth);
  210. printf ("Total size of character data: %d bytes.\n", st.cdata_size);
  211. }
  212. if (lint_pr_hist) {
  213. hash_print (tag_table,
  214. "Histogram of %d unique tags:\n",
  215. "<%s> %d times.\n");
  216. }
  217. hash_delete (tag_table);
  218. iks_parser_delete (prs);
  219. }
  220. int
  221. main (int argc, char *argv[])
  222. {
  223. int c;
  224. #ifdef HAVE_GETOPT_LONG
  225. int i;
  226. while ((c = getopt_long (argc, argv, shortopts, longopts, &i)) != -1) {
  227. #else
  228. while ((c = getopt (argc, argv, shortopts)) != -1) {
  229. #endif
  230. switch (c) {
  231. case 's':
  232. lint_pr_stats = 1;
  233. break;
  234. case 't':
  235. lint_pr_hist = 1;
  236. break;
  237. case 'h':
  238. print_usage ();
  239. exit (0);
  240. case 'V':
  241. puts ("ikslint (iksemel) "VERSION);
  242. exit (0);
  243. }
  244. }
  245. if (!argv[optind]) {
  246. check_file (NULL);
  247. } else {
  248. for (; optind < argc; optind++) {
  249. check_file (argv[optind]);
  250. }
  251. }
  252. return 0;
  253. }