123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283 |
- /* iksemel (XML parser for Jabber)
- ** Copyright (C) 2000-2003 Gurer Ozen <madcat@e-kolay.net>
- ** This code is free software; you can redistribute it and/or
- ** modify it under the terms of GNU Lesser General Public License.
- */
- #include "common.h"
- #include "iksemel.h"
/*
 * Interface of the hash table that stores tag names for this tool.
 * Only the declarations are visible here; the type stays opaque.
 */
typedef struct hash_s hash;

/* Create a table; this file asks for a table size of 367. */
hash *hash_new (unsigned int table_size);

/*
 * Insert NAME into TABLE.  Callers in this file keep the returned string
 * as the tag's name and treat a NULL result as a fatal error.
 */
char *hash_insert (hash *table, const char *name);

/*
 * Print the contents of H.  This file passes a title format holding one
 * %d and a per-entry format holding %s followed by %d.
 */
void hash_print (hash *h, char *title_fmt, char *line_fmt);

/* Called once per checked file when the table is no longer needed. */
void hash_delete (hash *table);
- #include <sys/stat.h>
- #ifdef HAVE_GETOPT_LONG
- #include <getopt.h>
- #endif
#ifdef HAVE_GETOPT_LONG
/* Long spellings of the command line switches; none takes an argument. */
static struct option longopts[] = {
	{ "stats",     no_argument, NULL, 's' },
	{ "histogram", no_argument, NULL, 't' },
	{ "help",      no_argument, NULL, 'h' },
	{ "version",   no_argument, NULL, 'V' },
	{ NULL,        0,           NULL, 0 }	/* end of table */
};
#endif

/* Short switches handed to getopt(); one letter per entry above. */
static char *shortopts = "sthV";
/*
 * Write the command line summary to standard output.
 *
 * The text is assembled in a named constant so that the conditional
 * "long options" note is selected outside of any call expression.
 */
static void
print_usage (void)
{
	static const char usage_text[] =
		"Usage: ikslint [OPTIONS] FILE\n"
		"This tool checks the well-formedness of an XML document.\n"
		" -s, --stats Print statistics.\n"
		" -t, --histogram Print tag histogram.\n"
		" -h, --help Print this text and exit.\n"
		" -V, --version Print version and exit.\n"
#ifndef HAVE_GETOPT_LONG
		"(long options are not supported on your system)\n"
#endif
		"Report bugs to <iksemel-dev@jabberstudio.org>.";

	/* puts() supplies the final newline */
	puts (usage_text);
}
- /* calculate and print statistics */
- int lint_pr_stats = 0;
- /* print tag histogram */
- int lint_pr_hist = 0;
- hash *tag_table;
- char **tag_list;
- int tag_size, tag_pos;
- void
- tag_push (const char *name)
- {
- if (!tag_list) {
- tag_size = 128;
- tag_list = malloc (sizeof (char *) * tag_size);
- if (!tag_list) exit (2);
- }
- tag_list[tag_pos] = hash_insert (tag_table, name);
- if (!tag_list[tag_pos]) exit (2);
- tag_pos++;
- if (tag_pos == tag_size) {
- char **tmp;
- tmp = malloc (sizeof (char *) * tag_size * 2);
- if (!tmp) exit (2);
- memcpy (tmp, tag_list, sizeof (char *) * tag_size);
- free (tag_list);
- tag_list = tmp;
- tag_size *= 2;
- }
- }
- char *
- tag_pull (void)
- {
- tag_pos--;
- return tag_list[tag_pos];
- }
/* Counters collected by the parser hooks while one document is read. */
struct stats {
	unsigned int level;      /* tags open right now */
	unsigned int max_depth;  /* highest value level has reached */
	unsigned int nr_tags;    /* open/close pairs, counted at the close */
	unsigned int nr_stags;   /* tags reported as IKS_SINGLE */
	unsigned int cdata_size; /* sum of the lengths given to cdataHook */
};
- int
- tagHook (void *udata, char *name, char **atts, int type)
- {
- struct stats *st = (struct stats *) udata;
- char *tmp;
- switch (type) {
- case IKS_OPEN:
- tag_push (name);
- st->level++;
- if (st->level > st->max_depth) st->max_depth = st->level;
- break;
- case IKS_CLOSE:
- tmp = tag_pull ();
- if (iks_strcmp (tmp, name) != 0) {
- fprintf (stderr, "Tag mismatch, expecting '%s', got '%s'.\n",
- tmp, name);
- return IKS_HOOK;
- }
- st->level--;
- st->nr_tags++;
- break;
- case IKS_SINGLE:
- if (NULL == hash_insert (tag_table, name)) exit (2);
- st->nr_stags++;
- break;
- }
- return IKS_OK;
- }
- int
- cdataHook (void *udata, char *data, size_t len)
- {
- struct stats *st = (struct stats *) udata;
- st->cdata_size += len;
- return IKS_OK;
- }
- void
- check_file (char *fname)
- {
- iksparser *prs;
- struct stats st;
- FILE *f;
- char *buf;
- struct stat fs;
- size_t sz, blk, ret, pos;
- enum ikserror err;
- int done;
- memset (&st, 0, sizeof (struct stats));
- prs = iks_sax_new (&st, tagHook, cdataHook);
- if (NULL == prs) exit (2);
- if (fname) {
- if (stat (fname, &fs) != 0) {
- fprintf (stderr, "Cannot access file '%s'.\n", fname);
- exit (1);
- }
- sz = fs.st_size;
- #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
- blk = fs.st_blksize;
- #else
- blk = 4096;
- #endif
- f = fopen (fname, "r");
- if (!f) {
- fprintf (stderr, "Cannot open file '%s'.\n", fname);
- exit (1);
- }
- buf = malloc (blk);
- if (!buf) {
- fclose (f);
- fprintf (stderr, "Cannot allocate %d bytes.\n", blk);
- exit (2);
- }
- } else {
- f = stdin;
- blk = 4096;
- sz = 0;
- buf = malloc (blk);
- if (!buf) exit (2);
- }
- tag_table = hash_new (367);
- if (!tag_table) exit (2);
- pos = 0;
- done = 0;
- while (0 == done) {
- ret = fread (buf, 1, blk, f);
- pos += ret;
- if (feof (f)) {
- done = 1;
- } else {
- if (ret != blk) {
- if (fname)
- fprintf (stderr, "Read error in file '%s'.\n", fname);
- else
- fprintf (stderr, "Read error in stream.\n");
- exit (1);
- }
- }
- err = iks_parse (prs, buf, ret, done);
- switch (err) {
- case IKS_OK:
- break;
- case IKS_NOMEM:
- exit (2);
- case IKS_BADXML:
- if (fname)
- fprintf (stderr, "Invalid xml at byte %ld, line %ld in file '%s'.\n",
- iks_nr_bytes (prs), iks_nr_lines (prs), fname);
- else
- fprintf (stderr, "Invalid xml at byte %ld, line %ld in stream.\n",
- iks_nr_bytes (prs), iks_nr_lines (prs));
- exit (1);
- case IKS_HOOK:
- if (fname)
- fprintf (stderr, "Byte %ld, line %ld in file '%s'.\n",
- iks_nr_bytes (prs), iks_nr_lines (prs), fname);
- else
- fprintf (stderr, "Byte %ld, line %ld in stream.\n",
- iks_nr_bytes (prs), iks_nr_lines (prs));
- exit (1);
- }
- }
- free (buf);
- if (fname) fclose (f);
- if (fname && (lint_pr_stats || lint_pr_hist)) {
- printf ("File '%s' (%d bytes):\n", fname, sz);
- }
- if (lint_pr_stats) {
- printf ("Tags: %d pairs, %d single, %d max depth.\n", st.nr_tags, st.nr_stags, st.max_depth);
- printf ("Total size of character data: %d bytes.\n", st.cdata_size);
- }
- if (lint_pr_hist) {
- hash_print (tag_table,
- "Histogram of %d unique tags:\n",
- "<%s> %d times.\n");
- }
- hash_delete (tag_table);
- iks_parser_delete (prs);
- }
- int
- main (int argc, char *argv[])
- {
- int c;
- #ifdef HAVE_GETOPT_LONG
- int i;
- while ((c = getopt_long (argc, argv, shortopts, longopts, &i)) != -1) {
- #else
- while ((c = getopt (argc, argv, shortopts)) != -1) {
- #endif
- switch (c) {
- case 's':
- lint_pr_stats = 1;
- break;
- case 't':
- lint_pr_hist = 1;
- break;
- case 'h':
- print_usage ();
- exit (0);
- case 'V':
- puts ("ikslint (iksemel) "VERSION);
- exit (0);
- }
- }
- if (!argv[optind]) {
- check_file (NULL);
- } else {
- for (; optind < argc; optind++) {
- check_file (argv[optind]);
- }
- }
- return 0;
- }
|