binmode-rpc2xml-rpc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552
  1. #!/usr/bin/perl -w
  2. use strict;
  3. # Some constants.
  4. my $crlf = "\015\012";
  5. # Try to load our external libraries, but fail gracefully.
  6. eval {
  7. require Frontier::Client;
  8. require MIME::Base64;
  9. };
  10. if ($@) {
  11. print STDERR <<"EOD";
  12. This script requires Ken MacLeod\'s Frontier::RPC2 module. You can get this
  13. from CPAN or from his website at http://bitsko.slc.ut.us/~ken/xml-rpc/ .
  14. For installation instructions, see the XML-RPC HOWTO at:
  15. http://www.linuxdoc.org/HOWTO/XML-RPC-HOWTO/index.html
  16. This script also requires MIME::Base64. You can get this from CPAN.
  17. EOD
  18. exit 1;
  19. }
  20. # Parse our command-line arguments.
  21. if (@ARGV != 0) {
  22. print STDERR "Usage: binmode-rpc2xml-rpc < data.binmode > data.xml\n";
  23. exit 1;
  24. }
  25. # Perform our I/O in binary mode (hence the name of the protocol).
  26. binmode STDIN; # Because we're reading raw binary data.
  27. binmode STDOUT; # Because we want our XML left unmolested.
  28. # Just suck all our input into one string and glom it together.
  29. my $binmode_data = join('', <STDIN>);
  30. # Check for the mandatory header.
  31. unless ($binmode_data =~ /^binmode-rpc:/) {
  32. die "$0: No 'binmode-rpc:' header present, stopping";
  33. }
  34. # Set our decoding-position counter to point just past the header, and
  35. # our end pointer to just beyond the end of the entire message.
  36. my $position = length('binmode-rpc:');
  37. my $end = length($binmode_data);
  38. # Set our starting output indentation to zero (for the pretty-printer).
  39. my $indentation = 0;
  40. # Build an empty codebook of strings.
  41. my @codebook;
  42. # Begin the hard work.
  43. decode_call_or_response();
  44. # Print a warning if there's leftover data.
  45. if ($position != $end) {
  46. printf STDERR "binmode-rpc2xml-rpc: warning: Trailing data ignored\n";
  47. }
  48. # We're done!
  49. exit (0);
  50. #--------------------------------------------------------------------------
  51. # Pretty-printing
  52. #--------------------------------------------------------------------------
  53. sub escape_string ($) {
  54. my ($string) = @_;
  55. $string =~ s/&/&amp;/g;
  56. $string =~ s/</&lt;/g;
  57. $string =~ s/\"/&quot;/g;
  58. return $string;
  59. }
  60. sub push_indentation_level () {
  61. $indentation += 2;
  62. }
  63. sub pop_indentation_level () {
  64. $indentation -= 2;
  65. }
  66. sub print_xml_line ($) {
  67. my ($xml) = @_;
  68. print STDOUT (' ' x $indentation) . $xml . $crlf;
  69. }
  70. #--------------------------------------------------------------------------
  71. # Raw input routines
  72. #--------------------------------------------------------------------------
  73. # These routines read raw input from our string, advance the current
  74. # position, and return something in a Perl-friendly format.
  75. #
  76. # This is all icky binary I/O using Perl's built-in unpack function.
  77. sub read_byte () {
  78. die "Unexpected end of input" unless ($position + 1 <= $end);
  79. my $byte = unpack('C', substr($binmode_data, $position, 1));
  80. $position += 1;
  81. die "Weird error decoding byte" unless (defined $byte);
  82. return $byte;
  83. }
  84. sub peek_character () {
  85. die "Unexpected end of input" unless ($position + 1 <= $end);
  86. my $byte = chr(unpack('c', substr($binmode_data, $position, 1)));
  87. die "Weird error decoding character" unless (defined $byte);
  88. return $byte;
  89. }
  90. sub read_character () {
  91. my $byte = peek_character();
  92. $position += 1;
  93. return $byte;
  94. }
  95. sub read_unsigned_lsb () {
  96. die "Unexpected end of input" unless ($position + 4 <= $end);
  97. my $integer = unpack('V', substr($binmode_data, $position, 4));
  98. $position += 4;
  99. die "Weird error decoding integer" unless (defined $integer);
  100. unless ($integer >= 0) {
  101. die "Perl can't handle 32-bit unsigned integers portably, stopping";
  102. }
  103. return $integer;
  104. }
  105. sub read_signed_lsb () {
  106. die "Unexpected end of input" unless ($position + 4 <= $end);
  107. my $integer = unpack('V', substr($binmode_data, $position, 4));
  108. $position += 4;
  109. die "Weird error decoding integer" unless (defined $integer);
  110. return $integer;
  111. }
  112. sub read_data ($) {
  113. my ($length) = @_;
  114. die "Unexpected end of input" unless ($position + $length <= $end);
  115. my $data = unpack("a$length", substr($binmode_data, $position, $length));
  116. $position += $length;
  117. die "Weird error decoding data" unless (defined $data);
  118. die "Wrong data length" unless (length($data) == $length);
  119. return $data;
  120. }
  121. sub read_data_w_byte_length () {
  122. my $length = read_byte();
  123. return read_data($length);
  124. }
  125. sub read_data_w_unsigned_lsb_length () {
  126. my $length = read_unsigned_lsb();
  127. return read_data($length)
  128. }
  129. sub read_string_data () {
  130. my $string = read_data_w_unsigned_lsb_length();
  131. validate_utf8($string);
  132. return $string;
  133. }
  134. #--------------------------------------------------------------------------
  135. # High-level input routines
  136. #--------------------------------------------------------------------------
  137. # These use the low-level input routines to read data from the buffer,
  138. # and then convert it into Frontier::RPC2 objects.
  139. sub read_value () {
  140. my $type = read_character();
  141. #print STDERR "DEBUG: Reading from '$type'\n";
  142. if ($type eq 'I') {
  143. return _read_int_value();
  144. } elsif ($type eq 't') {
  145. return Frontier::RPC2::Boolean->new(1);
  146. } elsif ($type eq 'f') {
  147. return Frontier::RPC2::Boolean->new(0);
  148. } elsif ($type eq 'D') {
  149. return _read_double_value();
  150. } elsif ($type eq '8') {
  151. return _read_dateTime_value();
  152. } elsif ($type eq 'B') {
  153. return _read_base64_value();
  154. } elsif ($type eq 'A') {
  155. return _read_array_value();
  156. } elsif ($type eq 'S') {
  157. return _read_struct_value();
  158. } elsif ($type eq 'U') {
  159. return _read_regular_string_value();
  160. } elsif ($type eq '>') {
  161. return _read_recorded_string_value();
  162. } elsif ($type eq '<') {
  163. return _read_recalled_string_value();
  164. } elsif ($type eq 'O') {
  165. die "Type 'O' Binmode RPC data not supported";
  166. } else {
  167. die "Type '$type' Binmode RPC data does not exist";
  168. }
  169. }
  170. sub read_value_and_typecheck ($) {
  171. my ($wanted_type) = @_;
  172. my $value = read_value();
  173. my $value_type = ref($value);
  174. die "$0: Expected $wanted_type, got $value_type, stopping"
  175. unless ($wanted_type eq $value_type);
  176. return $value;
  177. }
  178. sub _read_int_value () {
  179. return Frontier::RPC2::Integer->new(read_signed_lsb);
  180. }
  181. sub _read_double_value () {
  182. return Frontier::RPC2::Double->new(read_data_w_byte_length);
  183. }
  184. sub _read_dateTime_value () {
  185. return Frontier::RPC2::DateTime::ISO8601->new(read_data_w_byte_length);
  186. }
  187. sub _read_base64_value () {
  188. my $binary = read_data_w_unsigned_lsb_length;
  189. my $encoded = MIME::Base64::encode_base64($binary, $crlf);
  190. return Frontier::RPC2::Base64->new($encoded);
  191. }
  192. sub _read_array_value () {
  193. my $size = read_unsigned_lsb;
  194. my @values;
  195. for (my $i = 0; $i < $size; $i++) {
  196. push @values, read_value;
  197. }
  198. return \@values;
  199. }
  200. sub _read_struct_value () {
  201. my $size = read_unsigned_lsb;
  202. my %struct;
  203. for (my $i = 0; $i < $size; $i++) {
  204. my $key = read_value_and_typecheck('Frontier::RPC2::String');
  205. $struct{$key->value} = read_value;
  206. }
  207. return \%struct;
  208. }
  209. sub _read_regular_string_value () {
  210. return Frontier::RPC2::String->new(read_string_data);
  211. }
  212. sub _read_recorded_string_value () {
  213. my $codebook_entry = read_byte;
  214. my $string = Frontier::RPC2::String->new(read_string_data);
  215. $codebook[$codebook_entry] = $string;
  216. return $string;
  217. }
  218. sub _read_recalled_string_value () {
  219. my $codebook_entry = read_byte;
  220. my $string = $codebook[$codebook_entry];
  221. unless (defined $string) {
  222. die "$0: Attempted to use undefined codebook position $codebook_entry";
  223. }
  224. return $string;
  225. }
  226. #--------------------------------------------------------------------------
  227. # High-level output routines
  228. #--------------------------------------------------------------------------
  229. # We don't use Frontier::RPC2's output routines, because we're looking
  230. # for maximum readability. This is a debugging tool, after all.
  231. sub print_xml_header () {
  232. print_xml_line '<?xml version="1.0" encoding="UTF-8"?>';
  233. }
  234. sub get_escaped_string ($) {
  235. my ($value) = @_;
  236. return escape_string($value->value);
  237. }
  238. sub print_simple_value ($$) {
  239. my ($tag, $value) = @_;
  240. my $string = get_escaped_string($value);
  241. print_xml_line "<value><$tag>$string</$tag></value>";
  242. }
  243. sub print_value ($) {
  244. my ($value) = @_;
  245. my $type = ref($value);
  246. if ($type eq 'Frontier::RPC2::Integer') {
  247. print_simple_value("int", $value);
  248. } elsif ($type eq 'Frontier::RPC2::Double') {
  249. print_simple_value("double", $value);
  250. } elsif ($type eq 'Frontier::RPC2::Boolean') {
  251. print_simple_value("boolean", $value);
  252. } elsif ($type eq 'Frontier::RPC2::String') {
  253. print_simple_value("string", $value);
  254. } elsif ($type eq 'Frontier::RPC2::DateTime::ISO8601') {
  255. print_simple_value("dateTime.iso8601", $value);
  256. } elsif ($type eq 'Frontier::RPC2::Base64') {
  257. print_base64_data($value);
  258. } elsif ($type eq 'ARRAY') {
  259. print_array_value($value);
  260. } elsif ($type eq 'HASH') {
  261. print_struct_value($value);
  262. } else {
  263. die "Unxpected type '$type', stopping";
  264. }
  265. }
  266. sub print_params ($) {
  267. my ($params) = @_;
  268. die "Wanted array" unless (ref($params) eq 'ARRAY');
  269. print_xml_line '<params>';
  270. push_indentation_level;
  271. foreach my $item (@$params) {
  272. print_xml_line '<param>';
  273. push_indentation_level;
  274. print_value($item);
  275. pop_indentation_level;
  276. print_xml_line '</param>';
  277. }
  278. pop_indentation_level;
  279. print_xml_line '</params>';
  280. }
  281. sub print_base64_data ($) {
  282. my ($value) = @_;
  283. print_xml_line '<value>';
  284. push_indentation_level;
  285. print_xml_line '<base64>';
  286. print $value->value;
  287. print_xml_line '</base64>';
  288. pop_indentation_level;
  289. print_xml_line '</value>';
  290. }
  291. sub print_array_value ($) {
  292. my ($array) = @_;
  293. print_xml_line '<value>';
  294. push_indentation_level;
  295. print_xml_line '<array>';
  296. push_indentation_level;
  297. print_xml_line '<data>';
  298. push_indentation_level;
  299. foreach my $item (@$array) {
  300. print_value($item);
  301. }
  302. pop_indentation_level;
  303. print_xml_line '</data>';
  304. pop_indentation_level;
  305. print_xml_line '</array>';
  306. pop_indentation_level;
  307. print_xml_line '</value>';
  308. }
  309. sub print_struct_value ($) {
  310. my ($struct) = @_;
  311. print_xml_line '<value>';
  312. push_indentation_level;
  313. print_xml_line '<struct>';
  314. push_indentation_level;
  315. for my $key (keys %$struct) {
  316. print_xml_line '<member>';
  317. push_indentation_level;
  318. my $name = escape_string($key);
  319. print_xml_line "<name>$name</name>";
  320. print_value($struct->{$key});
  321. pop_indentation_level;
  322. print_xml_line '</member>';
  323. }
  324. pop_indentation_level;
  325. print_xml_line '</struct>';
  326. pop_indentation_level;
  327. print_xml_line '</value>';
  328. }
  329. #--------------------------------------------------------------------------
  330. # High-level decoder routines
  331. #--------------------------------------------------------------------------
  332. # These routines convert Binmode RPC data into the corresponding XML-RPC
  333. # documents.
  334. sub decode_call_or_response () {
  335. my $type = read_character();
  336. if ($type eq 'C') {
  337. decode_call();
  338. } elsif ($type eq 'R') {
  339. decode_response();
  340. } else {
  341. die "$0: Unknown binmode-rpc request type '$type', stopping";
  342. }
  343. }
  344. sub decode_call () {
  345. my $namevalue = read_value_and_typecheck('Frontier::RPC2::String');
  346. my $params = read_value_and_typecheck('ARRAY');
  347. print_xml_header;
  348. print_xml_line '<methodCall>';
  349. push_indentation_level;
  350. my $name = get_escaped_string($namevalue);
  351. print_xml_line "<methodName>$name</methodName>";
  352. print_params($params);
  353. pop_indentation_level;
  354. print_xml_line '</methodCall>';
  355. }
  356. sub decode_response () {
  357. my $maybe_fault = peek_character;
  358. if ($maybe_fault eq 'F') {
  359. read_character;
  360. my $fault = read_value_and_typecheck('HASH');
  361. print_xml_header;
  362. print_xml_line '<methodResponse>';
  363. push_indentation_level;
  364. print_xml_line '<fault>';
  365. push_indentation_level;
  366. print_value $fault;
  367. pop_indentation_level;
  368. print_xml_line '</fault>';
  369. pop_indentation_level;
  370. print_xml_line '</methodResponse>';
  371. } else {
  372. my $value = read_value;
  373. print_xml_header;
  374. print_xml_line '<methodResponse>';
  375. push_indentation_level;
  376. print_params [$value];
  377. pop_indentation_level;
  378. print_xml_line '</methodResponse>';
  379. }
  380. }
  381. #--------------------------------------------------------------------------
  382. # UTF-8 Validation
  383. #--------------------------------------------------------------------------
  384. # This is based on the UTF-8 section of the Secure Programs HOWTO.
  385. # http://new.linuxnow.com/docs/content/HOWTO/Secure-Programs-HOWTO/
  386. # This code *hasn't* been stress-tested for correctness yet; please see:
  387. # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
  388. # This is not yet good enough to be used as part of a UTF-8 decoder or
  389. # security validator, but it's OK to make sure nobody is sending Latin-1.
  390. BEGIN {
  391. use vars qw{@illegal_initial_bytes @sequence_length_info};
  392. # Bytes are represented as data/mask pairs.
  393. @illegal_initial_bytes =
  394. (# 10xxxxxx illegal as initial byte of char (80..BF)
  395. [0x80, 0xC0],
  396. # 1100000x illegal, overlong (C0..C1 80..BF)
  397. [0xC0, 0xFE],
  398. # 11100000 100xxxxx illegal, overlong (E0 80..9F)
  399. [0xE0, 0xFF, 0x80, 0xE0],
  400. # 11110000 1000xxxx illegal, overlong (F0 80..8F)
  401. [0xF0, 0xFF, 0x80, 0xF0],
  402. # 11111000 10000xxx illegal, overlong (F8 80..87)
  403. [0xF8, 0xFF, 0x80, 0xF8],
  404. # 11111100 100000xx illegal, overlong (FC 80..83)
  405. [0xFC, 0xFF, 0x80, 0xFC],
  406. # 1111111x illegal; prohibited by spec
  407. [0xFE, 0xFE]);
  408. # Items are byte, mask, sequence length.
  409. @sequence_length_info =
  410. (# 110xxxxx 10xxxxxx
  411. [0xC0, 0xE0, 2],
  412. # 1110xxxx 10xxxxxx 10xxxxxx
  413. [0xE0, 0xF0, 3],
  414. # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  415. [0xF0, 0xF8, 4],
  416. # 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
  417. [0xF8, 0xFC, 5],
  418. # 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
  419. [0xFC, 0xFE, 6]);
  420. }
  421. sub validate_utf8 ($) {
  422. my ($string) = @_;
  423. my $end = length($string);
  424. my $i = 0;
  425. while ($i < $end) {
  426. my $byte = ord(substr($string, $i, 1));
  427. #print STDERR "Checking byte $byte\n";
  428. # Check for illegal bytes at the start of this sequence.
  429. NEXT_CANDIDATE:
  430. foreach my $illegal_byte_info (@illegal_initial_bytes) {
  431. my $offset = 0;
  432. for (my $j = 0; $j < @$illegal_byte_info; $j += 2) {
  433. my $pattern = $illegal_byte_info->[$j];
  434. my $mask = $illegal_byte_info->[$j+1];
  435. my $data = ord(substr($string, $i+$offset, 1));
  436. #print STDERR " B: $byte P: $pattern M: $mask D: $data\n";
  437. next NEXT_CANDIDATE unless ($data & $mask) == $pattern;
  438. $offset++;
  439. }
  440. die "Illegal UTF-8 sequence (" . substr($string, $i, 2) . ")";
  441. }
  442. # Find the length of the sequence, and make sure we have enough data.
  443. my $length = 1;
  444. foreach my $length_info (@sequence_length_info) {
  445. my ($pattern, $mask, $length_candidate) = @$length_info;
  446. if (($byte & $mask) == $pattern) {
  447. $length = $length_candidate;
  448. last;
  449. }
  450. }
  451. die "$0: Unexpected end of UTF-8 sequence, stopping"
  452. unless $i + $length <= $end;
  453. # Verify the sequence is well-formed.
  454. $i++, $length--;
  455. while ($length > 0) {
  456. die "$0: Malformed UTF-8 sequence, stopping"
  457. unless (ord(substr($string, $i, 1)) & 0xC0) == 0x80;
  458. $i++, $length--;
  459. }
  460. }
  461. #printf STDERR "DEBUG: Verified $i bytes\n";
  462. }