hll-gnuplot-graph.rb 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. # hll-gnuplot-graph.rb - Copyright (C) 2014 Salvatore Sanfilippo
  2. # BSD license, See the COPYING file for more information.
  3. #
  4. # This program is suited to output average and maximum errors of
  5. # the Redis HyperLogLog implementation in a format suitable to print
  6. # graphs using gnuplot.
  7. require 'rubygems'
  8. require 'redis'
  9. require 'digest/sha1'
  # Generate an array of [cardinality, relative_error] pairs, adding elements
  # to the 'hll' key in batches of 'step' until at least 'max' elements have
  # been added.
  #
  # 'r' is the Redis object used to perform the queries; it must respond to
  # del, pfadd and pfcount. The 'hll' key is deleted before the run starts.
  #
  # 'seed' selects the data set: every element is the SHA1 hex digest of the
  # element index concatenated with the seed, so the same seed always yields
  # exactly the same data set, and a different seed yields a different one.
  # Note that index and seed are joined without a separator, so two distinct
  # (index, seed) pairs can occasionally build the same string (index 1 with
  # seed 10, and index 11 with seed 0, both give "110").
  #
  # 'step' is the batch size. It is capped at 1000 elements per PFADD call and
  # must be positive: with a step of zero or less the element counter would
  # never advance and the loop below would never terminate.
  #
  # Every returned cardinality is a multiple of the (capped) step, so the last
  # one can be greater than 'max' when 'max' is not a multiple of the step.
  # The relative error is a signed percentage: positive when PFCOUNT
  # overestimates the real cardinality, negative when it underestimates it.
  #
  # Raises ArgumentError if 'step' is not positive.
  def run_experiment(r, seed, max, step)
    unless step > 0
      raise ArgumentError, "step must be a positive integer (got #{step.inspect})"
    end

    r.del('hll')
    step = 1000 if step > 1000
    samples = []
    i = 0
    while i < max
      # Build the next batch of 'step' elements, continuing from index i.
      elements = Array.new(step) { |offset| Digest::SHA1.hexdigest("#{i + offset}#{seed}") }
      i += step
      r.pfadd('hll', elements)
      # At this point i is the exact number of distinct elements added so far.
      err = r.pfcount('hll') - i
      samples << [i, 100.to_f * err / i]
    end
    samples
  end
  # Run 'numsets' experiments (seeds 0...numsets) through run_experiment, each
  # with the given 'max' and 'step', and print one "cardinality error" line per
  # sample on standard output. A "Set N" progress line is written to standard
  # error after each experiment completes.
  #
  # 'filter' selects how the errors measured at the same cardinality in the
  # different sets are combined:
  #
  #   :max    - the largest absolute error among all the sets.
  #   :avg    - the average of the signed errors.
  #   :absavg - the average of the absolute errors.
  #   :all    - no aggregation: one line per set for every cardinality.
  #
  # Raises RuntimeError for an unknown filter and ArgumentError if 'numsets'
  # is not positive. Both checks happen before any experiment is run, since
  # the experiments are the slow part.
  def filter_samples(numsets, max, step, filter)
    unless [:max, :avg, :absavg, :all].include?(filter)
      raise "Unknown filter #{filter}"
    end
    unless numsets > 0
      raise ArgumentError, "numsets must be a positive integer (got #{numsets.inspect})"
    end

    r = Redis.new
    dataset = (0...numsets).map do |i|
      samples = run_experiment(r, i, max, step)
      STDERR.puts "Set #{i}"
      samples
    end

    # All the sets share 'max' and 'step', so the sample at a given index
    # refers to the same cardinality in every set.
    dataset[0].each_with_index do |(card, _), index|
      errors = dataset.map { |samples| samples[index][1] }
      case filter
      when :max
        # Compare magnitudes for every set, not just the first one, so that
        # a large underestimation is reported as well.
        puts "#{card} #{errors.map { |e| e.abs }.max}"
      when :avg
        puts "#{card} #{errors.inject(0) { |sum, e| sum + e } / numsets}"
      when :absavg
        puts "#{card} #{errors.inject(0) { |sum, e| sum + e.abs } / numsets}"
      when :all
        dataset.each do |samples|
          set_card, set_err = samples[index]
          puts "#{set_card} #{set_err}"
        end
      end
    end
  end
  # Command line entry point: hll-gnuplot-graph <samples> <max> <step> <filter>
  #
  # Diagnostics go to standard error because standard output carries the data
  # lines printed by filter_samples.
  if ARGV.length != 4
    STDERR.puts "Usage: hll-gnuplot-graph <samples> <max> <step> (max|avg|absavg|all)"
    exit 1
  end

  begin
    # Integer() rejects non-numeric input, whereas to_i would silently turn
    # it into 0.
    numsets, max, step = ARGV[0, 3].map { |arg| Integer(arg, 10) }
  rescue ArgumentError => e
    STDERR.puts "Invalid numeric argument: #{e.message}"
    exit 1
  end

  filter_samples(numsets, max, step, ARGV[3].to_sym)