instances.tcl 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. # Multi-instance test framework.
  2. # This is used in order to test Sentinel and Redis Cluster, and provides
  3. # basic capabilities for spawning and handling N parallel Redis / Sentinel
  4. # instances.
  5. #
  6. # Copyright (C) 2014 Salvatore Sanfilippo antirez@gmail.com
  7. # This software is released under the BSD License. See the COPYING file for
  8. # more information.
  9. package require Tcl 8.5
  10. set tcl_precision 17
  11. source ../support/redis.tcl
  12. source ../support/util.tcl
  13. source ../support/server.tcl
  14. source ../support/test.tcl
  # ---- Global state shared by the procedures in this file ----

  # Option flags. parse_options switches pause_on_error and simulate_error
  # to 1 when the matching command line option is given.
  set ::verbose        0
  set ::pause_on_error 0
  set ::simulate_error 0

  # If non empty, only tests matching pattern are run.
  set ::run_matching {}

  # Per-type instance registries; spawn_instance appends one entry per
  # started instance.
  set ::sentinel_instances {}
  set ::redis_instances    {}

  # Base ports for each instance type.
  set ::sentinel_base_port 20000
  set ::redis_base_port    30000

  # Bookkeeping used by cleanup.
  set ::pids {} ; # We kill everything at exit
  set ::dirs {} ; # We remove all the temp dirs at exit

  # Everything below runs from inside the tmp directory; give up early if
  # it cannot be entered.
  if {[catch {cd tmp}] != 0} {
      puts "tmp directory not found.\nPlease run this test from the Redis source root."
      exit 1
  }
  # Spawn 'count' redis or sentinel instances, depending on 'type'.
  #
  # Arguments:
  #   type      - "redis" or "sentinel"; anything else raises
  #               "Unknown instance type." before any side effect happens.
  #   base_port - first port handed to find_available_port; incremented by
  #               one for every instance.
  #   count     - number of instances to start.
  #   conf      - optional list of extra configuration lines appended
  #               verbatim to every generated config file.
  #
  # For each instance a directory "<type>_<n>" is (re)created in the current
  # directory, a config file is written into it, the server is started in
  # the background and an entry {pid host port link} is appended to
  # ::<type>_instances. PIDs and directories are recorded in ::pids and
  # ::dirs for cleanup.
  proc spawn_instance {type base_port count {conf {}}} {
      # Resolve the executable once, up front: an unknown type must fail
      # before a directory or config file is left behind on disk.
      switch -exact -- $type {
          redis    {set prgname redis-server}
          sentinel {set prgname redis-sentinel}
          default  {error "Unknown instance type."}
      }

      for {set j 0} {$j < $count} {incr j} {
          set port [find_available_port $base_port]
          incr base_port
          puts "Starting $type #$j at port $port"

          # Create a fresh directory for this instance, discarding whatever
          # an earlier run may have left there (best effort).
          set dirname "${type}_${j}"
          lappend ::dirs $dirname
          catch {file delete -force -- $dirname}
          file mkdir $dirname

          # Write the instance config file.
          set cfgfile [file join $dirname $type.conf]
          set cfg [open $cfgfile w]
          puts $cfg "port $port"
          puts $cfg "dir ./$dirname"
          puts $cfg "logfile log.txt"
          # Append the caller supplied configuration directives.
          foreach directive $conf {
              puts $cfg $directive
          }
          close $cfg

          # Finally exec it and remember the pid for later cleanup.
          set pid [exec ../../../src/${prgname} $cfgfile &]
          lappend ::pids $pid

          # Check availability.
          if {[server_is_up 127.0.0.1 $port 100] == 0} {
              abort_sentinel_test "Problems starting $type #$j: ping timeout"
          }

          # Push the instance into the right list.
          set link [redis 127.0.0.1 $port]
          $link reconnect 1
          lappend ::${type}_instances [list \
              pid $pid \
              host 127.0.0.1 \
              port $port \
              link $link \
          ]
      }
  }
  # Best-effort teardown: SIGKILL every process listed in ::pids, then
  # remove every directory listed in ::dirs. Individual failures are
  # swallowed so that one stale entry cannot stop the rest of the cleanup.
  proc cleanup {} {
      puts "Cleaning up..."
      foreach victim $::pids {
          catch {exec kill -9 $victim}
      }
      foreach workdir $::dirs {
          catch {exec rm -rf $workdir}
      }
  }
  # Print a warning followed by 'msg', run cleanup and terminate the
  # process with exit status 1. Never returns.
  proc abort_sentinel_test msg {
      puts "WARNING: Aborting the test.\n>>>>>>>> $msg"
      cleanup
      exit 1
  }
  # Walk ::argv and translate the recognised command line options into the
  # matching global settings:
  #
  #   --single <pattern>  sets ::run_matching to "*<pattern>*"
  #   --pause-on-error    sets ::pause_on_error to 1
  #   --fail              sets ::simulate_error to 1
  #   --help              prints the usage text and exits with status 0
  #
  # Any other option prints "Unknown option ..." and exits with status 1.
  proc parse_options {} {
      set total [llength $::argv]
      for {set j 0} {$j < $total} {incr j} {
          set opt [lindex $::argv $j]
          switch -exact -- $opt {
              --single {
                  # This option consumes the following word as its value.
                  incr j
                  set ::run_matching "*[lindex $::argv $j]*"
              }
              --pause-on-error {
                  set ::pause_on_error 1
              }
              --fail {
                  set ::simulate_error 1
              }
              --help {
                  puts "Hello, I'm sentinel.tcl and I run Sentinel unit tests."
                  puts "\nOptions:"
                  puts "--single <pattern> Only runs tests specified by pattern."
                  puts "--pause-on-error Pause for manual inspection on error."
                  puts "--fail Simulate a test failure."
                  puts "--help Shows this help."
                  exit 0
              }
              default {
                  puts "Unknown option $opt"
                  exit 1
              }
          }
      }
  }
  # If --pause-on-error option was passed at startup this function is called
  # on error in order to give the developer a chance to understand more about
  # the error condition while the instances are still running.
  #
  # It runs a small read-eval loop on stdin. Built-in commands are
  # "continue", "ls", "show-redis-logs [N]", "show-sentinel-logs [N]" and
  # "help"; any other line is evaluated as Tcl and its result (or error
  # message) is printed. The loop ends on "continue" or when stdin reaches
  # end of file.
  proc pause_on_error {} {
      puts ""
      puts [colorstr yellow "*** Please inspect the error now ***"]
      puts "\nType \"continue\" to resume the test, \"help\" for help screen.\n"
      while 1 {
          puts -nonewline "> "
          flush stdout
          # The two-argument form of gets reports end of file as a negative
          # count. Without this check a closed stdin yields empty lines
          # forever and the prompt spins in a busy loop.
          if {[gets stdin line] < 0} break
          set argv [split $line " "]
          set cmd [lindex $argv 0]
          if {$cmd eq {continue}} {
              break
          } elseif {$cmd eq {show-redis-logs}} {
              set count 10
              if {[lindex $argv 1] ne {}} {set count [lindex $argv 1]}
              foreach_redis_id id {
                  puts "=== REDIS $id ===="
                  puts [exec tail -$count redis_$id/log.txt]
                  puts "---------------------\n"
              }
          } elseif {$cmd eq {show-sentinel-logs}} {
              set count 10
              if {[lindex $argv 1] ne {}} {set count [lindex $argv 1]}
              foreach_sentinel_id id {
                  puts "=== SENTINEL $id ===="
                  puts [exec tail -$count sentinel_$id/log.txt]
                  puts "---------------------\n"
              }
          } elseif {$cmd eq {ls}} {
              # One summary line per instance. Errors talking to an instance
              # (for example a killed one) are shown instead of aborting
              # the listing.
              foreach_redis_id id {
                  puts -nonewline "Redis $id"
                  set errcode [catch {
                      set str {}
                      append str "@[RI $id tcp_port]: "
                      append str "[RI $id role] "
                      if {[RI $id role] eq {slave}} {
                          append str "[RI $id master_host]:[RI $id master_port]"
                      }
                      set str
                  } retval]
                  if {$errcode} {
                      puts " -- $retval"
                  } else {
                      puts $retval
                  }
              }
              foreach_sentinel_id id {
                  puts -nonewline "Sentinel $id"
                  set errcode [catch {
                      set str {}
                      append str "@[SI $id tcp_port]: "
                      append str "[join [S $id sentinel get-master-addr-by-name mymaster]]"
                      set str
                  } retval]
                  if {$errcode} {
                      puts " -- $retval"
                  } else {
                      puts $retval
                  }
              }
          } elseif {$cmd eq {help}} {
              puts "ls List Sentinel and Redis instances."
              puts "show-sentinel-logs \[N\] Show latest N lines of logs."
              puts "show-redis-logs \[N\] Show latest N lines of logs."
              puts "S <id> cmd ... arg Call command in Sentinel <id>."
              puts "R <id> cmd ... arg Call command in Redis <id>."
              puts "SI <id> <field> Show Sentinel <id> INFO <field>."
              puts "RI <id> <field> Show Redis <id> INFO <field>."
              puts "continue Resume test."
          } else {
              # Anything else is treated as a Tcl command typed by the
              # developer and evaluated in this procedure's scope.
              set errcode [catch {eval $line} retval]
              if {$retval ne {}} {puts "$retval"}
          }
      }
  }
  # Replacement for the stock 'test' command: Sentinel tests do not use the
  # server-client architecture, everything runs sequentially in-process.
  #
  # Prints a timestamped "<descr>: " prefix, evaluates 'code' in the caller's
  # scope and then:
  #   - on success prints a green OK;
  #   - on an error whose message starts with "assertion:" prints the rest
  #     of the message in red, optionally enters pause_on_error, and returns
  #     with code "continue" so the caller skips to the next unit;
  #   - on any other error re-raises it with the original error info.
  proc test {descr code} {
      set now [clock format [clock seconds] -format %H:%M:%S]
      puts -nonewline "$now> $descr: "
      flush stdout

      if {![catch {uplevel 1 $code} failure]} {
          puts [colorstr green OK]
          return
      }

      if {![string match "assertion:*" $failure]} {
          # Re-raise, let handler up the stack take care of this.
          error $failure $::errorInfo
      }

      # Strip the 10 character "assertion:" prefix before reporting.
      puts [colorstr red [string range $failure 10 end]]
      if {$::pause_on_error} pause_on_error
      puts "(Jumping to next unit after error)"
      return -code continue
  }
  # Source every file found in ../tests, in sorted order. Directories are
  # skipped, and so is any path not matching ::run_matching when that
  # pattern is non empty.
  #
  # The unit files are sourced in this procedure's own frame, which is why
  # the 'tests' and 'test' variable names are left as they are.
  proc run_tests {} {
      set tests [lsort [glob ../tests/*]]
      foreach test $tests {
          set filtered_out [expr {
              $::run_matching ne {} && ![string match $::run_matching $test]
          }]
          if {$filtered_out || [file isdirectory $test]} continue
          set unit_name [lindex [file split $test] end]
          puts [colorstr yellow "Testing unit: $unit_name"]
          source $test
      }
  }
  # Talk to the N-th Sentinel instance.
  #
  # Usage:
  #
  #     S <sentinel-id> command arg arg arg ...
  #
  # The command and its arguments are forwarded unchanged to the link stored
  # for that instance, and the link's result is returned.
  # Example, ping Sentinel 0 (the first instance): S 0 PING
  proc S {n args} {
      set link [dict get [lindex $::sentinel_instances $n] link]
      $link {*}$args
  }
  # Like S, but talks to the N-th Redis instance:
  #
  #     R <redis-id> command arg arg arg ...
  proc R {n args} {
      set link [dict get [lindex $::redis_instances $n] link]
      $link {*}$args
  }
  # Extract the value of 'field' from INFO-style text made of "name:value"
  # lines. Each line is trimmed of surrounding CR, LF and spaces before it
  # is examined. Returns the text following "<field>:" on the first matching
  # line, or an empty string when no line starts with that prefix.
  proc get_info_field {info field} {
      set prefix "${field}:"
      set value_start [string length $prefix]
      foreach raw [split $info "\n"] {
          set entry [string trim $raw "\r\n "]
          if {[string first $prefix $entry] == 0} {
              return [string range $entry $value_start end]
          }
      }
      return {}
  }
  # Return 'field' from the INFO output of Sentinel instance 'n'.
  proc SI {n field} {
      set info [S $n info]
      return [get_info_field $info $field]
  }
  # Return 'field' from the INFO output of Redis instance 'n'.
  proc RI {n field} {
      set info [R $n info]
      return [get_info_field $info $field]
  }
  # Loop construct over the IDs (0 .. N-1) of the given instance list.
  #
  # 'idvar' names a variable in the caller's scope that receives the current
  # ID; 'code' is evaluated in the caller's scope for every ID. The body may
  # use break, continue and return just like in a native loop, and errors
  # raised by the body are re-raised with their original error info and
  # error code.
  proc foreach_instance_id {instances idvar code} {
      upvar 1 $idvar id
      set total [llength $instances]
      for {set id 0} {$id < $total} {incr id} {
          set status [catch {uplevel 1 $code} result]
          switch -exact -- $status {
              0 {}
              1 {error $result $::errorInfo $::errorCode}
              3 {break}
              4 {continue}
              default {return -code $status $result}
          }
      }
  }
  # foreach_instance_id bound to the Sentinel instance list. The loop is run
  # in the caller's scope and its completion code and result are passed back
  # to the caller.
  proc foreach_sentinel_id {idvar code} {
      set iteration [list foreach_instance_id $::sentinel_instances $idvar $code]
      set status [catch {uplevel 1 $iteration} outcome]
      return -code $status $outcome
  }
  # foreach_instance_id bound to the Redis instance list. The loop is run in
  # the caller's scope and its completion code and result are passed back to
  # the caller.
  proc foreach_redis_id {idvar code} {
      set iteration [list foreach_instance_id $::redis_instances $idvar $code]
      set status [catch {uplevel 1 $iteration} outcome]
      return -code $status $outcome
  }
  # Look up one attribute (for example pid, host, port or link) of the
  # instance with the given type and id in ::<type>_instances.
  proc get_instance_attrib {type id attrib} {
      set registry [set ::${type}_instances]
      set instance [lindex $registry $id]
      return [dict get $instance $attrib]
  }
  # Store 'newval' under 'attrib' for the instance with the given type and
  # id, updating ::<type>_instances in place.
  proc set_instance_attrib {type id attrib newval} {
      set varname ::${type}_instances
      set instance [lindex [set $varname] $id]
      dict set instance $attrib $newval
      lset $varname $id $instance
  }
  # Arrange the Redis instances as one master with n-1 slaves.
  #
  # Instance 0 becomes the master and is flushed. Instances 1 .. n-1 are
  # made slaves of instance 0. Every remaining instance is detached with
  # SLAVEOF NO ONE. The procedure then waits until the master reports n-1
  # connected slaves, and fails the test if that does not happen.
  proc create_redis_master_slave_cluster n {
      foreach_redis_id id {
          set is_replica [expr {$id != 0 && $id < $n}]
          if {$is_replica} {
              set master_host [get_instance_attrib redis 0 host]
              set master_port [get_instance_attrib redis 0 port]
              R $id slaveof $master_host $master_port
          } else {
              # Either our master (id 0) or an instance that is not part of
              # the cluster: both must not replicate from anybody.
              R $id slaveof no one
              if {$id == 0} {
                  R $id flushall
              }
          }
      }

      # Wait for all the slaves to sync.
      wait_for_condition 1000 50 {
          [RI 0 connected_slaves] == ($n-1)
      } else {
          fail "Unable to create a master-slaves cluster."
      }
  }
  # Return the id of the instance of the given type whose port equals
  # 'port'. Calls 'fail' when no instance of that type uses the port.
  proc get_instance_id_by_port {type port} {
      set iterator foreach_${type}_id
      $iterator id {
          set candidate [get_instance_attrib $type $id port]
          if {$candidate == $port} {
              return $id
          }
      }
      fail "Instance $type port $port not found."
  }
  # Kill an instance of the specified type/id with SIGKILL.
  #
  # The instance PID is then recorded as -1, its link is replaced with a
  # placeholder name, and the old PID is dropped from ::pids so cleanup
  # does not try to kill it again. Killing the same instance twice is
  # reported as an error.
  #
  # The instance can be restarted with restart_instance.
  #
  # NOTE(review): the previous link is overwritten without being closed
  # here; confirm whether the client library needs an explicit close.
  proc kill_instance {type id} {
      set victim [get_instance_attrib $type $id pid]
      if {$victim == -1} {
          error "You tried to kill $type $id twice."
      }
      exec kill -9 $victim

      # Mark the instance as not running.
      foreach {attrib value} [list \
          pid -1 \
          link you_tried_to_talk_with_killed_instance \
      ] {
          set_instance_attrib $type $id $attrib $value
      }

      # Remove the PID from the list of pids to kill at exit.
      set ::pids [lsearch -all -inline -not -exact $::pids $victim]
  }
  # Return true if the instance of the specified type/id is killed, that is
  # if its recorded PID is -1.
  proc instance_is_killed {type id} {
      return [expr {[get_instance_attrib $type $id pid] == -1}]
  }
  # Restart an instance previously killed by kill_instance.
  #
  # The server is started again from the config file already present in its
  # "<type>_<id>" directory. The new PID is stored in the instance entry and
  # in ::pids, and once the server answers on its old port a fresh link
  # replaces the old one. The whole test is aborted if the server does not
  # come up.
  proc restart_instance {type id} {
      set port [get_instance_attrib $type $id port]
      set cfgfile [file join "${type}_${id}" $type.conf]

      # Execute the instance with its old setup and record the new pid
      # for cleanup.
      set prgname [expr {$type eq "redis" ? "redis-server" : "redis-sentinel"}]
      set newpid [exec ../../../src/${prgname} $cfgfile &]
      set_instance_attrib $type $id pid $newpid
      lappend ::pids $newpid

      # Check that the instance is running.
      if {[server_is_up 127.0.0.1 $port 100] == 0} {
          abort_sentinel_test "Problems starting $type #$id: ping timeout"
      }

      # Connect with it with a fresh link.
      set fresh_link [redis 127.0.0.1 $port]
      $fresh_link reconnect 1
      set_instance_attrib $type $id link $fresh_link
  }