123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676 |
- # Multi-instance test framework.
- # This is used in order to test Sentinel and Redis Cluster, and provides
- # basic capabilities for spawning and handling N parallel Redis / Sentinel
- # instances.
- #
- # Copyright (C) 2014 Salvatore Sanfilippo antirez@gmail.com
- # This software is released under the BSD License. See the COPYING file for
- # more information.
- package require Tcl 8.5
- set tcl_precision 17
- source ../support/redis.tcl
- source ../support/util.tcl
- source ../support/server.tcl
- source ../support/test.tcl
- set ::verbose 0
- set ::valgrind 0
- set ::tls 0
- set ::pause_on_error 0
- set ::dont_clean 0
- set ::simulate_error 0
- set ::failed 0
- set ::sentinel_instances {}
- set ::redis_instances {}
- set ::global_config {}
- set ::sentinel_base_port 20000
- set ::redis_base_port 30000
- set ::redis_port_count 1024
- set ::host "127.0.0.1"
- set ::leaked_fds_file [file normalize "tmp/leaked_fds.txt"]
- set ::pids {} ; # We kill everything at exit
- set ::dirs {} ; # We remove all the temp dirs at exit
- set ::run_matching {} ; # If non empty, only tests matching pattern are run.
- if {[catch {cd tmp}]} {
- puts "tmp directory not found."
- puts "Please run this test from the Redis source root."
- exit 1
- }
- # Execute the specified instance of the server specified by 'type', using
- # the provided configuration file. Returns the PID of the process.
- proc exec_instance {type dirname cfgfile} {
- if {$type eq "redis"} {
- set prgname redis-server
- } elseif {$type eq "sentinel"} {
- set prgname redis-sentinel
- } else {
- error "Unknown instance type."
- }
- set errfile [file join $dirname err.txt]
- if {$::valgrind} {
- set pid [exec valgrind --track-origins=yes --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile 2>> $errfile &]
- } else {
- set pid [exec ../../../src/${prgname} $cfgfile 2>> $errfile &]
- }
- return $pid
- }
- # Spawn a redis or sentinel instance, depending on 'type'.
- proc spawn_instance {type base_port count {conf {}} {base_conf_file ""}} {
- for {set j 0} {$j < $count} {incr j} {
- set port [find_available_port $base_port $::redis_port_count]
- # plaintext port (only used for TLS cluster)
- set pport 0
- # Create a directory for this instance.
- set dirname "${type}_${j}"
- lappend ::dirs $dirname
- catch {exec rm -rf $dirname}
- file mkdir $dirname
- # Write the instance config file.
- set cfgfile [file join $dirname $type.conf]
- if {$base_conf_file ne ""} {
- file copy -- $base_conf_file $cfgfile
- set cfg [open $cfgfile a+]
- } else {
- set cfg [open $cfgfile w]
- }
- if {$::tls} {
- puts $cfg "tls-port $port"
- puts $cfg "tls-replication yes"
- puts $cfg "tls-cluster yes"
- # plaintext port, only used by plaintext clients in a TLS cluster
- set pport [find_available_port $base_port $::redis_port_count]
- puts $cfg "port $pport"
- puts $cfg [format "tls-cert-file %s/../../tls/server.crt" [pwd]]
- puts $cfg [format "tls-key-file %s/../../tls/server.key" [pwd]]
- puts $cfg [format "tls-client-cert-file %s/../../tls/client.crt" [pwd]]
- puts $cfg [format "tls-client-key-file %s/../../tls/client.key" [pwd]]
- puts $cfg [format "tls-dh-params-file %s/../../tls/redis.dh" [pwd]]
- puts $cfg [format "tls-ca-cert-file %s/../../tls/ca.crt" [pwd]]
- puts $cfg "loglevel debug"
- } else {
- puts $cfg "port $port"
- }
- puts $cfg "dir ./$dirname"
- puts $cfg "logfile log.txt"
- # Add additional config files
- foreach directive $conf {
- puts $cfg $directive
- }
- dict for {name val} $::global_config {
- puts $cfg "$name $val"
- }
- close $cfg
- # Finally exec it and remember the pid for later cleanup.
- set retry 100
- while {$retry} {
- set pid [exec_instance $type $dirname $cfgfile]
- # Check availability
- if {[server_is_up 127.0.0.1 $port 100] == 0} {
- puts "Starting $type #$j at port $port failed, try another"
- incr retry -1
- set port [find_available_port $base_port $::redis_port_count]
- set cfg [open $cfgfile a+]
- if {$::tls} {
- puts $cfg "tls-port $port"
- set pport [find_available_port $base_port $::redis_port_count]
- puts $cfg "port $pport"
- } else {
- puts $cfg "port $port"
- }
- close $cfg
- } else {
- puts "Starting $type #$j at port $port"
- lappend ::pids $pid
- break
- }
- }
- # Check availability finally
- if {[server_is_up $::host $port 100] == 0} {
- set logfile [file join $dirname log.txt]
- puts [exec tail $logfile]
- abort_sentinel_test "Problems starting $type #$j: ping timeout, maybe server start failed, check $logfile"
- }
- # Push the instance into the right list
- set link [redis $::host $port 0 $::tls]
- $link reconnect 1
- lappend ::${type}_instances [list \
- pid $pid \
- host $::host \
- port $port \
- plaintext-port $pport \
- link $link \
- ]
- }
- }
- proc log_crashes {} {
- set start_pattern {*REDIS BUG REPORT START*}
- set logs [glob */log.txt]
- foreach log $logs {
- set fd [open $log]
- set found 0
- while {[gets $fd line] >= 0} {
- if {[string match $start_pattern $line]} {
- puts "\n*** Crash report found in $log ***"
- set found 1
- }
- if {$found} {
- puts $line
- incr ::failed
- }
- }
- }
- set logs [glob */err.txt]
- foreach log $logs {
- set res [find_valgrind_errors $log true]
- if {$res != ""} {
- puts $res
- incr ::failed
- }
- }
- }
- proc is_alive pid {
- if {[catch {exec ps -p $pid} err]} {
- return 0
- } else {
- return 1
- }
- }
- proc stop_instance pid {
- catch {exec kill $pid}
- # Node might have been stopped in the test
- catch {exec kill -SIGCONT $pid}
- if {$::valgrind} {
- set max_wait 120000
- } else {
- set max_wait 10000
- }
- while {[is_alive $pid]} {
- incr wait 10
- if {$wait == $max_wait} {
- puts [colorstr red "Forcing process $pid to crash..."]
- catch {exec kill -SEGV $pid}
- } elseif {$wait >= $max_wait * 2} {
- puts [colorstr red "Forcing process $pid to exit..."]
- catch {exec kill -KILL $pid}
- } elseif {$wait % 1000 == 0} {
- puts "Waiting for process $pid to exit..."
- }
- after 10
- }
- }
- proc cleanup {} {
- puts "Cleaning up..."
- foreach pid $::pids {
- puts "killing stale instance $pid"
- stop_instance $pid
- }
- log_crashes
- if {$::dont_clean} {
- return
- }
- foreach dir $::dirs {
- catch {exec rm -rf $dir}
- }
- }
- proc abort_sentinel_test msg {
- incr ::failed
- puts "WARNING: Aborting the test."
- puts ">>>>>>>> $msg"
- if {$::pause_on_error} pause_on_error
- cleanup
- exit 1
- }
- proc parse_options {} {
- for {set j 0} {$j < [llength $::argv]} {incr j} {
- set opt [lindex $::argv $j]
- set val [lindex $::argv [expr $j+1]]
- if {$opt eq "--single"} {
- incr j
- set ::run_matching "*${val}*"
- } elseif {$opt eq "--pause-on-error"} {
- set ::pause_on_error 1
- } elseif {$opt eq {--dont-clean}} {
- set ::dont_clean 1
- } elseif {$opt eq "--fail"} {
- set ::simulate_error 1
- } elseif {$opt eq {--valgrind}} {
- set ::valgrind 1
- } elseif {$opt eq {--host}} {
- incr j
- set ::host ${val}
- } elseif {$opt eq {--tls}} {
- package require tls 1.6
- ::tls::init \
- -cafile "$::tlsdir/ca.crt" \
- -certfile "$::tlsdir/client.crt" \
- -keyfile "$::tlsdir/client.key"
- set ::tls 1
- } elseif {$opt eq {--config}} {
- set val2 [lindex $::argv [expr $j+2]]
- dict set ::global_config $val $val2
- incr j 2
- } elseif {$opt eq "--help"} {
- puts "--single <pattern> Only runs tests specified by pattern."
- puts "--dont-clean Keep log files on exit."
- puts "--pause-on-error Pause for manual inspection on error."
- puts "--fail Simulate a test failure."
- puts "--valgrind Run with valgrind."
- puts "--tls Run tests in TLS mode."
- puts "--host <host> Use hostname instead of 127.0.0.1."
- puts "--config <k> <v> Extra config argument(s)."
- puts "--help Shows this help."
- exit 0
- } else {
- puts "Unknown option $opt"
- exit 1
- }
- }
- }
- # If --pause-on-error option was passed at startup this function is called
- # on error in order to give the developer a chance to understand more about
- # the error condition while the instances are still running.
- proc pause_on_error {} {
- puts ""
- puts [colorstr yellow "*** Please inspect the error now ***"]
- puts "\nType \"continue\" to resume the test, \"help\" for help screen.\n"
- while 1 {
- puts -nonewline "> "
- flush stdout
- set line [gets stdin]
- set argv [split $line " "]
- set cmd [lindex $argv 0]
- if {$cmd eq {continue}} {
- break
- } elseif {$cmd eq {show-redis-logs}} {
- set count 10
- if {[lindex $argv 1] ne {}} {set count [lindex $argv 1]}
- foreach_redis_id id {
- puts "=== REDIS $id ===="
- puts [exec tail -$count redis_$id/log.txt]
- puts "---------------------\n"
- }
- } elseif {$cmd eq {show-sentinel-logs}} {
- set count 10
- if {[lindex $argv 1] ne {}} {set count [lindex $argv 1]}
- foreach_sentinel_id id {
- puts "=== SENTINEL $id ===="
- puts [exec tail -$count sentinel_$id/log.txt]
- puts "---------------------\n"
- }
- } elseif {$cmd eq {ls}} {
- foreach_redis_id id {
- puts -nonewline "Redis $id"
- set errcode [catch {
- set str {}
- append str "@[RI $id tcp_port]: "
- append str "[RI $id role] "
- if {[RI $id role] eq {slave}} {
- append str "[RI $id master_host]:[RI $id master_port]"
- }
- set str
- } retval]
- if {$errcode} {
- puts " -- $retval"
- } else {
- puts $retval
- }
- }
- foreach_sentinel_id id {
- puts -nonewline "Sentinel $id"
- set errcode [catch {
- set str {}
- append str "@[SI $id tcp_port]: "
- append str "[join [S $id sentinel get-master-addr-by-name mymaster]]"
- set str
- } retval]
- if {$errcode} {
- puts " -- $retval"
- } else {
- puts $retval
- }
- }
- } elseif {$cmd eq {help}} {
- puts "ls List Sentinel and Redis instances."
- puts "show-sentinel-logs \[N\] Show latest N lines of logs."
- puts "show-redis-logs \[N\] Show latest N lines of logs."
- puts "S <id> cmd ... arg Call command in Sentinel <id>."
- puts "R <id> cmd ... arg Call command in Redis <id>."
- puts "SI <id> <field> Show Sentinel <id> INFO <field>."
- puts "RI <id> <field> Show Redis <id> INFO <field>."
- puts "continue Resume test."
- } else {
- set errcode [catch {eval $line} retval]
- if {$retval ne {}} {puts "$retval"}
- }
- }
- }
- # We redefine 'test' as for Sentinel we don't use the server-client
- # architecture for the test, everything is sequential.
- proc test {descr code} {
- set ts [clock format [clock seconds] -format %H:%M:%S]
- puts -nonewline "$ts> $descr: "
- flush stdout
- if {[catch {set retval [uplevel 1 $code]} error]} {
- incr ::failed
- if {[string match "assertion:*" $error]} {
- set msg "FAILED: [string range $error 10 end]"
- puts [colorstr red $msg]
- if {$::pause_on_error} pause_on_error
- puts [colorstr red "(Jumping to next unit after error)"]
- return -code continue
- } else {
- # Re-raise, let handler up the stack take care of this.
- error $error $::errorInfo
- }
- } else {
- puts [colorstr green OK]
- }
- }
- # Check memory leaks when running on OSX using the "leaks" utility.
- proc check_leaks instance_types {
- if {[string match {*Darwin*} [exec uname -a]]} {
- puts -nonewline "Testing for memory leaks..."; flush stdout
- foreach type $instance_types {
- foreach_instance_id [set ::${type}_instances] id {
- if {[instance_is_killed $type $id]} continue
- set pid [get_instance_attrib $type $id pid]
- set output {0 leaks}
- catch {exec leaks $pid} output
- if {[string match {*process does not exist*} $output] ||
- [string match {*cannot examine*} $output]} {
- # In a few tests we kill the server process.
- set output "0 leaks"
- } else {
- puts -nonewline "$type/$pid "
- flush stdout
- }
- if {![string match {*0 leaks*} $output]} {
- puts [colorstr red "=== MEMORY LEAK DETECTED ==="]
- puts "Instance type $type, ID $id:"
- puts $output
- puts "==="
- incr ::failed
- }
- }
- }
- puts ""
- }
- }
- # Execute all the units inside the 'tests' directory.
- proc run_tests {} {
- set tests [lsort [glob ../tests/*]]
- foreach test $tests {
- # Remove leaked_fds file before starting
- if {$::leaked_fds_file != "" && [file exists $::leaked_fds_file]} {
- file delete $::leaked_fds_file
- }
- if {$::run_matching ne {} && [string match $::run_matching $test] == 0} {
- continue
- }
- if {[file isdirectory $test]} continue
- puts [colorstr yellow "Testing unit: [lindex [file split $test] end]"]
- source $test
- check_leaks {redis sentinel}
- # Check if a leaked fds file was created and abort the test.
- if {$::leaked_fds_file != "" && [file exists $::leaked_fds_file]} {
- puts [colorstr red "ERROR: Sentinel has leaked fds to scripts:"]
- puts [exec cat $::leaked_fds_file]
- puts "----"
- incr ::failed
- }
- }
- }
- # Print a message and exists with 0 / 1 according to zero or more failures.
- proc end_tests {} {
- if {$::failed == 0 } {
- puts [colorstr green "GOOD! No errors."]
- exit 0
- } else {
- puts [colorstr red "WARNING $::failed test(s) failed."]
- exit 1
- }
- }
- # The "S" command is used to interact with the N-th Sentinel.
- # The general form is:
- #
- # S <sentinel-id> command arg arg arg ...
- #
- # Example to ping the Sentinel 0 (first instance): S 0 PING
- proc S {n args} {
- set s [lindex $::sentinel_instances $n]
- [dict get $s link] {*}$args
- }
- # Returns a Redis instance by index.
- # Example:
- # [Rn 0] info
- proc Rn {n} {
- return [dict get [lindex $::redis_instances $n] link]
- }
- # Like R but to chat with Redis instances.
- proc R {n args} {
- [Rn $n] {*}$args
- }
- proc get_info_field {info field} {
- set fl [string length $field]
- append field :
- foreach line [split $info "\n"] {
- set line [string trim $line "\r\n "]
- if {[string range $line 0 $fl] eq $field} {
- return [string range $line [expr {$fl+1}] end]
- }
- }
- return {}
- }
- proc SI {n field} {
- get_info_field [S $n info] $field
- }
- proc RI {n field} {
- get_info_field [R $n info] $field
- }
- proc RPort {n} {
- if {$::tls} {
- return [lindex [R $n config get tls-port] 1]
- } else {
- return [lindex [R $n config get port] 1]
- }
- }
- # Iterate over IDs of sentinel or redis instances.
- proc foreach_instance_id {instances idvar code} {
- upvar 1 $idvar id
- for {set id 0} {$id < [llength $instances]} {incr id} {
- set errcode [catch {uplevel 1 $code} result]
- if {$errcode == 1} {
- error $result $::errorInfo $::errorCode
- } elseif {$errcode == 4} {
- continue
- } elseif {$errcode == 3} {
- break
- } elseif {$errcode != 0} {
- return -code $errcode $result
- }
- }
- }
- proc foreach_sentinel_id {idvar code} {
- set errcode [catch {uplevel 1 [list foreach_instance_id $::sentinel_instances $idvar $code]} result]
- return -code $errcode $result
- }
- proc foreach_redis_id {idvar code} {
- set errcode [catch {uplevel 1 [list foreach_instance_id $::redis_instances $idvar $code]} result]
- return -code $errcode $result
- }
- # Get the specific attribute of the specified instance type, id.
- proc get_instance_attrib {type id attrib} {
- dict get [lindex [set ::${type}_instances] $id] $attrib
- }
- # Set the specific attribute of the specified instance type, id.
- proc set_instance_attrib {type id attrib newval} {
- set d [lindex [set ::${type}_instances] $id]
- dict set d $attrib $newval
- lset ::${type}_instances $id $d
- }
- # Create a master-slave cluster of the given number of total instances.
- # The first instance "0" is the master, all others are configured as
- # slaves.
- proc create_redis_master_slave_cluster n {
- foreach_redis_id id {
- if {$id == 0} {
- # Our master.
- R $id slaveof no one
- R $id flushall
- } elseif {$id < $n} {
- R $id slaveof [get_instance_attrib redis 0 host] \
- [get_instance_attrib redis 0 port]
- } else {
- # Instances not part of the cluster.
- R $id slaveof no one
- }
- }
- # Wait for all the slaves to sync.
- wait_for_condition 1000 50 {
- [RI 0 connected_slaves] == ($n-1)
- } else {
- fail "Unable to create a master-slaves cluster."
- }
- }
- proc get_instance_id_by_port {type port} {
- foreach_${type}_id id {
- if {[get_instance_attrib $type $id port] == $port} {
- return $id
- }
- }
- fail "Instance $type port $port not found."
- }
- # Kill an instance of the specified type/id with SIGKILL.
- # This function will mark the instance PID as -1 to remember that this instance
- # is no longer running and will remove its PID from the list of pids that
- # we kill at cleanup.
- #
- # The instance can be restarted with restart-instance.
- proc kill_instance {type id} {
- set pid [get_instance_attrib $type $id pid]
- set port [get_instance_attrib $type $id port]
- if {$pid == -1} {
- error "You tried to kill $type $id twice."
- }
- stop_instance $pid
- set_instance_attrib $type $id pid -1
- set_instance_attrib $type $id link you_tried_to_talk_with_killed_instance
- # Remove the PID from the list of pids to kill at exit.
- set ::pids [lsearch -all -inline -not -exact $::pids $pid]
- # Wait for the port it was using to be available again, so that's not
- # an issue to start a new server ASAP with the same port.
- set retry 100
- while {[incr retry -1]} {
- set port_is_free [catch {set s [socket 127.0.0.1 $port]}]
- if {$port_is_free} break
- catch {close $s}
- after 100
- }
- if {$retry == 0} {
- error "Port $port does not return available after killing instance."
- }
- }
- # Return true of the instance of the specified type/id is killed.
- proc instance_is_killed {type id} {
- set pid [get_instance_attrib $type $id pid]
- expr {$pid == -1}
- }
- # Restart an instance previously killed by kill_instance
- proc restart_instance {type id} {
- set dirname "${type}_${id}"
- set cfgfile [file join $dirname $type.conf]
- set port [get_instance_attrib $type $id port]
- # Execute the instance with its old setup and append the new pid
- # file for cleanup.
- set pid [exec_instance $type $dirname $cfgfile]
- set_instance_attrib $type $id pid $pid
- lappend ::pids $pid
- # Check that the instance is running
- if {[server_is_up 127.0.0.1 $port 100] == 0} {
- set logfile [file join $dirname log.txt]
- puts [exec tail $logfile]
- abort_sentinel_test "Problems starting $type #$id: ping timeout, maybe server start failed, check $logfile"
- }
- # Connect with it with a fresh link
- set link [redis 127.0.0.1 $port 0 $::tls]
- $link reconnect 1
- set_instance_attrib $type $id link $link
- # Make sure the instance is not loading the dataset when this
- # function returns.
- while 1 {
- catch {[$link ping]} retval
- if {[string match {*LOADING*} $retval]} {
- after 100
- continue
- } else {
- break
- }
- }
- }
- proc redis_deferring_client {type id} {
- set port [get_instance_attrib $type $id port]
- set host [get_instance_attrib $type $id host]
- set client [redis $host $port 1 $::tls]
- return $client
- }
- proc redis_client {type id} {
- set port [get_instance_attrib $type $id port]
- set host [get_instance_attrib $type $id host]
- set client [redis $host $port 0 $::tls]
- return $client
- }
|