server.tcl 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689
  1. set ::global_overrides {}
  2. set ::tags {}
  3. set ::valgrind_errors {}
  4. proc start_server_error {config_file error} {
  5. set err {}
  6. append err "Can't start the Redis server\n"
  7. append err "CONFIGURATION:"
  8. append err [exec cat $config_file]
  9. append err "\nERROR:"
  10. append err [string trim $error]
  11. send_data_packet $::test_server_fd err $err
  12. }
  13. proc check_valgrind_errors stderr {
  14. set res [find_valgrind_errors $stderr true]
  15. if {$res != ""} {
  16. send_data_packet $::test_server_fd err "Valgrind error: $res\n"
  17. }
  18. }
  19. proc clean_persistence config {
  20. # we may wanna keep the logs for later, but let's clean the persistence
  21. # files right away, since they can accumulate and take up a lot of space
  22. set config [dict get $config "config"]
  23. set rdb [format "%s/%s" [dict get $config "dir"] "dump.rdb"]
  24. set aof [format "%s/%s" [dict get $config "dir"] "appendonly.aof"]
  25. catch {exec rm -rf $rdb}
  26. catch {exec rm -rf $aof}
  27. }
  28. proc kill_server config {
  29. # nothing to kill when running against external server
  30. if {$::external} return
  31. # Close client connection if exists
  32. if {[dict exists $config "client"]} {
  33. [dict get $config "client"] close
  34. }
  35. # nevermind if its already dead
  36. if {![is_alive $config]} {
  37. # Check valgrind errors if needed
  38. if {$::valgrind} {
  39. check_valgrind_errors [dict get $config stderr]
  40. }
  41. return
  42. }
  43. set pid [dict get $config pid]
  44. # check for leaks
  45. if {![dict exists $config "skipleaks"]} {
  46. catch {
  47. if {[string match {*Darwin*} [exec uname -a]]} {
  48. tags {"leaks"} {
  49. test "Check for memory leaks (pid $pid)" {
  50. set output {0 leaks}
  51. catch {exec leaks $pid} output option
  52. # In a few tests we kill the server process, so leaks will not find it.
  53. # It'll exits with exit code >1 on error, so we ignore these.
  54. if {[dict exists $option -errorcode]} {
  55. set details [dict get $option -errorcode]
  56. if {[lindex $details 0] eq "CHILDSTATUS"} {
  57. set status [lindex $details 2]
  58. if {$status > 1} {
  59. set output "0 leaks"
  60. }
  61. }
  62. }
  63. set output
  64. } {*0 leaks*}
  65. }
  66. }
  67. }
  68. }
  69. # kill server and wait for the process to be totally exited
  70. send_data_packet $::test_server_fd server-killing $pid
  71. catch {exec kill $pid}
  72. # Node might have been stopped in the test
  73. catch {exec kill -SIGCONT $pid}
  74. if {$::valgrind} {
  75. set max_wait 120000
  76. } else {
  77. set max_wait 10000
  78. }
  79. while {[is_alive $config]} {
  80. incr wait 10
  81. if {$wait == $max_wait} {
  82. puts "Forcing process $pid to crash..."
  83. catch {exec kill -SEGV $pid}
  84. } elseif {$wait >= $max_wait * 2} {
  85. puts "Forcing process $pid to exit..."
  86. catch {exec kill -KILL $pid}
  87. } elseif {$wait % 1000 == 0} {
  88. puts "Waiting for process $pid to exit..."
  89. }
  90. after 10
  91. }
  92. # Check valgrind errors if needed
  93. if {$::valgrind} {
  94. check_valgrind_errors [dict get $config stderr]
  95. }
  96. # Remove this pid from the set of active pids in the test server.
  97. send_data_packet $::test_server_fd server-killed $pid
  98. }
  99. proc is_alive config {
  100. set pid [dict get $config pid]
  101. if {[catch {exec kill -0 $pid} err]} {
  102. return 0
  103. } else {
  104. return 1
  105. }
  106. }
  107. proc ping_server {host port} {
  108. set retval 0
  109. if {[catch {
  110. if {$::tls} {
  111. set fd [::tls::socket $host $port]
  112. } else {
  113. set fd [socket $host $port]
  114. }
  115. fconfigure $fd -translation binary
  116. puts $fd "PING\r\n"
  117. flush $fd
  118. set reply [gets $fd]
  119. if {[string range $reply 0 0] eq {+} ||
  120. [string range $reply 0 0] eq {-}} {
  121. set retval 1
  122. }
  123. close $fd
  124. } e]} {
  125. if {$::verbose} {
  126. puts -nonewline "."
  127. }
  128. } else {
  129. if {$::verbose} {
  130. puts -nonewline "ok"
  131. }
  132. }
  133. return $retval
  134. }
  135. # Return 1 if the server at the specified addr is reachable by PING, otherwise
  136. # returns 0. Performs a try every 50 milliseconds for the specified number
  137. # of retries.
  138. proc server_is_up {host port retrynum} {
  139. after 10 ;# Use a small delay to make likely a first-try success.
  140. set retval 0
  141. while {[incr retrynum -1]} {
  142. if {[catch {ping_server $host $port} ping]} {
  143. set ping 0
  144. }
  145. if {$ping} {return 1}
  146. after 50
  147. }
  148. return 0
  149. }
  150. # Check if current ::tags match requested tags. If ::allowtags are used,
  151. # there must be some intersection. If ::denytags are used, no intersection
  152. # is allowed. Returns 1 if tags are acceptable or 0 otherwise, in which
  153. # case err_return names a return variable for the message to be logged.
  154. proc tags_acceptable {tags err_return} {
  155. upvar $err_return err
  156. # If tags are whitelisted, make sure there's match
  157. if {[llength $::allowtags] > 0} {
  158. set matched 0
  159. foreach tag $::allowtags {
  160. if {[lsearch $tags $tag] >= 0} {
  161. incr matched
  162. }
  163. }
  164. if {$matched < 1} {
  165. set err "Tag: none of the tags allowed"
  166. return 0
  167. }
  168. }
  169. foreach tag $::denytags {
  170. if {[lsearch $tags $tag] >= 0} {
  171. set err "Tag: $tag denied"
  172. return 0
  173. }
  174. }
  175. if {$::external && [lsearch $tags "external:skip"] >= 0} {
  176. set err "Not supported on external server"
  177. return 0
  178. }
  179. if {$::singledb && [lsearch $tags "singledb:skip"] >= 0} {
  180. set err "Not supported on singledb"
  181. return 0
  182. }
  183. if {$::cluster_mode && [lsearch $tags "cluster:skip"] >= 0} {
  184. set err "Not supported in cluster mode"
  185. return 0
  186. }
  187. if {$::tls && [lsearch $tags "tls:skip"] >= 0} {
  188. set err "Not supported in tls mode"
  189. return 0
  190. }
  191. return 1
  192. }
  193. # doesn't really belong here, but highly coupled to code in start_server
  194. proc tags {tags code} {
  195. # If we 'tags' contain multiple tags, quoted and separated by spaces,
  196. # we want to get rid of the quotes in order to have a proper list
  197. set tags [string map { \" "" } $tags]
  198. set ::tags [concat $::tags $tags]
  199. if {![tags_acceptable $::tags err]} {
  200. incr ::num_aborted
  201. send_data_packet $::test_server_fd ignore $err
  202. set ::tags [lrange $::tags 0 end-[llength $tags]]
  203. return
  204. }
  205. uplevel 1 $code
  206. set ::tags [lrange $::tags 0 end-[llength $tags]]
  207. }
  208. # Write the configuration in the dictionary 'config' in the specified
  209. # file name.
  210. proc create_server_config_file {filename config} {
  211. set fp [open $filename w+]
  212. foreach directive [dict keys $config] {
  213. puts -nonewline $fp "$directive "
  214. puts $fp [dict get $config $directive]
  215. }
  216. close $fp
  217. }
  218. proc spawn_server {config_file stdout stderr} {
  219. if {$::valgrind} {
  220. set pid [exec valgrind --track-origins=yes --trace-children=yes --suppressions=[pwd]/src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file >> $stdout 2>> $stderr &]
  221. } elseif ($::stack_logging) {
  222. set pid [exec /usr/bin/env MallocStackLogging=1 MallocLogFile=/tmp/malloc_log.txt src/redis-server $config_file >> $stdout 2>> $stderr &]
  223. } else {
  224. set pid [exec src/redis-server $config_file >> $stdout 2>> $stderr &]
  225. }
  226. if {$::wait_server} {
  227. set msg "server started PID: $pid. press any key to continue..."
  228. puts $msg
  229. read stdin 1
  230. }
  231. # Tell the test server about this new instance.
  232. send_data_packet $::test_server_fd server-spawned $pid
  233. return $pid
  234. }
  235. # Wait for actual startup, return 1 if port is busy, 0 otherwise
  236. proc wait_server_started {config_file stdout pid} {
  237. set checkperiod 100; # Milliseconds
  238. set maxiter [expr {120*1000/$checkperiod}] ; # Wait up to 2 minutes.
  239. set port_busy 0
  240. while 1 {
  241. if {[regexp -- " PID: $pid" [exec cat $stdout]]} {
  242. break
  243. }
  244. after $checkperiod
  245. incr maxiter -1
  246. if {$maxiter == 0} {
  247. start_server_error $config_file "No PID detected in log $stdout"
  248. puts "--- LOG CONTENT ---"
  249. puts [exec cat $stdout]
  250. puts "-------------------"
  251. break
  252. }
  253. # Check if the port is actually busy and the server failed
  254. # for this reason.
  255. if {[regexp {Failed listening on port} [exec cat $stdout]]} {
  256. set port_busy 1
  257. break
  258. }
  259. }
  260. return $port_busy
  261. }
  262. proc dump_server_log {srv} {
  263. set pid [dict get $srv "pid"]
  264. puts "\n===== Start of server log (pid $pid) =====\n"
  265. puts [exec cat [dict get $srv "stdout"]]
  266. puts "===== End of server log (pid $pid) =====\n"
  267. }
  268. proc run_external_server_test {code overrides} {
  269. set srv {}
  270. dict set srv "host" $::host
  271. dict set srv "port" $::port
  272. set client [redis $::host $::port 0 $::tls]
  273. dict set srv "client" $client
  274. if {!$::singledb} {
  275. $client select 9
  276. }
  277. set config {}
  278. dict set config "port" $::port
  279. dict set srv "config" $config
  280. # append the server to the stack
  281. lappend ::servers $srv
  282. if {[llength $::servers] > 1} {
  283. if {$::verbose} {
  284. puts "Notice: nested start_server statements in external server mode, test must be aware of that!"
  285. }
  286. }
  287. r flushall
  288. # store overrides
  289. set saved_config {}
  290. foreach {param val} $overrides {
  291. dict set saved_config $param [lindex [r config get $param] 1]
  292. r config set $param $val
  293. # If we enable appendonly, wait for for rewrite to complete. This is
  294. # required for tests that begin with a bg* command which will fail if
  295. # the rewriteaof operation is not completed at this point.
  296. if {$param == "appendonly" && $val == "yes"} {
  297. waitForBgrewriteaof r
  298. }
  299. }
  300. if {[catch {set retval [uplevel 2 $code]} error]} {
  301. if {$::durable} {
  302. set msg [string range $error 10 end]
  303. lappend details $msg
  304. lappend details $::errorInfo
  305. lappend ::tests_failed $details
  306. incr ::num_failed
  307. send_data_packet $::test_server_fd err [join $details "\n"]
  308. } else {
  309. # Re-raise, let handler up the stack take care of this.
  310. error $error $::errorInfo
  311. }
  312. }
  313. # restore overrides
  314. dict for {param val} $saved_config {
  315. r config set $param $val
  316. }
  317. lpop ::servers
  318. }
  319. proc start_server {options {code undefined}} {
  320. # setup defaults
  321. set baseconfig "default.conf"
  322. set overrides {}
  323. set omit {}
  324. set tags {}
  325. set keep_persistence false
  326. # parse options
  327. foreach {option value} $options {
  328. switch $option {
  329. "config" {
  330. set baseconfig $value
  331. }
  332. "overrides" {
  333. set overrides $value
  334. }
  335. "omit" {
  336. set omit $value
  337. }
  338. "tags" {
  339. # If we 'tags' contain multiple tags, quoted and separated by spaces,
  340. # we want to get rid of the quotes in order to have a proper list
  341. set tags [string map { \" "" } $value]
  342. set ::tags [concat $::tags $tags]
  343. }
  344. "keep_persistence" {
  345. set keep_persistence $value
  346. }
  347. default {
  348. error "Unknown option $option"
  349. }
  350. }
  351. }
  352. # We skip unwanted tags
  353. if {![tags_acceptable $::tags err]} {
  354. incr ::num_aborted
  355. send_data_packet $::test_server_fd ignore $err
  356. set ::tags [lrange $::tags 0 end-[llength $tags]]
  357. return
  358. }
  359. # If we are running against an external server, we just push the
  360. # host/port pair in the stack the first time
  361. if {$::external} {
  362. run_external_server_test $code $overrides
  363. set ::tags [lrange $::tags 0 end-[llength $tags]]
  364. return
  365. }
  366. set data [split [exec cat "tests/assets/$baseconfig"] "\n"]
  367. set config {}
  368. if {$::tls} {
  369. dict set config "tls-cert-file" [format "%s/tests/tls/server.crt" [pwd]]
  370. dict set config "tls-key-file" [format "%s/tests/tls/server.key" [pwd]]
  371. dict set config "tls-client-cert-file" [format "%s/tests/tls/client.crt" [pwd]]
  372. dict set config "tls-client-key-file" [format "%s/tests/tls/client.key" [pwd]]
  373. dict set config "tls-dh-params-file" [format "%s/tests/tls/redis.dh" [pwd]]
  374. dict set config "tls-ca-cert-file" [format "%s/tests/tls/ca.crt" [pwd]]
  375. dict set config "loglevel" "debug"
  376. }
  377. foreach line $data {
  378. if {[string length $line] > 0 && [string index $line 0] ne "#"} {
  379. set elements [split $line " "]
  380. set directive [lrange $elements 0 0]
  381. set arguments [lrange $elements 1 end]
  382. dict set config $directive $arguments
  383. }
  384. }
  385. # use a different directory every time a server is started
  386. dict set config dir [tmpdir server]
  387. # start every server on a different port
  388. set port [find_available_port $::baseport $::portcount]
  389. if {$::tls} {
  390. dict set config "port" 0
  391. dict set config "tls-port" $port
  392. dict set config "tls-cluster" "yes"
  393. dict set config "tls-replication" "yes"
  394. } else {
  395. dict set config port $port
  396. }
  397. set unixsocket [file normalize [format "%s/%s" [dict get $config "dir"] "socket"]]
  398. dict set config "unixsocket" $unixsocket
  399. # apply overrides from global space and arguments
  400. foreach {directive arguments} [concat $::global_overrides $overrides] {
  401. dict set config $directive $arguments
  402. }
  403. # remove directives that are marked to be omitted
  404. foreach directive $omit {
  405. dict unset config $directive
  406. }
  407. # write new configuration to temporary file
  408. set config_file [tmpfile redis.conf]
  409. create_server_config_file $config_file $config
  410. set stdout [format "%s/%s" [dict get $config "dir"] "stdout"]
  411. set stderr [format "%s/%s" [dict get $config "dir"] "stderr"]
  412. # if we're inside a test, write the test name to the server log file
  413. if {[info exists ::cur_test]} {
  414. set fd [open $stdout "a+"]
  415. puts $fd "### Starting server for test $::cur_test"
  416. close $fd
  417. }
  418. # We need a loop here to retry with different ports.
  419. set server_started 0
  420. while {$server_started == 0} {
  421. if {$::verbose} {
  422. puts -nonewline "=== ($tags) Starting server ${::host}:${port} "
  423. }
  424. send_data_packet $::test_server_fd "server-spawning" "port $port"
  425. set pid [spawn_server $config_file $stdout $stderr]
  426. # check that the server actually started
  427. set port_busy [wait_server_started $config_file $stdout $pid]
  428. # Sometimes we have to try a different port, even if we checked
  429. # for availability. Other test clients may grab the port before we
  430. # are able to do it for example.
  431. if {$port_busy} {
  432. puts "Port $port was already busy, trying another port..."
  433. set port [find_available_port $::baseport $::portcount]
  434. if {$::tls} {
  435. dict set config "tls-port" $port
  436. } else {
  437. dict set config port $port
  438. }
  439. create_server_config_file $config_file $config
  440. # Truncate log so wait_server_started will not be looking at
  441. # output of the failed server.
  442. close [open $stdout "w"]
  443. continue; # Try again
  444. }
  445. if {$::valgrind} {set retrynum 1000} else {set retrynum 100}
  446. if {$code ne "undefined"} {
  447. set serverisup [server_is_up $::host $port $retrynum]
  448. } else {
  449. set serverisup 1
  450. }
  451. if {$::verbose} {
  452. puts ""
  453. }
  454. if {!$serverisup} {
  455. set err {}
  456. append err [exec cat $stdout] "\n" [exec cat $stderr]
  457. start_server_error $config_file $err
  458. return
  459. }
  460. set server_started 1
  461. }
  462. # setup properties to be able to initialize a client object
  463. set port_param [expr $::tls ? {"tls-port"} : {"port"}]
  464. set host $::host
  465. if {[dict exists $config bind]} { set host [dict get $config bind] }
  466. if {[dict exists $config $port_param]} { set port [dict get $config $port_param] }
  467. # setup config dict
  468. dict set srv "config_file" $config_file
  469. dict set srv "config" $config
  470. dict set srv "pid" $pid
  471. dict set srv "host" $host
  472. dict set srv "port" $port
  473. dict set srv "stdout" $stdout
  474. dict set srv "stderr" $stderr
  475. dict set srv "unixsocket" $unixsocket
  476. # if a block of code is supplied, we wait for the server to become
  477. # available, create a client object and kill the server afterwards
  478. if {$code ne "undefined"} {
  479. set line [exec head -n1 $stdout]
  480. if {[string match {*already in use*} $line]} {
  481. error_and_quit $config_file $line
  482. }
  483. while 1 {
  484. # check that the server actually started and is ready for connections
  485. if {[count_message_lines $stdout "Ready to accept"] > 0} {
  486. break
  487. }
  488. after 10
  489. }
  490. # append the server to the stack
  491. lappend ::servers $srv
  492. # connect client (after server dict is put on the stack)
  493. reconnect
  494. # remember previous num_failed to catch new errors
  495. set prev_num_failed $::num_failed
  496. # execute provided block
  497. set num_tests $::num_tests
  498. if {[catch { uplevel 1 $code } error]} {
  499. set backtrace $::errorInfo
  500. set assertion [string match "assertion:*" $error]
  501. # fetch srv back from the server list, in case it was restarted by restart_server (new PID)
  502. set srv [lindex $::servers end]
  503. # pop the server object
  504. set ::servers [lrange $::servers 0 end-1]
  505. # Kill the server without checking for leaks
  506. dict set srv "skipleaks" 1
  507. kill_server $srv
  508. if {$::dump_logs && $assertion} {
  509. # if we caught an assertion ($::num_failed isn't incremented yet)
  510. # this happens when the test spawns a server and not the other way around
  511. dump_server_log $srv
  512. } else {
  513. # Print crash report from log
  514. set crashlog [crashlog_from_file [dict get $srv "stdout"]]
  515. if {[string length $crashlog] > 0} {
  516. puts [format "\nLogged crash report (pid %d):" [dict get $srv "pid"]]
  517. puts "$crashlog"
  518. puts ""
  519. }
  520. }
  521. if {!$assertion && $::durable} {
  522. # durable is meant to prevent the whole tcl test from exiting on
  523. # an exception. an assertion will be caught by the test proc.
  524. set msg [string range $error 10 end]
  525. lappend details $msg
  526. lappend details $backtrace
  527. lappend ::tests_failed $details
  528. incr ::num_failed
  529. send_data_packet $::test_server_fd err [join $details "\n"]
  530. } else {
  531. # Re-raise, let handler up the stack take care of this.
  532. error $error $backtrace
  533. }
  534. } else {
  535. if {$::dump_logs && $prev_num_failed != $::num_failed} {
  536. dump_server_log $srv
  537. }
  538. }
  539. # fetch srv back from the server list, in case it was restarted by restart_server (new PID)
  540. set srv [lindex $::servers end]
  541. # Don't do the leak check when no tests were run
  542. if {$num_tests == $::num_tests} {
  543. dict set srv "skipleaks" 1
  544. }
  545. # pop the server object
  546. set ::servers [lrange $::servers 0 end-1]
  547. set ::tags [lrange $::tags 0 end-[llength $tags]]
  548. kill_server $srv
  549. if {!$keep_persistence} {
  550. clean_persistence $srv
  551. }
  552. set _ ""
  553. } else {
  554. set ::tags [lrange $::tags 0 end-[llength $tags]]
  555. set _ $srv
  556. }
  557. }
  558. proc restart_server {level wait_ready rotate_logs {reconnect 1}} {
  559. set srv [lindex $::servers end+$level]
  560. kill_server $srv
  561. # Remove the default client from the server
  562. dict unset srv "client"
  563. set pid [dict get $srv "pid"]
  564. set stdout [dict get $srv "stdout"]
  565. set stderr [dict get $srv "stderr"]
  566. if {$rotate_logs} {
  567. set ts [clock format [clock seconds] -format %y%m%d%H%M%S]
  568. file rename $stdout $stdout.$ts.$pid
  569. file rename $stderr $stderr.$ts.$pid
  570. }
  571. set prev_ready_count [count_message_lines $stdout "Ready to accept"]
  572. # if we're inside a test, write the test name to the server log file
  573. if {[info exists ::cur_test]} {
  574. set fd [open $stdout "a+"]
  575. puts $fd "### Restarting server for test $::cur_test"
  576. close $fd
  577. }
  578. set config_file [dict get $srv "config_file"]
  579. set pid [spawn_server $config_file $stdout $stderr]
  580. # check that the server actually started
  581. wait_server_started $config_file $stdout $pid
  582. # update the pid in the servers list
  583. dict set srv "pid" $pid
  584. # re-set $srv in the servers list
  585. lset ::servers end+$level $srv
  586. if {$wait_ready} {
  587. while 1 {
  588. # check that the server actually started and is ready for connections
  589. if {[count_message_lines $stdout "Ready to accept"] > $prev_ready_count} {
  590. break
  591. }
  592. after 10
  593. }
  594. }
  595. if {$reconnect} {
  596. reconnect $level
  597. }
  598. }