psync2.tcl 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. proc show_cluster_status {} {
  2. uplevel 1 {
  3. # The following is the regexp we use to match the log line
  4. # time info. Logs are in the following form:
  5. #
  6. # 11296:M 25 May 2020 17:37:14.652 # Server initialized
  7. set log_regexp {^[0-9]+:[A-Z] [0-9]+ [A-z]+ [0-9]+ ([0-9:.]+) .*}
  8. set repl_regexp {(master|repl|sync|backlog|meaningful|offset)}
  9. puts "Master ID is $master_id"
  10. for {set j 0} {$j < 5} {incr j} {
  11. puts "$j: sync_full: [status $R($j) sync_full]"
  12. puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]"
  13. puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]"
  14. puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]"
  15. puts "$j: x var is : [$R($j) GET x]"
  16. puts "---"
  17. }
  18. # Show the replication logs of every instance, interleaving
  19. # them by the log date.
  20. #
  21. # First: load the lines as lists for each instance.
  22. array set log {}
  23. for {set j 0} {$j < 5} {incr j} {
  24. set fd [open $R_log($j)]
  25. while {[gets $fd l] >= 0} {
  26. if {[regexp $log_regexp $l] &&
  27. [regexp -nocase $repl_regexp $l]} {
  28. lappend log($j) $l
  29. }
  30. }
  31. close $fd
  32. }
  33. # To interleave the lines, at every step consume the element of
  34. # the list with the lowest time and remove it. Do it until
  35. # all the lists are empty.
  36. #
  37. # regexp {^[0-9]+:[A-Z] [0-9]+ [A-z]+ [0-9]+ ([0-9:.]+) .*} $l - logdate
  38. while 1 {
  39. # Find the log with smallest time.
  40. set empty 0
  41. set best 0
  42. set bestdate {}
  43. for {set j 0} {$j < 5} {incr j} {
  44. if {[llength $log($j)] == 0} {
  45. incr empty
  46. continue
  47. }
  48. regexp $log_regexp [lindex $log($j) 0] - date
  49. if {$bestdate eq {}} {
  50. set best $j
  51. set bestdate $date
  52. } else {
  53. if {[string compare $bestdate $date] > 0} {
  54. set best $j
  55. set bestdate $date
  56. }
  57. }
  58. }
  59. if {$empty == 5} break ; # Our exit condition: no more logs
  60. # Emit the one with the smallest time (that is the first
  61. # event in the time line).
  62. puts "\[$best port $R_port($best)\] [lindex $log($best) 0]"
  63. set log($best) [lrange $log($best) 1 end]
  64. }
  65. }
  66. }
  67. start_server {tags {"psync2"}} {
  68. start_server {} {
  69. start_server {} {
  70. start_server {} {
  71. start_server {} {
  72. set master_id 0 ; # Current master
  73. set start_time [clock seconds] ; # Test start time
  74. set counter_value 0 ; # Current value of the Redis counter "x"
  75. # Config
  76. set debug_msg 0 ; # Enable additional debug messages
  77. set no_exit 0 ; # Do not exit at end of the test
  78. set duration 40 ; # Total test seconds
  79. set genload 1 ; # Load master with writes at every cycle
  80. set genload_time 5000 ; # Writes duration time in ms
  81. set disconnect 1 ; # Break replication link between random
  82. # master and slave instances while the
  83. # master is loaded with writes.
  84. set disconnect_period 1000 ; # Disconnect repl link every N ms.
  85. for {set j 0} {$j < 5} {incr j} {
  86. set R($j) [srv [expr 0-$j] client]
  87. set R_host($j) [srv [expr 0-$j] host]
  88. set R_port($j) [srv [expr 0-$j] port]
  89. set R_log($j) [srv [expr 0-$j] stdout]
  90. if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
  91. }
  92. set cycle 1
  93. while {([clock seconds]-$start_time) < $duration} {
  94. test "PSYNC2: --- CYCLE $cycle ---" {}
  95. incr cycle
  96. # Create a random replication layout.
  97. # Start with switching master (this simulates a failover).
  98. # 1) Select the new master.
  99. set master_id [randomInt 5]
  100. set used [list $master_id]
  101. test "PSYNC2: \[NEW LAYOUT\] Set #$master_id as master" {
  102. $R($master_id) slaveof no one
  103. $R($master_id) config set repl-ping-replica-period 1 ;# increse the chance that random ping will cause issues
  104. if {$counter_value == 0} {
  105. $R($master_id) set x $counter_value
  106. }
  107. }
  108. # 2) Attach all the slaves to a random instance
  109. while {[llength $used] != 5} {
  110. while 1 {
  111. set slave_id [randomInt 5]
  112. if {[lsearch -exact $used $slave_id] == -1} break
  113. }
  114. set rand [randomInt [llength $used]]
  115. set mid [lindex $used $rand]
  116. set master_host $R_host($mid)
  117. set master_port $R_port($mid)
  118. test "PSYNC2: Set #$slave_id to replicate from #$mid" {
  119. $R($slave_id) slaveof $master_host $master_port
  120. }
  121. lappend used $slave_id
  122. }
  123. # Wait for replicas to sync. so next loop won't get -LOADING error
  124. wait_for_condition 50 1000 {
  125. [status $R([expr {($master_id+1)%5}]) master_link_status] == "up" &&
  126. [status $R([expr {($master_id+2)%5}]) master_link_status] == "up" &&
  127. [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" &&
  128. [status $R([expr {($master_id+4)%5}]) master_link_status] == "up"
  129. } else {
  130. show_cluster_status
  131. fail "Replica not reconnecting"
  132. }
  133. # 3) Increment the counter and wait for all the instances
  134. # to converge.
  135. test "PSYNC2: cluster is consistent after failover" {
  136. $R($master_id) incr x; incr counter_value
  137. for {set j 0} {$j < 5} {incr j} {
  138. wait_for_condition 50 1000 {
  139. [$R($j) get x] == $counter_value
  140. } else {
  141. show_cluster_status
  142. fail "Instance #$j x variable is inconsistent"
  143. }
  144. }
  145. }
  146. # 4) Generate load while breaking the connection of random
  147. # slave-master pairs.
  148. test "PSYNC2: generate load while killing replication links" {
  149. set t [clock milliseconds]
  150. set next_break [expr {$t+$disconnect_period}]
  151. while {[clock milliseconds]-$t < $genload_time} {
  152. if {$genload} {
  153. $R($master_id) incr x; incr counter_value
  154. }
  155. if {[clock milliseconds] == $next_break} {
  156. set next_break \
  157. [expr {[clock milliseconds]+$disconnect_period}]
  158. set slave_id [randomInt 5]
  159. if {$disconnect} {
  160. $R($slave_id) client kill type master
  161. if {$debug_msg} {
  162. puts "+++ Breaking link for replica #$slave_id"
  163. }
  164. }
  165. }
  166. }
  167. }
  168. # 5) Increment the counter and wait for all the instances
  169. set x [$R($master_id) get x]
  170. test "PSYNC2: cluster is consistent after load (x = $x)" {
  171. for {set j 0} {$j < 5} {incr j} {
  172. wait_for_condition 50 1000 {
  173. [$R($j) get x] == $counter_value
  174. } else {
  175. show_cluster_status
  176. fail "Instance #$j x variable is inconsistent"
  177. }
  178. }
  179. }
  180. # wait for all the slaves to be in sync.
  181. set masteroff [status $R($master_id) master_repl_offset]
  182. wait_for_condition 500 100 {
  183. [status $R(0) master_repl_offset] >= $masteroff &&
  184. [status $R(1) master_repl_offset] >= $masteroff &&
  185. [status $R(2) master_repl_offset] >= $masteroff &&
  186. [status $R(3) master_repl_offset] >= $masteroff &&
  187. [status $R(4) master_repl_offset] >= $masteroff
  188. } else {
  189. show_cluster_status
  190. fail "Replicas offsets didn't catch up with the master after too long time."
  191. }
  192. if {$debug_msg} {
  193. show_cluster_status
  194. }
  195. test "PSYNC2: total sum of full synchronizations is exactly 4" {
  196. set sum 0
  197. for {set j 0} {$j < 5} {incr j} {
  198. incr sum [status $R($j) sync_full]
  199. }
  200. if {$sum != 4} {
  201. show_cluster_status
  202. assert {$sum == 4}
  203. }
  204. }
  205. # In absence of pings, are the instances really able to have
  206. # the exact same offset?
  207. $R($master_id) config set repl-ping-replica-period 3600
  208. wait_for_condition 500 100 {
  209. [status $R($master_id) master_repl_offset] == [status $R(0) master_repl_offset] &&
  210. [status $R($master_id) master_repl_offset] == [status $R(1) master_repl_offset] &&
  211. [status $R($master_id) master_repl_offset] == [status $R(2) master_repl_offset] &&
  212. [status $R($master_id) master_repl_offset] == [status $R(3) master_repl_offset] &&
  213. [status $R($master_id) master_repl_offset] == [status $R(4) master_repl_offset]
  214. } else {
  215. show_cluster_status
  216. fail "Replicas and master offsets were unable to match *exactly*."
  217. }
  218. # Limit anyway the maximum number of cycles. This is useful when the
  219. # test is skipped via --only option of the test suite. In that case
  220. # we don't want to see many seconds of this test being just skipped.
  221. if {$cycle > 50} break
  222. }
  223. test "PSYNC2: Bring the master back again for next test" {
  224. $R($master_id) slaveof no one
  225. set master_host $R_host($master_id)
  226. set master_port $R_port($master_id)
  227. for {set j 0} {$j < 5} {incr j} {
  228. if {$j == $master_id} continue
  229. $R($j) slaveof $master_host $master_port
  230. }
  231. # Wait for replicas to sync. it is not enough to just wait for connected_slaves==4
  232. # since we might do the check before the master realized that they're disconnected
  233. wait_for_condition 50 1000 {
  234. [status $R($master_id) connected_slaves] == 4 &&
  235. [status $R([expr {($master_id+1)%5}]) master_link_status] == "up" &&
  236. [status $R([expr {($master_id+2)%5}]) master_link_status] == "up" &&
  237. [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" &&
  238. [status $R([expr {($master_id+4)%5}]) master_link_status] == "up"
  239. } else {
  240. show_cluster_status
  241. fail "Replica not reconnecting"
  242. }
  243. }
  244. test "PSYNC2: Partial resync after restart using RDB aux fields" {
  245. # Pick a random slave
  246. set slave_id [expr {($master_id+1)%5}]
  247. set sync_count [status $R($master_id) sync_full]
  248. set sync_partial [status $R($master_id) sync_partial_ok]
  249. set sync_partial_err [status $R($master_id) sync_partial_err]
  250. catch {
  251. $R($slave_id) config rewrite
  252. $R($slave_id) debug restart
  253. }
  254. # note: just waiting for connected_slaves==4 has a race condition since
  255. # we might do the check before the master realized that the slave disconnected
  256. wait_for_condition 50 1000 {
  257. [status $R($master_id) sync_partial_ok] == $sync_partial + 1
  258. } else {
  259. puts "prev sync_full: $sync_count"
  260. puts "prev sync_partial_ok: $sync_partial"
  261. puts "prev sync_partial_err: $sync_partial_err"
  262. puts [$R($master_id) info stats]
  263. show_cluster_status
  264. fail "Replica didn't partial sync"
  265. }
  266. set new_sync_count [status $R($master_id) sync_full]
  267. assert {$sync_count == $new_sync_count}
  268. }
  269. test "PSYNC2: Replica RDB restart with EVALSHA in backlog issue #4483" {
  270. # Pick a random slave
  271. set slave_id [expr {($master_id+1)%5}]
  272. set sync_count [status $R($master_id) sync_full]
  273. # Make sure to replicate the first EVAL while the salve is online
  274. # so that it's part of the scripts the master believes it's safe
  275. # to propagate as EVALSHA.
  276. $R($master_id) EVAL {return redis.call("incr","__mycounter")} 0
  277. $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0
  278. # Wait for the two to sync
  279. wait_for_condition 50 1000 {
  280. [$R($master_id) debug digest] == [$R($slave_id) debug digest]
  281. } else {
  282. show_cluster_status
  283. fail "Replica not reconnecting"
  284. }
  285. # Prevent the slave from receiving master updates, and at
  286. # the same time send a new script several times to the
  287. # master, so that we'll end with EVALSHA into the backlog.
  288. $R($slave_id) slaveof 127.0.0.1 0
  289. $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0
  290. $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0
  291. $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0
  292. catch {
  293. $R($slave_id) config rewrite
  294. $R($slave_id) debug restart
  295. }
  296. # Reconfigure the slave correctly again, when it's back online.
  297. set retry 50
  298. while {$retry} {
  299. if {[catch {
  300. $R($slave_id) slaveof $master_host $master_port
  301. }]} {
  302. after 1000
  303. } else {
  304. break
  305. }
  306. incr retry -1
  307. }
  308. # The master should be back at 4 slaves eventually
  309. wait_for_condition 50 1000 {
  310. [status $R($master_id) connected_slaves] == 4
  311. } else {
  312. show_cluster_status
  313. fail "Replica not reconnecting"
  314. }
  315. set new_sync_count [status $R($master_id) sync_full]
  316. assert {$sync_count == $new_sync_count}
  317. # However if the slave started with the full state of the
  318. # scripting engine, we should now have the same digest.
  319. wait_for_condition 50 1000 {
  320. [$R($master_id) debug digest] == [$R($slave_id) debug digest]
  321. } else {
  322. show_cluster_status
  323. fail "Debug digest mismatch between master and replica in post-restart handshake"
  324. }
  325. }
  326. if {$no_exit} {
  327. while 1 { puts -nonewline .; flush stdout; after 1000}
  328. }
  329. }}}}}