# psync2.tcl — PSYNC2 partial resynchronization test suite.
  1. proc show_cluster_status {} {
  2. uplevel 1 {
  3. # The following is the regexp we use to match the log line
  4. # time info. Logs are in the following form:
  5. #
  6. # 11296:M 25 May 2020 17:37:14.652 # Server initialized
  7. set log_regexp {^[0-9]+:[A-Z] [0-9]+ [A-z]+ [0-9]+ ([0-9:.]+) .*}
  8. set repl_regexp {(master|repl|sync|backlog|meaningful|offset)}
  9. puts "Master ID is $master_id"
  10. for {set j 0} {$j < 5} {incr j} {
  11. puts "$j: sync_full: [status $R($j) sync_full]"
  12. puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]"
  13. puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]"
  14. puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]"
  15. puts "$j: x var is : [$R($j) GET x]"
  16. puts "---"
  17. }
  18. # Show the replication logs of every instance, interleaving
  19. # them by the log date.
  20. #
  21. # First: load the lines as lists for each instance.
  22. array set log {}
  23. for {set j 0} {$j < 5} {incr j} {
  24. set fd [open $R_log($j)]
  25. while {[gets $fd l] >= 0} {
  26. if {[regexp $log_regexp $l] &&
  27. [regexp -nocase $repl_regexp $l]} {
  28. lappend log($j) $l
  29. }
  30. }
  31. close $fd
  32. }
  33. # To interleave the lines, at every step consume the element of
  34. # the list with the lowest time and remove it. Do it until
  35. # all the lists are empty.
  36. #
  37. # regexp {^[0-9]+:[A-Z] [0-9]+ [A-z]+ [0-9]+ ([0-9:.]+) .*} $l - logdate
  38. while 1 {
  39. # Find the log with smallest time.
  40. set empty 0
  41. set best 0
  42. set bestdate {}
  43. for {set j 0} {$j < 5} {incr j} {
  44. if {[llength $log($j)] == 0} {
  45. incr empty
  46. continue
  47. }
  48. regexp $log_regexp [lindex $log($j) 0] - date
  49. if {$bestdate eq {}} {
  50. set best $j
  51. set bestdate $date
  52. } else {
  53. if {[string compare $bestdate $date] > 0} {
  54. set best $j
  55. set bestdate $date
  56. }
  57. }
  58. }
  59. if {$empty == 5} break ; # Our exit condition: no more logs
  60. # Emit the one with the smallest time (that is the first
  61. # event in the time line).
  62. puts "\[$best port $R_port($best)\] [lindex $log($best) 0]"
  63. set log($best) [lrange $log($best) 1 end]
  64. }
  65. }
  66. }
# Five instances: one master plus four replicas arranged in random chains.
start_server {tags {"psync2 external:skip"}} {
start_server {} {
start_server {} {
start_server {} {
start_server {} {
    set master_id 0                 ; # Current master
    set start_time [clock seconds]  ; # Test start time
    set counter_value 0             ; # Current value of the Redis counter "x"

    # Config
    set debug_msg 0                 ; # Enable additional debug messages

    set no_exit 0                   ; # Do not exit at end of the test

    set duration 40                 ; # Total test seconds

    set genload 1                   ; # Load master with writes at every cycle

    set genload_time 5000           ; # Writes duration time in ms

    set disconnect 1                ; # Break replication link between random
                                      # master and slave instances while the
                                      # master is loaded with writes.

    set disconnect_period 1000      ; # Disconnect repl link every N ms.

    # Cache per-instance handles and metadata for quick lookup below.
    for {set j 0} {$j < 5} {incr j} {
        set R($j) [srv [expr 0-$j] client]
        set R_host($j) [srv [expr 0-$j] host]
        set R_port($j) [srv [expr 0-$j] port]
        set R_id_from_port($R_port($j)) $j ;# To get a replica index by port
        set R_log($j) [srv [expr 0-$j] stdout]
        if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
    }

    set cycle 0
  94. while {([clock seconds]-$start_time) < $duration} {
  95. incr cycle
  96. test "PSYNC2: --- CYCLE $cycle ---" {}
  97. # Create a random replication layout.
  98. # Start with switching master (this simulates a failover).
  99. # 1) Select the new master.
  100. set master_id [randomInt 5]
  101. set used [list $master_id]
  102. test "PSYNC2: \[NEW LAYOUT\] Set #$master_id as master" {
  103. $R($master_id) slaveof no one
  104. $R($master_id) config set repl-ping-replica-period 1 ;# increase the chance that random ping will cause issues
  105. if {$counter_value == 0} {
  106. $R($master_id) set x $counter_value
  107. }
  108. }
  109. # Build a lookup with the root master of each replica (head of the chain).
  110. array set root_master {}
  111. for {set j 0} {$j < 5} {incr j} {
  112. set r $j
  113. while {1} {
  114. set r_master_port [status $R($r) master_port]
  115. if {$r_master_port == ""} {
  116. set root_master($j) $r
  117. break
  118. }
  119. set r_master_id $R_id_from_port($r_master_port)
  120. set r $r_master_id
  121. }
  122. }
  123. # Wait for the newly detached master-replica chain (new master and existing replicas that were
  124. # already connected to it, to get updated on the new replication id.
  125. # This is needed to avoid a race that can result in a full sync when a replica that already
  126. # got an updated repl id, tries to psync from one that's not yet aware of it.
  127. wait_for_condition 50 1000 {
  128. ([status $R(0) master_replid] == [status $R($root_master(0)) master_replid]) &&
  129. ([status $R(1) master_replid] == [status $R($root_master(1)) master_replid]) &&
  130. ([status $R(2) master_replid] == [status $R($root_master(2)) master_replid]) &&
  131. ([status $R(3) master_replid] == [status $R($root_master(3)) master_replid]) &&
  132. ([status $R(4) master_replid] == [status $R($root_master(4)) master_replid])
  133. } else {
  134. show_cluster_status
  135. fail "Replica did not inherit the new replid."
  136. }
  137. # Build a lookup with the direct connection master of each replica.
  138. # First loop that uses random to decide who replicates from who.
  139. array set slave_to_master {}
  140. while {[llength $used] != 5} {
  141. while 1 {
  142. set slave_id [randomInt 5]
  143. if {[lsearch -exact $used $slave_id] == -1} break
  144. }
  145. set rand [randomInt [llength $used]]
  146. set mid [lindex $used $rand]
  147. set slave_to_master($slave_id) $mid
  148. lappend used $slave_id
  149. }
  150. # 2) Attach all the slaves to a random instance
  151. # Second loop that does the actual SLAVEOF command and make sure execute it in the right order.
  152. while {[array size slave_to_master] > 0} {
  153. foreach slave_id [array names slave_to_master] {
  154. set mid $slave_to_master($slave_id)
  155. # We only attach the replica to a random instance that already in the old/new chain.
  156. if {$root_master($mid) == $root_master($master_id)} {
  157. # Find a replica that can be attached to the new chain already attached to the new master.
  158. # My new master is in the new chain.
  159. } elseif {$root_master($mid) == $root_master($slave_id)} {
  160. # My new master and I are in the old chain.
  161. } else {
  162. # In cycle 1, we do not care about the order.
  163. if {$cycle != 1} {
  164. # skipping this replica for now to avoid attaching in a bad order
  165. # this is done to avoid an unexpected full sync, when we take a
  166. # replica that already reconnected to the new chain and got a new replid
  167. # and is then set to connect to a master that's still not aware of that new replid
  168. continue
  169. }
  170. }
  171. set master_host $R_host($master_id)
  172. set master_port $R_port($master_id)
  173. test "PSYNC2: Set #$slave_id to replicate from #$mid" {
  174. $R($slave_id) slaveof $master_host $master_port
  175. }
  176. # Wait for replica to be connected before we proceed.
  177. wait_for_condition 50 1000 {
  178. [status $R($slave_id) master_link_status] == "up"
  179. } else {
  180. show_cluster_status
  181. fail "Replica not reconnecting."
  182. }
  183. set root_master($slave_id) $root_master($mid)
  184. unset slave_to_master($slave_id)
  185. break
  186. }
  187. }
  188. # Wait for replicas to sync. so next loop won't get -LOADING error
  189. wait_for_condition 50 1000 {
  190. [status $R([expr {($master_id+1)%5}]) master_link_status] == "up" &&
  191. [status $R([expr {($master_id+2)%5}]) master_link_status] == "up" &&
  192. [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" &&
  193. [status $R([expr {($master_id+4)%5}]) master_link_status] == "up"
  194. } else {
  195. show_cluster_status
  196. fail "Replica not reconnecting"
  197. }
  198. # 3) Increment the counter and wait for all the instances
  199. # to converge.
  200. test "PSYNC2: cluster is consistent after failover" {
  201. $R($master_id) incr x; incr counter_value
  202. for {set j 0} {$j < 5} {incr j} {
  203. wait_for_condition 50 1000 {
  204. [$R($j) get x] == $counter_value
  205. } else {
  206. show_cluster_status
  207. fail "Instance #$j x variable is inconsistent"
  208. }
  209. }
  210. }
  211. # 4) Generate load while breaking the connection of random
  212. # slave-master pairs.
  213. test "PSYNC2: generate load while killing replication links" {
  214. set t [clock milliseconds]
  215. set next_break [expr {$t+$disconnect_period}]
  216. while {[clock milliseconds]-$t < $genload_time} {
  217. if {$genload} {
  218. $R($master_id) incr x; incr counter_value
  219. }
  220. if {[clock milliseconds] == $next_break} {
  221. set next_break \
  222. [expr {[clock milliseconds]+$disconnect_period}]
  223. set slave_id [randomInt 5]
  224. if {$disconnect} {
  225. $R($slave_id) client kill type master
  226. if {$debug_msg} {
  227. puts "+++ Breaking link for replica #$slave_id"
  228. }
  229. }
  230. }
  231. }
  232. }
  233. # 5) Increment the counter and wait for all the instances
  234. set x [$R($master_id) get x]
  235. test "PSYNC2: cluster is consistent after load (x = $x)" {
  236. for {set j 0} {$j < 5} {incr j} {
  237. wait_for_condition 50 1000 {
  238. [$R($j) get x] == $counter_value
  239. } else {
  240. show_cluster_status
  241. fail "Instance #$j x variable is inconsistent"
  242. }
  243. }
  244. }
  245. # wait for all the slaves to be in sync.
  246. set masteroff [status $R($master_id) master_repl_offset]
  247. wait_for_condition 500 100 {
  248. [status $R(0) master_repl_offset] >= $masteroff &&
  249. [status $R(1) master_repl_offset] >= $masteroff &&
  250. [status $R(2) master_repl_offset] >= $masteroff &&
  251. [status $R(3) master_repl_offset] >= $masteroff &&
  252. [status $R(4) master_repl_offset] >= $masteroff
  253. } else {
  254. show_cluster_status
  255. fail "Replicas offsets didn't catch up with the master after too long time."
  256. }
  257. if {$debug_msg} {
  258. show_cluster_status
  259. }
  260. test "PSYNC2: total sum of full synchronizations is exactly 4" {
  261. set sum 0
  262. for {set j 0} {$j < 5} {incr j} {
  263. incr sum [status $R($j) sync_full]
  264. }
  265. if {$sum != 4} {
  266. show_cluster_status
  267. assert {$sum == 4}
  268. }
  269. }
  270. # In absence of pings, are the instances really able to have
  271. # the exact same offset?
  272. $R($master_id) config set repl-ping-replica-period 3600
  273. wait_for_condition 500 100 {
  274. [status $R($master_id) master_repl_offset] == [status $R(0) master_repl_offset] &&
  275. [status $R($master_id) master_repl_offset] == [status $R(1) master_repl_offset] &&
  276. [status $R($master_id) master_repl_offset] == [status $R(2) master_repl_offset] &&
  277. [status $R($master_id) master_repl_offset] == [status $R(3) master_repl_offset] &&
  278. [status $R($master_id) master_repl_offset] == [status $R(4) master_repl_offset]
  279. } else {
  280. show_cluster_status
  281. fail "Replicas and master offsets were unable to match *exactly*."
  282. }
  283. # Limit anyway the maximum number of cycles. This is useful when the
  284. # test is skipped via --only option of the test suite. In that case
  285. # we don't want to see many seconds of this test being just skipped.
  286. if {$cycle > 50} break
  287. }
  288. test "PSYNC2: Bring the master back again for next test" {
  289. $R($master_id) slaveof no one
  290. set master_host $R_host($master_id)
  291. set master_port $R_port($master_id)
  292. for {set j 0} {$j < 5} {incr j} {
  293. if {$j == $master_id} continue
  294. $R($j) slaveof $master_host $master_port
  295. }
  296. # Wait for replicas to sync. it is not enough to just wait for connected_slaves==4
  297. # since we might do the check before the master realized that they're disconnected
  298. wait_for_condition 50 1000 {
  299. [status $R($master_id) connected_slaves] == 4 &&
  300. [status $R([expr {($master_id+1)%5}]) master_link_status] == "up" &&
  301. [status $R([expr {($master_id+2)%5}]) master_link_status] == "up" &&
  302. [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" &&
  303. [status $R([expr {($master_id+4)%5}]) master_link_status] == "up"
  304. } else {
  305. show_cluster_status
  306. fail "Replica not reconnecting"
  307. }
  308. }
    # Restarting a replica must not cause a full sync: the replid/offset
    # saved in the RDB aux fields allow it to PSYNC with its old master.
    test "PSYNC2: Partial resync after restart using RDB aux fields" {
        # Pick a random slave
        set slave_id [expr {($master_id+1)%5}]
        # Baseline sync counters on the master, to detect what kind of
        # sync the restarted replica performs.
        set sync_count [status $R($master_id) sync_full]
        set sync_partial [status $R($master_id) sync_partial_ok]
        set sync_partial_err [status $R($master_id) sync_partial_err]
        # Best-effort: rewrite the config so replication settings survive
        # the restart, then restart and refresh the cached client handle.
        catch {
            $R($slave_id) config rewrite
            restart_server [expr {0-$slave_id}] true false
            set R($slave_id) [srv [expr {0-$slave_id}] client]
        }
        # note: just waiting for connected_slaves==4 has a race condition since
        # we might do the check before the master realized that the slave disconnected
        wait_for_condition 50 1000 {
            [status $R($master_id) sync_partial_ok] == $sync_partial + 1
        } else {
            puts "prev sync_full: $sync_count"
            puts "prev sync_partial_ok: $sync_partial"
            puts "prev sync_partial_err: $sync_partial_err"
            puts [$R($master_id) info stats]
            show_cluster_status
            fail "Replica didn't partial sync"
        }
        # No additional full sync must have happened.
        set new_sync_count [status $R($master_id) sync_full]
        assert {$sync_count == $new_sync_count}
    }
    # Regression test for issue #4483: a replica restarted from RDB must
    # reload the scripting state, so EVALSHA commands accumulated in the
    # master backlog can be partially resynced and applied.
    test "PSYNC2: Replica RDB restart with EVALSHA in backlog issue #4483" {
        # Pick a random slave
        set slave_id [expr {($master_id+1)%5}]
        set sync_count [status $R($master_id) sync_full]

        # Make sure to replicate the first EVAL while the slave is online
        # so that it's part of the scripts the master believes it's safe
        # to propagate as EVALSHA.
        $R($master_id) EVAL {return redis.call("incr","__mycounter")} 0
        $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0

        # Wait for the two to sync
        wait_for_condition 50 1000 {
            [$R($master_id) debug digest] == [$R($slave_id) debug digest]
        } else {
            show_cluster_status
            fail "Replica not reconnecting"
        }

        # Prevent the slave from receiving master updates, and at
        # the same time send a new script several times to the
        # master, so that we'll end with EVALSHA into the backlog.
        $R($slave_id) slaveof 127.0.0.1 0

        $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0
        $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0
        $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0

        # Best-effort restart of the detached replica (handle refreshed after).
        catch {
            $R($slave_id) config rewrite
            restart_server [expr {0-$slave_id}] true false
            set R($slave_id) [srv [expr {0-$slave_id}] client]
        }

        # Reconfigure the slave correctly again, when it's back online.
        # $master_host/$master_port were set by the previous test block.
        set retry 50
        while {$retry} {
            if {[catch {
                $R($slave_id) slaveof $master_host $master_port
            }]} {
                after 1000
            } else {
                break
            }
            incr retry -1
        }

        # The master should be back at 4 slaves eventually
        wait_for_condition 50 1000 {
            [status $R($master_id) connected_slaves] == 4
        } else {
            show_cluster_status
            fail "Replica not reconnecting"
        }
        # A partial resync is expected: no new full syncs on the master.
        set new_sync_count [status $R($master_id) sync_full]
        assert {$sync_count == $new_sync_count}

        # However if the slave started with the full state of the
        # scripting engine, we should now have the same digest.
        wait_for_condition 50 1000 {
            [$R($master_id) debug digest] == [$R($slave_id) debug digest]
        } else {
            show_cluster_status
            fail "Debug digest mismatch between master and replica in post-restart handshake"
        }
    }
    # Debug aid: when $no_exit is set, keep the servers alive forever so
    # their state can be inspected manually after the test completes.
    if {$no_exit} {
        while 1 { puts -nonewline .; flush stdout; after 1000}
    }
}}}}}