replication.tcl 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695
  # Return 1 if the whole content of the file `log` matches the glob-style
  # `pattern` (as understood by `string match`), 0 otherwise.
  #
  # An error from `open` or `read` is propagated to the caller. The file
  # handle is always closed, including when reading fails.
  proc log_file_matches {log pattern} {
      set fp [open $log r]
      # Catch a failing read so that the channel can be closed before the
      # error is re-raised.
      set failed [catch {read $fp} content options]
      close $fp
      if {$failed} {
          return -options $options $content
      }
      return [string match $pattern $content]
  }
  # Scenario: a replica must notice a replication timeout while it is still
  # in the handshake phase with a master that stopped responding.
  start_server {tags {"repl"}} {
      set slave [srv 0 client]
      set slave_host [srv 0 host]
      set slave_port [srv 0 port]
      set slave_log [srv 0 stdout]

      start_server {} {
          set master [srv 0 client]
          set master_host [srv 0 host]
          set master_port [srv 0 port]

          # Make the master hang waiting for the BGSAVE operation, so that
          # the replica remains in the handshake state.
          $master config set repl-diskless-sync yes
          $master config set repl-diskless-sync-delay 1000

          # A short replication timeout on the replica lets the timeout
          # trigger in a reasonable amount of time when there are no bugs.
          $slave config set repl-timeout 5

          # Kick off the replication process.
          $slave slaveof $master_host $master_port

          test {Slave enters handshake} {
              wait_for_condition 50 1000 {
                  [string match *handshake* [$slave role]]
              } else {
                  fail "Replica does not enter handshake state"
              }
          }

          # Now make the master unable to send the periodic newlines that
          # refresh the connection. The replica should detect the timeout.
          $master debug sleep 10

          test {Slave is able to detect timeout during handshake} {
              wait_for_condition 50 1000 {
                  [log_file_matches $slave_log "*Timeout connecting to the MASTER*"]
              } else {
                  fail "Replica is not able to detect timeout"
              }
          }
      }
  }
  # Scenario: two instances A and B. A starts as a replica of B; the last
  # test swaps the roles. Datasets are compared through DEBUG DIGEST.
  start_server {tags {"repl"}} {
      set A [srv 0 client]
      set A_host [srv 0 host]
      set A_port [srv 0 port]

      start_server {} {
          set B [srv 0 client]
          set B_host [srv 0 host]
          set B_port [srv 0 port]

          test {Set instance A as slave of B} {
              $A slaveof $B_host $B_port
              wait_for_condition 50 100 {
                  [lindex [$A role] 0] eq {slave} &&
                  [string match {*master_link_status:up*} [$A info replication]]
              } else {
                  fail "Can't turn the instance into a replica"
              }
          }

          test {INCRBYFLOAT replication, should not remove expire} {
              r set test 1 EX 100
              r incrbyfloat test 0.1
              after 1000
              assert_equal [$A debug digest] [$B debug digest]
          }

          test {BRPOPLPUSH replication, when blocking against empty list} {
              set rd [redis_deferring_client]
              $rd brpoplpush a b 5
              r lpush a foo
              wait_for_condition 50 100 {
                  [$A debug digest] eq [$B debug digest]
              } else {
                  fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
              }
              # Release the extra connection instead of leaking it.
              $rd close
          }

          test {BRPOPLPUSH replication, list exists} {
              set rd [redis_deferring_client]
              r lpush c 1
              r lpush c 2
              r lpush c 3
              $rd brpoplpush c d 5
              after 1000
              assert_equal [$A debug digest] [$B debug digest]
              # Release the extra connection instead of leaking it.
              $rd close
          }

          test {BLPOP followed by role change, issue #2473} {
              set rd [redis_deferring_client]
              $rd blpop foo 0 ; # Block while B is a master

              # Turn B into a replica of A (A becomes the master).
              $A slaveof no one
              $B slaveof $A_host $A_port
              wait_for_condition 50 100 {
                  [lindex [$B role] 0] eq {slave} &&
                  [string match {*master_link_status:up*} [$B info replication]]
              } else {
                  fail "Can't turn the instance into a replica"
              }

              # Push elements into the "foo" list of the new replica.
              # If the client is still attached to the instance, we'll get
              # a desync between the two instances.
              $A rpush foo a b c
              after 100

              wait_for_condition 50 100 {
                  [$A debug digest] eq [$B debug digest] &&
                  [$A lrange foo 0 -1] eq {a b c} &&
                  [$B lrange foo 0 -1] eq {a b c}
              } else {
                  fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
              }
              # Release the extra connection instead of leaking it.
              $rd close
          }
      }
  }
  # Scenario: basic SLAVEOF life cycle. The outer server (addressed with
  # level -1) acts as the master, the inner one (level 0) becomes its replica.
  start_server {tags {"repl"}} {
      r set mykey foo

      start_server {} {
          test {Second server should have role master at first} {
              s role
          } {master}

          test {SLAVEOF should start with link status "down"} {
              r slaveof [srv -1 host] [srv -1 port]
              s master_link_status
          } {down}

          test {The role should immediately be changed to "replica"} {
              s role
          } {slave}

          wait_for_sync r

          test {Sync should have transferred keys from master} {
              r get mykey
          } {foo}

          test {The link status should be up} {
              s master_link_status
          } {up}

          test {SET on the master should immediately propagate} {
              r -1 set mykey bar
              wait_for_condition 500 100 {
                  [r 0 get mykey] eq {bar}
              } else {
                  fail "SET on master did not propagated on replica"
              }
          }

          test {FLUSHALL should replicate} {
              r -1 flushall
              # The replica is checked right after the master replied, so
              # poll until it has applied the FLUSHALL instead of relying on
              # a fixed sleep (which was previously done only under valgrind).
              wait_for_condition 500 100 {
                  [r 0 dbsize] == 0
              } else {
                  fail "FLUSHALL on master did not propagate to replica"
              }
              list [r -1 dbsize] [r 0 dbsize]
          } {0 0}

          test {ROLE in master reports master with a slave} {
              set res [r -1 role]
              lassign $res role offset slaves
              assert {$role eq {master}}
              assert {$offset > 0}
              assert {[llength $slaves] == 1}
              # Each entry of the replicas list is unpacked into three
              # fields; only the offset is checked here.
              lassign [lindex $slaves 0] slave_ip slave_tcp_port slave_offset
              assert {$slave_offset <= $offset}
          }

          test {ROLE in slave reports slave in connected state} {
              set res [r role]
              lassign $res role master_host master_port slave_state slave_offset
              assert {$role eq {slave}}
              assert {$slave_state eq {connected}}
          }
      }
  }
  # Scenario: three replicas attach to a loaded master at the same time, for
  # every combination of master diskless sync (mdl) and replica diskless
  # load (sdl).
  foreach mdl {no yes} {
      foreach sdl {disabled swapdb} {
          start_server {tags {"repl"}} {
              set master [srv 0 client]
              $master config set repl-diskless-sync $mdl
              $master config set repl-diskless-sync-delay 1
              set master_host [srv 0 host]
              set master_port [srv 0 port]
              set slaves {}

              start_server {} {
                  lappend slaves [srv 0 client]
                  start_server {} {
                      lappend slaves [srv 0 client]
                      start_server {} {
                          lappend slaves [srv 0 client]

                          test "Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl" {
                              # The load handles are started only inside the
                              # test, so that the test can be skipped.
                              set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000000]
                              set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000000]
                              set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000000]
                              set load_handle3 [start_write_load $master_host $master_port 8]
                              set load_handle4 [start_write_load $master_host $master_port 4]

                              # Let some data accumulate so that there is an
                              # RDB part for the fork.
                              after 5000

                              # Configure every replica first, then send the
                              # SLAVEOF commands.
                              foreach replica_client $slaves {
                                  $replica_client config set repl-diskless-load $sdl
                              }
                              foreach replica_client $slaves {
                                  $replica_client slaveof $master_host $master_port
                              }

                              # Wait for all the three replicas to reach the
                              # "online" state from the POV of the master.
                              for {set retry 500} {$retry > 0} {incr retry -1} {
                                  set info [r -3 info]
                                  if {[string match {*slave0:*state=online*slave1:*state=online*slave2:*state=online*} $info]} {
                                      break
                                  }
                                  after 100
                              }
                              if {$retry == 0} {
                                  error "assertion:Slaves not correctly synchronized"
                              }

                              # Wait until the replicas acknowledge they are
                              # online, so that DBSIZE and DEBUG DIGEST will
                              # not fail because of timing issues.
                              wait_for_condition 500 100 {
                                  [lindex [[lindex $slaves 0] role] 3] eq {connected} &&
                                  [lindex [[lindex $slaves 1] role] 3] eq {connected} &&
                                  [lindex [[lindex $slaves 2] role] 3] eq {connected}
                              } else {
                                  fail "Slaves still not connected after some time"
                              }

                              # Stop the write load.
                              stop_bg_complex_data $load_handle0
                              stop_bg_complex_data $load_handle1
                              stop_bg_complex_data $load_handle2
                              stop_write_load $load_handle3
                              stop_write_load $load_handle4

                              # Master and replicas must end up with the same
                              # number of keys.
                              wait_for_condition 500 100 {
                                  [$master dbsize] == [[lindex $slaves 0] dbsize] &&
                                  [$master dbsize] == [[lindex $slaves 1] dbsize] &&
                                  [$master dbsize] == [[lindex $slaves 2] dbsize]
                              } else {
                                  fail "Different number of keys between master and replica after too long time."
                              }

                              # Compare the digests: the master one must not be
                              # the all-zero digest and every replica must
                              # match it.
                              set digest [$master debug digest]
                              set digest0 [[lindex $slaves 0] debug digest]
                              set digest1 [[lindex $slaves 1] debug digest]
                              set digest2 [[lindex $slaves 2] debug digest]
                              assert {$digest ne 0000000000000000000000000000000000000000}
                              assert {$digest eq $digest0}
                              assert {$digest eq $digest1}
                              assert {$digest eq $digest2}
                          }
                      }
                  }
              }
          }
      }
  }
  # Scenario: the replica keeps receiving the master stream while it is
  # running a long script, and must converge to the master dataset.
  start_server {tags {"repl"}} {
      set master [srv 0 client]
      set master_host [srv 0 host]
      set master_port [srv 0 port]

      start_server {} {
          test "Master stream is correctly processed while the replica has a script in -BUSY state" {
              set load_handle0 [start_write_load $master_host $master_port 3]
              set slave [srv 0 client]
              $slave config set lua-time-limit 500
              $slave slaveof $master_host $master_port

              # Wait for the replica to be online.
              wait_for_condition 500 100 {
                  [lindex [$slave role] 3] eq {connected}
              } else {
                  fail "Replica still not connected after some time"
              }

              # Give the master some time to send data to the replica.
              after 5000

              # Stop the ability of the replica to process data by sending
              # a script that will put it in BUSY state.
              $slave eval {for i=1,3000000000 do end} 0

              # Wait some time again so that more of the master stream
              # will be processed.
              after 2000

              # Stop the write load.
              stop_write_load $load_handle0

              # The two datasets must end up identical.
              wait_for_condition 500 100 {
                  [$master debug digest] eq [$slave debug digest]
              } else {
                  fail "Different datasets between replica and master"
              }
          }
      }
  }
  # Scenario: a full sync with diskless load in swapdb mode is interrupted
  # half way; the replica must get its original keys back.
  test {slave fails full sync and diskless load swapdb recovers it} {
      start_server {tags {"repl"}} {
          set slave [srv 0 client]
          set slave_host [srv 0 host]
          set slave_port [srv 0 port]
          set slave_log [srv 0 stdout]

          start_server {} {
              set master [srv 0 client]
              set master_host [srv 0 host]
              set master_port [srv 0 port]

              # Put different data sets on the master and the replica.
              # The master needs large keys since the replica replies to
              # info only once in 2mb.
              $slave debug populate 2000 slave 10
              $master debug populate 200 master 100000
              $master config set rdbcompression no

              # Both sides use diskless replication.
              $master config set repl-diskless-sync yes
              $master config set repl-diskless-sync-delay 0
              $slave config set repl-diskless-load swapdb

              # Slow down the rdb generation on the master, so that it is
              # easy to disconnect it mid sync:
              # 10ms per key, with 200 keys is 2 seconds.
              $master config set rdb-key-save-delay 10000

              # Start the replication process.
              $slave slaveof $master_host $master_port

              # Wait for the replica to start reading the rdb.
              wait_for_condition 50 100 {
                  [s -1 loading] eq 1
              } else {
                  fail "Replica didn't get into loading mode"
              }

              # Make sure the next sync will not start immediately, so that
              # the replica can be caught in between syncs.
              $master config set repl-diskless-sync-delay 5

              # For a faster server shutdown make rdb saving fast again
              # (the fork already uses the slow setting).
              $master config set rdb-key-save-delay 0

              # Wait for the replica to flush its db (key count drops).
              wait_for_condition 50 100 {
                  2000 != [scan [regexp -inline {keys\=([\d]*)} [$slave info keyspace]] keys=%d]
              } else {
                  fail "Replica didn't flush"
              }

              # The replica must still be loading at this point.
              assert_equal [s -1 loading] 1

              # Kill the replica connection on the master.
              set killed [$master client kill type slave]

              # Wait for the loading to stop (fail).
              wait_for_condition 50 100 {
                  [s -1 loading] eq 0
              } else {
                  fail "Replica didn't disconnect"
              }

              # The original keys must have been restored.
              assert_equal [$slave dbsize] 2000
          }
      }
  }
  # Scenario: the replication link is killed at random points while the
  # replica is loading a diskless rdb, several times in a row.
  test {diskless loading short read} {
      start_server {tags {"repl"}} {
          set replica [srv 0 client]
          set replica_host [srv 0 host]
          set replica_port [srv 0 port]

          start_server {} {
              set master [srv 0 client]
              set master_host [srv 0 host]
              set master_port [srv 0 port]

              # Master and replica both use diskless replication.
              $master config set repl-diskless-sync yes
              $master config set rdbcompression no
              $replica config set repl-diskless-load swapdb

              # Try to fill the master with all kinds of data types and
              # encodings.
              for {set k 0} {$k < 3} {incr k} {
                  for {set i 0} {$i < 10} {incr i} {
                      r set "$k int_$i" [expr {int(rand()*10000)}]
                      r expire "$k int_$i" [expr {int(rand()*10000)}]
                      r set "$k string_$i" [string repeat A [expr {int(rand()*1000000)}]]
                      r hset "$k hash_small" [string repeat A [expr {int(rand()*10)}]] 0[string repeat A [expr {int(rand()*10)}]]
                      r hset "$k hash_large" [string repeat A [expr {int(rand()*10000)}]] [string repeat A [expr {int(rand()*1000000)}]]
                      r sadd "$k set_small" [string repeat A [expr {int(rand()*10)}]]
                      r sadd "$k set_large" [string repeat A [expr {int(rand()*1000000)}]]
                      r zadd "$k zset_small" [expr {rand()}] [string repeat A [expr {int(rand()*10)}]]
                      r zadd "$k zset_large" [expr {rand()}] [string repeat A [expr {int(rand()*1000000)}]]
                      r lpush "$k list_small" [string repeat A [expr {int(rand()*10)}]]
                      r lpush "$k list_large" [string repeat A [expr {int(rand()*1000000)}]]
                      for {set j 0} {$j < 10} {incr j} {
                          r xadd "$k stream" * foo "asdf" bar "1234"
                      }
                      r xgroup create "$k stream" "mygroup_$i" 0
                      r xreadgroup GROUP "mygroup_$i" Alice COUNT 1 STREAMS "$k stream" >
                  }
              }

              # Start the replication process.
              set loglines [count_log_lines -1]
              $master config set repl-diskless-sync-delay 0
              $replica replicaof $master_host $master_port

              # Kill the replication at various points; more rounds are
              # done in accurate mode.
              set attempts [expr {$::accurate ? 10 : 3}]
              for {set i 0} {$i < $attempts} {incr i} {
                  # Wait for the replica to start reading the rdb, using the
                  # log file since the replica only responds to INFO once
                  # in 2mb.
                  wait_for_log_message -1 "*Loading DB in memory*" $loglines 2000 1

                  # Add a random sleep so that the link is killed at a
                  # different place each time.
                  after [expr {int(rand()*100)}]

                  # Kill the replica connection on the master.
                  set killed [$master client kill type replica]

                  set short_read_missed [catch {
                      set res [wait_for_log_message -1 "*Internal error in RDB*" $loglines 100 10]
                      if {$::verbose} {
                          puts $res
                      }
                  }]
                  if {$short_read_missed} {
                      puts "failed triggering short read"
                      # Force the replica to try another full sync.
                      $master client kill type replica
                      $master set asdf asdf
                      # The side effect of resizing the backlog is that it
                      # is flushed (16k is the min size).
                      $master config set repl-backlog-size [expr {16384 + $i}]
                  }

                  # Wait for the loading to stop (fail).
                  set loglines [count_log_lines -1]
                  wait_for_condition 100 10 {
                      [s -1 loading] eq 0
                  } else {
                      fail "Replica didn't disconnect"
                  }
              }

              # Enable fast shutdown.
              $master config set rdb-key-save-delay 0
          }
      }
  }
  # Get the current utime and stime counters (since its creation) from a
  # /proc stat file.
  #
  # statfile: path of the stat file to read (callers in this file pass
  #           /proc/<pid>/stat).
  #
  # Returns a three element list: the current time in milliseconds, followed
  # by the whitespace separated fields at index 13 (utime) and 14 (stime) of
  # the file content.
  #
  # Raises an "assertion:can't parse /proc: ..." error when the file cannot
  # be opened or read, or when the two fields are not plain decimal numbers
  # (for example because the content is truncated).
  proc get_cpu_metrics { statfile } {
      if {[catch {open $statfile r} fid]} {
          error "assertion:can't parse /proc: $fid"
      }
      # Read inside a catch so that the channel is closed even on failure.
      set failed [catch {read $fid 1024} data]
      ::close $fid
      if {$failed} {
          error "assertion:can't parse /proc: $data"
      }
      set fields [split $data]
      ;## number of jiffies it has been scheduled...
      set utime [lindex $fields 13]
      set stime [lindex $fields 14]
      # Reject missing or non-numeric counters here, instead of letting a
      # later arithmetic expression fail with an unrelated message.
      if {![string is digit -strict $utime] || ![string is digit -strict $stime]} {
          error "assertion:can't parse /proc: unexpected content in $statfile"
      }
      set mstime [clock milliseconds]
      return [list $mstime $utime $stime]
  }
  # Compute the %utime and %stime consumed between two measurements.
  #
  # start, end: three element lists of millisecond timestamp, utime and
  #             stime, in the shape returned by get_cpu_metrics.
  #
  # Returns a two element list: user CPU percentage and system CPU
  # percentage over the elapsed interval.
  proc compute_cpu_usage {start end} {
      lassign $start start_ms start_utime start_stime
      lassign $end end_ms end_utime end_stime
      set clock_ticks [exec getconf CLK_TCK]
      # Express the elapsed wall time in jiffies, so that it is comparable
      # with the utime/stime deltas.
      set elapsed_jiffies [expr {($end_ms - $start_ms) * double($clock_ticks) / 1000}]
      set user_delta [expr {$end_utime - $start_utime}]
      set system_delta [expr {$end_stime - $start_stime}]
      set user_pct [expr {($user_delta / $elapsed_jiffies) * 100}]
      set system_pct [expr {($system_delta / $elapsed_jiffies) * 100}]
      return [list $user_pct $system_pct]
  }
  # test diskless rdb pipe with multiple replicas, which may drop half way
  #
  # For each value of all_drop (no, slow, fast, all) two replicas are started:
  # replica 0 loads the rdb slowly, replica 1 loads it at full speed. Depending
  # on all_drop none, one or both of them are killed during the transfer.
  start_server {tags {"repl"}} {
      set master [srv 0 client]
      $master config set repl-diskless-sync yes
      $master config set repl-diskless-sync-delay 1
      set master_host [srv 0 host]
      set master_port [srv 0 port]
      set master_pid [srv 0 pid]

      # put enough data in the db that the rdb file will be bigger than the socket buffers
      # and since we'll have key-load-delay of 100, 20000 keys will take at least 2 seconds
      # we also need the replica to process requests during transfer (which it does only once in 2mb)
      $master debug populate 20000 test 10000
      $master config set rdbcompression no

      # If running on Linux, we also measure utime/stime to detect possible I/O handling issues.
      # The output of uname (not the status returned by catch) is what has to
      # be compared with "Linux"; when uname cannot be executed the
      # measurement is simply skipped.
      set measure_time 0
      if {![catch {exec uname} os] && $os eq "Linux"} {
          set measure_time 1
      }

      foreach all_drop {no slow fast all} {
          test "diskless $all_drop replicas drop during rdb pipe" {
              set replicas {}
              set replicas_alive {}
              # start one replica that will read the rdb fast, and one that will be slow
              start_server {} {
                  lappend replicas [srv 0 client]
                  lappend replicas_alive [srv 0 client]
                  start_server {} {
                      lappend replicas [srv 0 client]
                      lappend replicas_alive [srv 0 client]

                      # start replication
                      # it's enough for just one replica to be slow, and have its write handler enabled
                      # so that the whole rdb generation process is bound to that
                      set loglines [count_log_lines -1]
                      [lindex $replicas 0] config set repl-diskless-load swapdb
                      [lindex $replicas 0] config set key-load-delay 100
                      [lindex $replicas 0] replicaof $master_host $master_port
                      [lindex $replicas 1] replicaof $master_host $master_port

                      # wait for the replicas to start reading the rdb
                      # using the log file since the replica only responds to INFO once in 2mb
                      wait_for_log_message -1 "*Loading DB in memory*" $loglines 800 10

                      if {$measure_time} {
                          set master_statfile "/proc/$master_pid/stat"
                          set master_start_metrics [get_cpu_metrics $master_statfile]
                          set start_time [clock seconds]
                      }

                      # wait a while so that the pipe socket writer will be
                      # blocked on write (since replica 0 is slow to read from the socket)
                      after 500

                      # add some command to be present in the command stream after the rdb.
                      $master incr $all_drop

                      # disconnect replicas depending on the current test
                      set loglines [count_log_lines -2]
                      if {$all_drop == "all" || $all_drop == "fast"} {
                          exec kill [srv 0 pid]
                          set replicas_alive [lreplace $replicas_alive 1 1]
                      }
                      if {$all_drop == "all" || $all_drop == "slow"} {
                          exec kill [srv -1 pid]
                          set replicas_alive [lreplace $replicas_alive 0 0]
                      }

                      # wait for rdb child to exit
                      wait_for_condition 500 100 {
                          [s -2 rdb_bgsave_in_progress] == 0
                      } else {
                          fail "rdb child didn't terminate"
                      }

                      # make sure we got what we were aiming for, by looking for the message in the log file
                      if {$all_drop == "all"} {
                          wait_for_log_message -2 "*Diskless rdb transfer, last replica dropped, killing fork child*" $loglines 1 1
                      }
                      if {$all_drop == "no"} {
                          wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 2 replicas still up*" $loglines 1 1
                      }
                      if {$all_drop == "slow" || $all_drop == "fast"} {
                          wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 1 replicas still up*" $loglines 1 1
                      }

                      # make sure we don't have a busy loop going through epoll_wait
                      if {$measure_time} {
                          set master_end_metrics [get_cpu_metrics $master_statfile]
                          set time_elapsed [expr {[clock seconds]-$start_time}]
                          set master_cpu [compute_cpu_usage $master_start_metrics $master_end_metrics]
                          set master_utime [lindex $master_cpu 0]
                          set master_stime [lindex $master_cpu 1]
                          if {$::verbose} {
                              puts "elapsed: $time_elapsed"
                              puts "master utime: $master_utime"
                              puts "master stime: $master_stime"
                          }
                          if {$all_drop == "all" || $all_drop == "slow"} {
                              assert {$master_utime < 70}
                              assert {$master_stime < 70}
                          }
                          # The loop value for "nothing dropped" is "no", so
                          # that is the value that has to be matched here.
                          if {$all_drop == "no" || $all_drop == "fast"} {
                              assert {$master_utime < 15}
                              assert {$master_stime < 15}
                          }
                      }

                      # verify the data integrity
                      foreach replica $replicas_alive {
                          # Wait that replicas acknowledge they are online so
                          # we are sure that DBSIZE and DEBUG DIGEST will not
                          # fail because of timing issues.
                          wait_for_condition 150 100 {
                              [lindex [$replica role] 3] eq {connected}
                          } else {
                              fail "replicas still not connected after some time"
                          }

                          # Make sure that replicas and master have same
                          # number of keys
                          wait_for_condition 50 100 {
                              [$master dbsize] == [$replica dbsize]
                          } else {
                              fail "Different number of keys between master and replicas after too long time."
                          }

                          # Check digests
                          set digest [$master debug digest]
                          set digest0 [$replica debug digest]
                          assert {$digest ne 0000000000000000000000000000000000000000}
                          assert {$digest eq $digest0}
                      }
                  }
              }
          }
      }
  }
  # Scenario: a replica finds, in the same event loop iteration, both the
  # disconnection from its master and a REPLICAOF command pointing it to
  # another instance. The switch must complete without any extra full sync.
  test {replicaof right after disconnection} {
      # this is a rare race condition that was reproduced sporadically by the psync2 unit.
      # see details in #7205
      start_server {tags {"repl"}} {
          set replica1 [srv 0 client]
          set replica1_host [srv 0 host]
          set replica1_port [srv 0 port]
          set replica1_log [srv 0 stdout]
          start_server {} {
              set replica2 [srv 0 client]
              set replica2_host [srv 0 host]
              set replica2_port [srv 0 port]
              set replica2_log [srv 0 stdout]
              start_server {} {
                  set master [srv 0 client]
                  set master_host [srv 0 host]
                  set master_port [srv 0 port]
                  $replica1 replicaof $master_host $master_port
                  $replica2 replicaof $master_host $master_port

                  wait_for_condition 50 100 {
                      [string match {*master_link_status:up*} [$replica1 info replication]] &&
                      [string match {*master_link_status:up*} [$replica2 info replication]]
                  } else {
                      fail "Can't turn the instance into a replica"
                  }

                  # Put replica2 to sleep through a deferring client, so that
                  # the commands below are issued while it is not responsive.
                  set rd [redis_deferring_client -1]
                  $rd debug sleep 1
                  after 100

                  # when replica2 will wake up from the sleep it will find both disconnection
                  # from its master and also a replicaof command at the same event loop
                  $master client kill type replica
                  $replica2 replicaof $replica1_host $replica1_port
                  $rd read

                  wait_for_condition 50 100 {
                      [string match {*master_link_status:up*} [$replica2 info replication]]
                  } else {
                      fail "role change failed."
                  }

                  # make sure psync succeeded, and there were no unexpected full syncs.
                  assert_equal [status $master sync_full] 2
                  assert_equal [status $replica1 sync_full] 0
                  assert_equal [status $replica2 sync_full] 0

                  # Release the extra connection instead of leaking it.
                  $rd close
              }
          }
      }
  }