  # psync2-pingoff.tcl
  # These tests were added together with the meaningful offset implementation
  # in redis 6.0.0, which was later abandoned in 6.0.4. They used to test that
  # servers are able to PSYNC with replicas even if the replication stream has
  # PINGs at the end which are present on one server and missing on another.
  # We keep these tests just because they reproduce edge cases in the replication
  # logic, in the hope they'll be able to spot some problem in the future.
  start_server {tags {"psync2 external:skip"}} {
  start_server {} {
      # Flip to 1 to print the log file path of every server while debugging.
      set debug_msg 0

      # R(i), R_host(i) and R_port(i) describe the server at srv level -i.
      # Every server is configured to ping its replicas once per second.
      foreach idx {0 1} {
          set level [expr {0 - $idx}]
          set R($idx) [srv $level client]
          set R_host($idx) [srv $level host]
          set R_port($idx) [srv $level port]
          $R($idx) CONFIG SET repl-ping-replica-period 1
          if {$debug_msg} {
              puts "Log file: [srv $level stdout]"
          }
      }

      # Attach R(1) to R(0) and wait until a first key is visible on both.
      test "PSYNC2 pingoff: setup" {
          $R(1) replicaof $R_host(0) $R_port(0)
          $R(0) set foo bar
          wait_for_condition 50 1000 {
              [status $R(1) master_link_status] eq "up" &&
              [$R(0) dbsize] == 1 &&
              [$R(1) dbsize] == 1
          } else {
              fail "Replicas not replicating from master"
          }
      }

      # Three increments on R(0); R(1) has to converge to the same counter.
      test "PSYNC2 pingoff: write and wait replication" {
          foreach round {1 2 3} {
              $R(0) INCR counter
          }
          wait_for_condition 50 1000 {
              [$R(0) GET counter] eq [$R(1) GET counter]
          } else {
              fail "Master and replica don't agree about counter"
          }
      }

      # The replica is made to hang inside a transaction while its link to
      # the master stays open, so the master keeps emitting one PING per
      # second (the period configured above) during the sleep. The promotion
      # to master is queued in the same transaction, right after the sleep.
      test "PSYNC2 pingoff: pause replica and promote it" {
          $R(1) MULTI
          $R(1) DEBUG SLEEP 5
          $R(1) SLAVEOF NO ONE
          $R(1) EXEC
          # This call only returns once the server is responsive again.
          $R(1) ping
      }

      test "Make the old master a replica of the new one and check conditions" {
          # Push the new master's ping period very high: no PING can then
          # slip in between the two INFO reads used by the final offset
          # comparison of this test.
          $R(1) CONFIG SET repl-ping-replica-period 1000
          assert_equal [status $R(1) sync_full] 0

          $R(0) REPLICAOF $R_host(1) $R_port(1)
          wait_for_condition 50 1000 {
              [status $R(0) master_link_status] eq "up"
          } else {
              fail "The new master was not able to sync"
          }

          # A fresh write on the new master must reach the old master.
          $R(1) incr x
          wait_for_condition 50 1000 {
              [status $R(0) loading] == 0 &&
              [$R(0) get x] == 1
          } else {
              fail "replica didn't get incr"
          }

          # Both sides must now report the same replication offset.
          set old_master_offset [status $R(0) master_repl_offset]
          set new_master_offset [status $R(1) master_repl_offset]
          assert_equal $old_master_offset $new_master_offset
      }
  }}
  start_server {tags {"psync2 external:skip"}} {
  start_server {} {
  start_server {} {
  start_server {} {
  start_server {} {
      test {test various edge cases of repl topology changes with missing pings at the end} {
          # The server at srv level -4 starts out as the master; the servers
          # at levels -3, -2, -1 and -0 become replica1 .. replica4.
          set master [srv -4 client]
          set master_host [srv -4 host]
          set master_port [srv -4 port]
          foreach {handle_var srv_level} {replica1 -3 replica2 -2 replica3 -1 replica4 -0} {
              set $handle_var [srv $srv_level client]
          }

          # Attach all four replicas to the master, in order.
          foreach node [list $replica1 $replica2 $replica3 $replica4] {
              $node replicaof $master_host $master_port
          }
          wait_for_condition 50 1000 {
              [status $master connected_slaves] == 4
          } else {
              fail "replicas didn't connect"
          }

          # One write that every replica has to receive.
          $master incr x
          wait_for_condition 50 1000 {
              [$replica1 get x] == 1 &&
              [$replica2 get x] == 1 &&
              [$replica3 get x] == 1 &&
              [$replica4 get x] == 1
          } else {
              fail "replicas didn't get incr"
          }

          # Detach replica1 and replica2, then let the master ping so that
          # only replica3 and replica4 move past the recorded offset.
          $replica1 replicaof no one
          $replica2 replicaof 127.0.0.1 1 ;# we can't promote it to master since that will cycle the replication id
          $master config set repl-ping-replica-period 1
          set base_offset [status $master master_repl_offset]
          wait_for_condition 50 100 {
              [status $replica3 master_repl_offset] > $base_offset &&
              [status $replica4 master_repl_offset] > $base_offset
          } else {
              fail "replica didn't sync in time"
          }

          # Re-wire the topology around replica1, which never saw the last
          # ping of the old master:
          #  - replica3 and the old master now replicate from replica1
          #  - replica4 stays attached to the old master
          #  - replica2 re-connects to the old master (which went back in time)
          set promoted_host [srv -3 host]
          set promoted_port [srv -3 port]
          $replica3 replicaof $promoted_host $promoted_port
          $master replicaof $promoted_host $promoted_port
          $replica2 replicaof $master_host $master_port
          wait_for_condition 50 1000 {
              [status $replica2 master_link_status] eq "up" &&
              [status $replica3 master_link_status] eq "up" &&
              [status $replica4 master_link_status] eq "up" &&
              [status $master master_link_status] eq "up"
          } else {
              fail "replicas didn't connect"
          }

          # A write on replica1 must propagate to every other node.
          $replica1 incr x
          wait_for_condition 50 1000 {
              [$replica2 get x] == 2 &&
              [$replica3 get x] == 2 &&
              [$replica4 get x] == 2 &&
              [$master get x] == 2
          } else {
              fail "replicas didn't get incr"
          }

          # Check the full-sync counter of each node.
          assert_equal [status $master sync_full] 6
          assert_equal [status $replica1 sync_full] 2
          assert_equal [status $replica2 sync_full] 0
          assert_equal [status $replica3 sync_full] 0
          assert_equal [status $replica4 sync_full] 0

          # Drop every master link so each replicating node must resync
          # (the original test labels this step "force psync").
          foreach node [list $master $replica2 $replica3 $replica4] {
              $node client kill type master
          }

          # Replication has to keep working after the reconnects.
          $replica1 incr x
          wait_for_condition 50 1000 {
              [$replica2 get x] == 3 &&
              [$replica3 get x] == 3 &&
              [$replica4 get x] == 3 &&
              [$master get x] == 3
          } else {
              fail "replicas didn't get incr"
          }

          # The full-sync counters must not have moved.
          assert_equal [status $master sync_full] 6
          assert_equal [status $replica1 sync_full] 2
          assert_equal [status $replica2 sync_full] 0
          assert_equal [status $replica3 sync_full] 0
          assert_equal [status $replica4 sync_full] 0
      }
  }}}}}
  start_server {tags {"psync2 external:skip"}} {
  start_server {} {
  start_server {} {
      # R(i), R_host(i) and R_port(i) describe the server at srv level -i.
      # Every server is configured to ping its replicas once per second.
      foreach idx {0 1 2} {
          set level [expr {0 - $idx}]
          set R($idx) [srv $level client]
          set R_host($idx) [srv $level host]
          set R_port($idx) [srv $level port]
          $R($idx) CONFIG SET repl-ping-replica-period 1
      }

      test "Chained replicas disconnect when replica re-connect with the same master" {
          # Build the chain R(0) -> R(1) -> R(2): R(2) is a replica of the
          # replica R(1).
          $R(1) replicaof $R_host(0) $R_port(0)
          $R(2) replicaof $R_host(1) $R_port(1)
          wait_for_condition 50 1000 {
              [status $R(2) master_link_status] eq "up"
          } else {
              fail "Chained replica not replicating from its master"
          }

          # Write once on the master, then wait until both replicas have
          # moved past the offset recorded right after the write, i.e. until
          # the master has sent some PINGs down the chain.
          $R(0) INCR counter2
          set base_offset [status $R(0) master_repl_offset]
          wait_for_condition 50 100 {
              [status $R(1) master_repl_offset] > $base_offset &&
              [status $R(2) master_repl_offset] > $base_offset
          } else {
              fail "replica didn't sync in time"
          }

          # Snapshot the partial-resync counters, then raise the master's
          # ping period to 100 before dropping the link.
          set partial_ok_master [status $R(0) sync_partial_ok]
          set partial_ok_replica [status $R(1) sync_partial_ok]
          $R(0) CONFIG SET repl-ping-replica-period 100

          # Kill the master's direct replica connection. Expect exactly one
          # more successful partial resync on R(0) and none on R(1).
          $R(0) client kill type replica
          wait_for_condition 50 1000 {
              [status $R(1) master_link_status] eq "up" &&
              [status $R(2) master_link_status] eq "up" &&
              [status $R(0) sync_partial_ok] == $partial_ok_master + 1 &&
              [status $R(1) sync_partial_ok] == $partial_ok_replica
          } else {
              fail "Disconnected replica failed to PSYNC with master"
          }

          # The direct replica and the chained replica must both report the
          # master's replication offset.
          assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
          assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset]

          # A second write must still travel down the whole chain.
          $R(0) incr counter2
          wait_for_condition 50 1000 {
              [$R(1) get counter2] == 2 &&
              [$R(2) get counter2] == 2
          } else {
              fail "replicas didn't get incr"
          }

          # Offsets must agree again after the extra write.
          assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
          assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset]
      }
  }}}