# replication-buffer.tcl
  # This test group checks that all replicas share one global replication
  # buffer: two replicas must not double the replication buffer size, and
  # when no replica uses the buffer any more, it shrinks.
  #
  # Layout: srv 0 is the master; srv -1, -2 and -3 are replica3, replica2
  # and replica1 respectively. `s` without a level is used below to read the
  # master's INFO fields.
  start_server {tags {"repl external:skip"}} {
  start_server {} {
  start_server {} {
  start_server {} {
      set replica1 [srv -3 client]
      set replica2 [srv -2 client]
      set replica3 [srv -1 client]
      set master [srv 0 client]
      set master_host [srv 0 host]
      set master_port [srv 0 port]

      $master config set save ""
      $master config set repl-backlog-size 16384
      # All three replica limits are 0 (per redis.conf, 0 disables the limit),
      # so the output buffer limit does not interfere with this test group.
      $master config set client-output-buffer-limit "replica 0 0 0"

      # Make sure replica3 is synchronized with master
      $replica3 replicaof $master_host $master_port
      wait_for_sync $replica3

      # Generating RDB will take some 100 seconds (100 keys with the
      # per-key save delay configured below), which keeps replica1 and
      # replica2 in the "waiting for bgsave" state for the whole test.
      $master config set rdb-key-save-delay 1000000
      populate 100 "" 16

      # Make sure replica1 and replica2 are waiting bgsave
      $replica1 replicaof $master_host $master_port
      $replica2 replicaof $master_host $master_port
      wait_for_condition 50 100 {
          ([s rdb_bgsave_in_progress] == 1) &&
          [lindex [$replica1 role] 3] eq {sync} &&
          [lindex [$replica2 role] 3] eq {sync}
      } else {
          fail "fail to sync with replicas"
      }

      test {All replicas share one global replication buffer} {
          set before_used [s used_memory]
          populate 1024 "" 1024 ; # Write extra 1M data

          # New data uses 1M memory, but all replicas use only one
          # replication buffer, so the output memory of all replicas is
          # not more than double the replication buffer.
          set repl_buf_mem [s mem_total_replication_buffers]
          set extra_mem [expr {[s used_memory]-$before_used-1024*1024}]
          assert {$extra_mem < 2*$repl_buf_mem}

          # Kill replica1, replication_buffer will not become smaller
          # (replica2 still references it).
          catch {$replica1 shutdown nosave}
          wait_for_condition 50 100 {
              [s connected_slaves] eq {2}
          } else {
              fail "replica doesn't disconnect with master"
          }
          assert_equal $repl_buf_mem [s mem_total_replication_buffers]
      }

      test {Replication buffer will become smaller when no replica uses} {
          # Make sure replica3 catches up with the master
          wait_for_ofs_sync $master $replica3

          set repl_buf_mem [s mem_total_replication_buffers]

          # Kill replica2, replication_buffer will become smaller
          catch {$replica2 shutdown nosave}
          wait_for_condition 50 100 {
              [s connected_slaves] eq {1}
          } else {
              fail "replica2 doesn't disconnect with master"
          }

          # The buffer must have shrunk by more than the 1M written above.
          # assert already evaluates its argument as an expression, so no
          # nested (unbraced) expr is needed.
          assert {($repl_buf_mem - 1024*1024) > [s mem_total_replication_buffers]}
      }
  }
  }
  }
  }
  # This test group checks that the replication backlog can outgrow the
  # configured backlog limit when a slow replica keeps massive replication
  # buffers alive, and that other replicas can use this replication buffer
  # (beyond the backlog config) for partial re-synchronization. It also checks
  # that replication backlog memory becomes smaller again once the master
  # disconnects the slow replica because its output buffer limit is reached.
  #
  # Layout: srv 0 is the master; srv -1 is replica2 and srv -2 is replica1.
  # `s` without a level is used below to read the master's INFO fields.
  start_server {tags {"repl external:skip"}} {
  start_server {} {
  start_server {} {
      set replica1 [srv -2 client]
      set replica1_pid [s -2 process_id]
      set replica2 [srv -1 client]
      set replica2_pid [s -1 process_id]

      set master [srv 0 client]
      set master_host [srv 0 host]
      set master_port [srv 0 port]

      $master config set save ""
      $master config set repl-backlog-size 16384
      # All three replica limits are 0 (per redis.conf, 0 disables the limit);
      # a real limit is only configured in the last test of this group.
      $master config set client-output-buffer-limit "replica 0 0 0"

      # Executing 'debug digest' on a master that has many keys takes a long
      # time (especially under valgrind), which could make replica1 and
      # replica2 disconnect from the master, so use a large repl-timeout.
      $master config set repl-timeout 1000
      $replica1 config set repl-timeout 1000
      $replica2 config set repl-timeout 1000

      $replica1 replicaof $master_host $master_port
      wait_for_sync $replica1

      test {Replication backlog size can outgrow the backlog limit config} {
          # Generating RDB will take 1000 seconds
          $master config set rdb-key-save-delay 1000000
          populate 1000 master 10000
          $replica2 replicaof $master_host $master_port
          # Make sure replica2 is waiting bgsave
          wait_for_condition 5000 100 {
              ([s rdb_bgsave_in_progress] == 1) &&
              [lindex [$replica2 role] 3] eq {sync}
          } else {
              fail "fail to sync with replicas"
          }
          # The actual replication backlog grows beyond the backlog setting
          # since the slow replica2 keeps the replication buffer alive.
          populate 10000 master 10000
          assert {[s repl_backlog_histlen] > 10000*10000}
      }

      # Wait for replica1 to catch up with the master
      wait_for_condition 1000 100 {
          [s -2 master_repl_offset] eq [s master_repl_offset]
      } else {
          fail "Replica offset didn't catch up with the master after too long time"
      }

      test {Replica could use replication buffer (beyond backlog config) for partial resynchronization} {
          # replica1 disconnects from master (by pointing it at replica2)
          $replica1 replicaof [srv -1 host] [srv -1 port]
          # Write a mass of data that exceeds repl-backlog-size
          populate 10000 master 10000
          # replica1 reconnects with master
          $replica1 replicaof $master_host $master_port
          wait_for_condition 1000 100 {
              [s -2 master_repl_offset] eq [s master_repl_offset]
          } else {
              fail "Replica offset didn't catch up with the master after too long time"
          }

          # replica2 still waits for bgsave ending
          assert {[s rdb_bgsave_in_progress] eq {1} && [lindex [$replica2 role] 3] eq {sync}}
          # master accepted replica1 partial resync
          assert_equal [s sync_partial_ok] {1}
          assert_equal [$master debug digest] [$replica1 debug digest]
      }

      test {Replication backlog memory will become smaller if disconnecting with replica} {
          assert {[s repl_backlog_histlen] > 2*10000*10000}
          assert_equal [s connected_slaves] {2}

          # Pause replica2 so it stops reading from its connection.
          exec kill -SIGSTOP $replica2_pid

          # Run the checks under catch so that replica2 is resumed even when
          # one of them fails; otherwise a failure would leave a stopped
          # process behind. The original error is re-raised unchanged.
          set stop_rc [catch {
              r config set client-output-buffer-limit "replica 128k 0 0"
              # trigger output buffer limit check
              r set key [string repeat A [expr {64*1024}]]
              # master will close replica2's connection since replica2's
              # output buffer limit is reached, so there is only replica1.
              wait_for_condition 100 100 {
                  [s connected_slaves] eq {1}
              } else {
                  fail "master didn't disconnect with replica2"
              }

              # Since we trim replication backlog incrementally, replication
              # backlog memory may take time to be reclaimed.
              wait_for_condition 1000 100 {
                  [s repl_backlog_histlen] < 10000*10000
              } else {
                  fail "Replication backlog memory is not smaller"
              }
          } stop_err stop_opts]

          if {$stop_rc} {
              # Best effort resume; do not mask the original failure.
              catch {exec kill -SIGCONT $replica2_pid}
              return -options $stop_opts $stop_err
          }
          exec kill -SIGCONT $replica2_pid
      }
  }
  }
  }
  # Checks that a replica whose pending output exceeds
  # client-output-buffer-limit, but stays below repl-backlog-size, is not
  # disconnected by the master after a partial resynchronization.
  #
  # Layout: `r` / srv 0 is the master; srv -1 is the replica.
  test {Partial resynchronization is successful even client-output-buffer-limit is less than repl-backlog-size} {
      start_server {tags {"repl external:skip"}} {
          start_server {} {
              r config set save ""
              r config set repl-backlog-size 100mb
              r config set client-output-buffer-limit "replica 512k 0 0"

              set replica [srv -1 client]
              $replica replicaof [srv 0 host] [srv 0 port]
              wait_for_sync $replica

              set big_str [string repeat A [expr {10*1024*1024}]] ;# 10mb big string

              # Inside one transaction: drop the replica link, write 20mb and
              # keep the master busy so the replica reconnects afterwards.
              r multi
              r client kill type replica
              r set key $big_str
              r set key $big_str
              r debug sleep 2 ;# wait for replica reconnecting
              r exec

              # When the replica reconnects with the master, the master
              # accepts partial resync and doesn't close the replica client
              # even though the client output buffer limit is reached.
              r set key $big_str ;# trigger output buffer limit check
              wait_for_ofs_sync r $replica

              # master accepted replica partial resync (only the initial
              # full sync was performed)
              assert_equal [s sync_full] {1}
              assert_equal [s sync_partial_ok] {1}

              r multi
              r set key $big_str
              r set key $big_str
              r exec

              # The replica's reply buffer size is more than
              # client-output-buffer-limit but doesn't exceed
              # repl-backlog-size, so we don't close the replica client.
              wait_for_condition 1000 100 {
                  [s -1 master_repl_offset] eq [s master_repl_offset]
              } else {
                  fail "Replica offset didn't catch up with the master after too long time"
              }

              # No additional full or partial sync must have happened.
              assert_equal [s sync_full] {1}
              assert_equal [s sync_partial_ok] {1}
          }
      }
  }