# memefficiency.tcl (21 KB)

# (line-number gutter 1-500 left over from the file viewer; not part of the Tcl source)
  # Measure how compactly the server stores small string values.
  #
  # The dataset is flushed, then 10000 SET commands are pipelined through a
  # deferring client. Every value is a run of "A" characters whose length is
  # a random integer len with 0 <= len < range. The payload written (key
  # length + value length + 2 bytes per pair) is compared with the growth of
  # the server's used_memory.
  #
  # Arguments:
  #   range - exclusive upper bound of the random value length.
  #
  # Returns:
  #   written / (used_memory after - used_memory before), as a double.
  proc test_memory_efficiency {range} {
      r flushall
      set rd [redis_deferring_client]
      set base_mem [s used_memory]
      set written 0
      for {set j 0} {$j < 10000} {incr j} {
          set key key:$j
          set val [string repeat A [expr {int(rand()*$range)}]]
          $rd set $key $val
          incr written [string length $key]
          incr written [string length $val]
          incr written 2 ;# A separator is the minimum to store key-value data.
      }
      for {set j 0} {$j < 10000} {incr j} {
          $rd read ; # Discard replies
      }

      set current_mem [s used_memory]

      # The deferring client is not needed any more. It is closed only after
      # the second sample so that both samples are taken while the client is
      # connected, and so that repeated calls do not pile up open connections.
      $rd close

      set used [expr {$current_mem-$base_mem}]
      return [expr {double($written)/$used}]
  }
  # Run the memory-efficiency check once per value-size range.
  start_server {tags {"memefficiency external:skip"}} {
      # Flat list of pairs: value size range, minimum acceptable efficiency.
      set efficiency_floors {
          32    0.15
          64    0.25
          128   0.35
          1024  0.75
          16384 0.82
      }
      foreach {size_range expected_min_efficiency} $efficiency_floors {
          test "Memory efficiency with values in range $size_range" {
              set efficiency [test_memory_efficiency $size_range]
              assert {$efficiency >= $expected_min_efficiency}
          }
      }
  }
  36. run_solo {defrag} {
  37. start_server {tags {"defrag external:skip"} overrides {appendonly yes auto-aof-rewrite-percentage 0 save ""}} {
  38. if {[string match {*jemalloc*} [s mem_allocator]] && [r debug mallctl arenas.page] <= 8192} {
  test "Active defrag" {
      # Scenario settings, sent as individual CONFIG SET calls in this order.
      foreach {cfg_name cfg_value} {
          hz                            100
          activedefrag                  no
          active-defrag-threshold-lower 5
          active-defrag-cycle-min       65
          active-defrag-cycle-max       75
          active-defrag-ignore-bytes    2mb
          maxmemory                     100mb
          maxmemory-policy              allkeys-lru
      } {
          r config set $cfg_name $cfg_value
      }

      # populate is a test-suite helper defined outside this file; the two
      # loads below are expected to leave the allocator fragmented, which is
      # what the first assertion checks.
      populate 700000 asdf1 150
      populate 170000 asdf2 300

      after 120 ;# serverCron only updates the info once in 100ms
      set frag [s allocator_frag_ratio]
      if {$::verbose} {
          puts "frag $frag"
      }
      assert {$frag >= 1.4}

      r config set latency-monitor-threshold 5
      r latency reset
      r config set maxmemory 110mb ;# prevent further eviction (not to fail the digest test)
      set digest [r debug digest]

      # Turning active defrag on may fail; the error is swallowed and the
      # effective setting is queried instead.
      catch {r config set activedefrag yes} enable_err
      if {[r config get activedefrag] eq "activedefrag yes"} {
          # Wait for the active defrag to start working (decision once a
          # second).
          wait_for_condition 50 100 {
              [s active_defrag_running] ne 0
          } else {
              fail "defrag not started."
          }

          # Wait for the active defrag to stop working.
          wait_for_condition 2000 100 {
              [s active_defrag_running] eq 0
          } else {
              after 120 ;# serverCron only updates the info once in 100ms
              puts [r info memory]
              puts [r memory malloc-stats]
              fail "defrag didn't stop."
          }

          # Test that the fragmentation is lower.
          after 120 ;# serverCron only updates the info once in 100ms
          set frag [s allocator_frag_ratio]

          # Pick the max latency of the defrag cycle event. Element 0 of each
          # LATENCY LATEST entry is the event name and element 3 its max.
          set max_latency 0
          foreach entry [r latency latest] {
              if {[lindex $entry 0] eq "active-defrag-cycle"} {
                  set max_latency [lindex $entry 3]
              }
          }

          if {$::verbose} {
              puts "frag $frag"
              set misses [s active_defrag_misses]
              set hits [s active_defrag_hits]
              puts "hits: $hits"
              puts "misses: $misses"
              puts "max latency $max_latency"
              puts [r latency latest]
              puts [r latency history active-defrag-cycle]
          }
          assert {$frag < 1.1}

          # due to high fragmentation, 100hz, and active-defrag-cycle-max set to 75,
          # we expect max latency to be not much higher than 7.5ms but due to rare slowness threshold is set higher
          if {!$::no_latency} {
              assert {$max_latency <= 30}
          }
      }

      # verify the data isn't corrupted or changed
      set newdigest [r debug digest]
      assert {$digest eq $newdigest}
      r save ;# saving an rdb iterates over all the data / pointers

      # if defrag is supported, test AOF loading too
      if {[r config get activedefrag] eq "activedefrag yes"} {
          # reset stats and load the AOF file
          r config resetstat
          r config set key-load-delay -50 ;# sleep on average 1/50 usec
          r debug loadaof
          r config set activedefrag no

          # measure hits and misses right after aof loading
          set misses [s active_defrag_misses]
          set hits [s active_defrag_hits]

          after 120 ;# serverCron only updates the info once in 100ms
          set frag [s allocator_frag_ratio]

          # Same lookup as above, this time for the cron that runs while the
          # server is blocked.
          set max_latency 0
          foreach entry [r latency latest] {
              if {[lindex $entry 0] eq "while-blocked-cron"} {
                  set max_latency [lindex $entry 3]
              }
          }

          if {$::verbose} {
              puts "AOF loading:"
              puts "frag $frag"
              puts "hits: $hits"
              puts "misses: $misses"
              puts "max latency $max_latency"
              puts [r latency latest]
              puts [r latency history "while-blocked-cron"]
          }

          # make sure we had defrag hits during AOF loading
          assert {$hits > 100000}

          # make sure the defragger did enough work to keep the fragmentation low during loading.
          # we cannot check that it went all the way down, since we don't wait for full defrag cycle to complete.
          assert {$frag < 1.4}

          # since the AOF contains simple (fast) SET commands (and the cron during loading runs every 1000 commands),
          # it'll still not block the loading for long periods of time.
          if {!$::no_latency} {
              assert {$max_latency <= 30}
          }
      }
  }

  # Undo the AOF-related settings before the following tests run.
  r config set appendonly no
  r config set key-load-delay 0
  # Defragmentation of large aggregate keys (hash, list, zset, set, stream)
  # mixed with many small string keys. The test body must end with "r save",
  # whose reply is matched against the expected result OK.
  test "Active defrag big keys" {
      r flushdb
      r config resetstat
      r config set hz 100
      r config set activedefrag no
      r config set active-defrag-max-scan-fields 1000
      r config set active-defrag-threshold-lower 5
      r config set active-defrag-cycle-min 65
      r config set active-defrag-cycle-max 75
      r config set active-defrag-ignore-bytes 2mb
      r config set maxmemory 0
      r config set list-max-ziplist-size 5 ;# list of 10k items will have 2000 quicklist nodes
      r config set stream-node-max-entries 5

      # one small key of each type, plus a stream consumer group with one read
      r hmset hash h1 v1 h2 v2 h3 v3
      r lpush list a b c d
      r zadd zset 0 a 1 b 2 c 3 d
      r sadd set a b c d
      r xadd stream * item 1 value a
      r xadd stream * item 2 value b
      r xgroup create stream mygroup 0
      r xreadgroup GROUP mygroup Alice COUNT 1 STREAMS stream >

      # create big keys with 10k items
      set rd [redis_deferring_client]
      for {set j 0} {$j < 10000} {incr j} {
          $rd hset bighash $j [concat "asdfasdfasdf" $j]
          $rd lpush biglist [concat "asdfasdfasdf" $j]
          $rd zadd bigzset $j [concat "asdfasdfasdf" $j]
          $rd sadd bigset [concat "asdfasdfasdf" $j]
          $rd xadd bigstream * item 1 value a
      }
      # 5 commands were sent per iteration above, so 50000 replies are pending
      for {set j 0} {$j < 50000} {incr j} {
          $rd read ; # Discard replies
      }

      set expected_frag 1.7
      if {$::accurate} {
          # scale the hash to 1m fields in order to have a measurable latency
          for {set j 10000} {$j < 1000000} {incr j} {
              $rd hset bighash $j [concat "asdfasdfasdf" $j]
          }
          for {set j 10000} {$j < 1000000} {incr j} {
              $rd read ; # Discard replies
          }
          # creating that big hash, increased used_memory, so the relative frag goes down
          set expected_frag 1.3
      }

      # add a mass of string keys
      for {set j 0} {$j < 500000} {incr j} {
          $rd setrange $j 150 a
      }
      for {set j 0} {$j < 500000} {incr j} {
          $rd read ; # Discard replies
      }
      assert_equal [r dbsize] 500010

      # create some fragmentation
      for {set j 0} {$j < 500000} {incr j 2} {
          $rd del $j
      }
      for {set j 0} {$j < 500000} {incr j 2} {
          $rd read ; # Discard replies
      }
      assert_equal [r dbsize] 250010

      # start defrag
      after 120 ;# serverCron only updates the info once in 100ms
      set frag [s allocator_frag_ratio]
      if {$::verbose} {
          puts "frag $frag"
      }
      assert {$frag >= $expected_frag}
      r config set latency-monitor-threshold 5
      r latency reset

      set digest [r debug digest]
      # enabling defrag may fail; the effective setting is checked instead
      catch {r config set activedefrag yes} e
      if {[r config get activedefrag] eq "activedefrag yes"} {
          # wait for the active defrag to start working (decision once a second)
          wait_for_condition 50 100 {
              [s active_defrag_running] ne 0
          } else {
              fail "defrag not started."
          }

          # wait for the active defrag to stop working
          wait_for_condition 500 100 {
              [s active_defrag_running] eq 0
          } else {
              after 120 ;# serverCron only updates the info once in 100ms
              puts [r info memory]
              puts [r memory malloc-stats]
              fail "defrag didn't stop."
          }

          # test that the fragmentation is lower
          after 120 ;# serverCron only updates the info once in 100ms
          set frag [s allocator_frag_ratio]
          set max_latency 0
          foreach event [r latency latest] {
              lassign $event eventname time latency max
              # event names are strings, so compare them with eq
              if {$eventname eq "active-defrag-cycle"} {
                  set max_latency $max
              }
          }
          if {$::verbose} {
              puts "frag $frag"
              set misses [s active_defrag_misses]
              set hits [s active_defrag_hits]
              puts "hits: $hits"
              puts "misses: $misses"
              puts "max latency $max_latency"
              puts [r latency latest]
              puts [r latency history active-defrag-cycle]
          }
          assert {$frag < 1.1}
          # due to high fragmentation, 100hz, and active-defrag-cycle-max set to 75,
          # we expect max latency to be not much higher than 7.5ms but due to rare slowness threshold is set higher
          if {!$::no_latency} {
              assert {$max_latency <= 30}
          }
      }

      # The deferring client is no longer needed. It is closed only now, after
      # all fragmentation measurements, so those run under the same conditions
      # as before while the connection is no longer leaked into later tests.
      $rd close

      # verify the data isn't corrupted or changed
      set newdigest [r debug digest]
      assert {$digest eq $newdigest}
      r save ;# saving an rdb iterates over all the data / pointers
  } {OK}
  # Defragmentation of one very large list whose nodes were allocated
  # interleaved with a second list that is then deleted. The test body must
  # end with "r del biglist1", whose reply is matched against the expected
  # result 1.
  test "Active defrag big list" {
      r flushdb
      r config resetstat
      r config set hz 100
      r config set activedefrag no
      r config set active-defrag-max-scan-fields 1000
      r config set active-defrag-threshold-lower 5
      r config set active-defrag-cycle-min 65
      r config set active-defrag-cycle-max 75
      r config set active-defrag-ignore-bytes 2mb
      r config set maxmemory 0
      r config set list-max-ziplist-size 5 ;# list of 500k items will have 100k quicklist nodes

      # deferring client used to pipeline the pushes below
      set rd [redis_deferring_client]

      set expected_frag 1.7
      # add a mass of list nodes to two lists (allocations are interlaced)
      set val [string repeat A 100] ;# 5 items of 100 bytes puts us in the 640 bytes bin, which has 32 regs, so high potential for fragmentation
      set elements 500000
      for {set j 0} {$j < $elements} {incr j} {
          $rd lpush biglist1 $val
          $rd lpush biglist2 $val
      }
      # two pushes per iteration, so two replies per iteration
      for {set j 0} {$j < $elements} {incr j} {
          $rd read ; # Discard replies
          $rd read ; # Discard replies
      }

      # create some fragmentation
      r del biglist2

      # start defrag
      after 120 ;# serverCron only updates the info once in 100ms
      set frag [s allocator_frag_ratio]
      if {$::verbose} {
          puts "frag $frag"
      }

      assert {$frag >= $expected_frag}
      r config set latency-monitor-threshold 5
      r latency reset

      set digest [r debug digest]
      # enabling defrag may fail; the effective setting is checked instead
      catch {r config set activedefrag yes} e
      if {[r config get activedefrag] eq "activedefrag yes"} {
          # wait for the active defrag to start working (decision once a second)
          wait_for_condition 50 100 {
              [s active_defrag_running] ne 0
          } else {
              fail "defrag not started."
          }

          # wait for the active defrag to stop working
          wait_for_condition 500 100 {
              [s active_defrag_running] eq 0
          } else {
              after 120 ;# serverCron only updates the info once in 100ms
              puts [r info memory]
              puts [r info stats]
              puts [r memory malloc-stats]
              fail "defrag didn't stop."
          }

          # test that the fragmentation is lower
          after 120 ;# serverCron only updates the info once in 100ms
          set misses [s active_defrag_misses]
          set hits [s active_defrag_hits]
          set frag [s allocator_frag_ratio]
          set max_latency 0
          foreach event [r latency latest] {
              lassign $event eventname time latency max
              # event names are strings, so compare them with eq
              if {$eventname eq "active-defrag-cycle"} {
                  set max_latency $max
              }
          }
          if {$::verbose} {
              puts "frag $frag"
              puts "misses: $misses"
              puts "hits: $hits"
              puts "max latency $max_latency"
              puts [r latency latest]
              puts [r latency history active-defrag-cycle]
          }
          assert {$frag < 1.1}

          # due to high fragmentation, 100hz, and active-defrag-cycle-max set to 75,
          # we expect max latency to be not much higher than 7.5ms but due to rare slowness threshold is set higher
          if {!$::no_latency} {
              assert {$max_latency <= 30}
          }

          # in extreme cases of stagnation, we see over 20m misses before the tests aborts with "defrag didn't stop",
          # in normal cases we only see 100k misses out of 500k elements
          assert {$misses < $elements}
      }

      # The deferring client is no longer needed. It is closed only now, after
      # all fragmentation measurements, so those run under the same conditions
      # as before while the connection is no longer leaked into later tests.
      $rd close

      # verify the data isn't corrupted or changed
      set newdigest [r debug digest]
      assert {$digest eq $newdigest}
      r save ;# saving an rdb iterates over all the data / pointers
      r del biglist1 ;# coverage for quicklistBookmarksClear
  } {1}
  # Temporarily skip the active defrag edge case since it constantly fails on 32bit builds
  # since upgrading to jemalloc 5.2.1 (#9623). We need to resolve this and re-enable it.
  if {false} {
  test "Active defrag edge case" {
      # there was an edge case in defrag where all the slabs of a certain bin are exact the same
      # % utilization, with the exception of the current slab from which new allocations are made
      # if the current slab is lower in utilization the defragger would have ended up in stagnation,
      # kept running and not move any allocation.
      # this test is more consistent on a fresh server with no history
      start_server {tags {"defrag"} overrides {save ""}} {
          r flushdb
          r config resetstat
          r config set hz 100
          r config set activedefrag no
          r config set active-defrag-max-scan-fields 1000
          r config set active-defrag-threshold-lower 5
          r config set active-defrag-cycle-min 65
          r config set active-defrag-cycle-max 75
          r config set active-defrag-ignore-bytes 1mb
          r config set maxmemory 0
          set expected_frag 1.3

          r debug mallctl-str thread.tcache.flush VOID
          # fill the first slab containing 32 regs of 640 bytes.
          for {set j 0} {$j < 32} {incr j} {
              r setrange "_$j" 600 x
              r debug mallctl-str thread.tcache.flush VOID
          }

          # add a mass of keys with 600 bytes values, fill the bin of 640 bytes which has 32 regs per slab.
          set rd [redis_deferring_client]
          set keys 640000
          for {set j 0} {$j < $keys} {incr j} {
              $rd setrange $j 600 x
          }
          for {set j 0} {$j < $keys} {incr j} {
              $rd read ; # Discard replies
          }

          # create some fragmentation of 50%: delete every second key and
          # count the commands sent so the same number of replies is drained
          set sent 0
          for {set j 0} {$j < $keys} {incr j 2} {
              $rd del $j
              incr sent
          }
          for {set j 0} {$j < $sent} {incr j} {
              $rd read ; # Discard replies
          }

          # create higher fragmentation in the first slab
          for {set j 10} {$j < 32} {incr j} {
              r del "_$j"
          }

          # start defrag
          after 120 ;# serverCron only updates the info once in 100ms
          set frag [s allocator_frag_ratio]
          if {$::verbose} {
              puts "frag $frag"
          }

          assert {$frag >= $expected_frag}

          set digest [r debug digest]
          # enabling defrag may fail; the effective setting is checked instead
          catch {r config set activedefrag yes} e
          if {[r config get activedefrag] eq "activedefrag yes"} {
              # wait for the active defrag to start working (decision once a second)
              wait_for_condition 50 100 {
                  [s active_defrag_running] ne 0
              } else {
                  fail "defrag not started."
              }

              # wait for the active defrag to stop working
              wait_for_condition 500 100 {
                  [s active_defrag_running] eq 0
              } else {
                  after 120 ;# serverCron only updates the info once in 100ms
                  puts [r info memory]
                  puts [r info stats]
                  puts [r memory malloc-stats]
                  fail "defrag didn't stop."
              }

              # test that the fragmentation is lower
              after 120 ;# serverCron only updates the info once in 100ms
              set misses [s active_defrag_misses]
              set hits [s active_defrag_hits]
              set frag [s allocator_frag_ratio]
              if {$::verbose} {
                  puts "frag $frag"
                  puts "hits: $hits"
                  puts "misses: $misses"
              }
              assert {$frag < 1.1}
              assert {$misses < 10000000} ;# when defrag doesn't stop, we have some 30m misses, when it does, we have 2m misses
          }

          # The deferring client is no longer needed. It is closed only now,
          # after all fragmentation measurements, so the connection is not
          # left open once the test finishes.
          $rd close

          # verify the data isn't corrupted or changed
          set newdigest [r debug digest]
          assert {$digest eq $newdigest}
          r save ;# saving an rdb iterates over all the data / pointers
      }
  }
  }
  459. }
  460. }
  461. } ;# run_solo