// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"internal/cpu"
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// A spanSet is a set of *mspans.
//
// spanSet is safe for concurrent push and pop operations.
type spanSet struct {
	// A spanSet is a two-level data structure consisting of a
	// growable spine that points to fixed-sized blocks. The spine
	// can be accessed without locks, but adding a block or
	// growing it requires taking the spine lock.
	//
	// Because each mspan covers at least 8K of heap and takes at
	// most 8 bytes in the spanSet, the growth of the spine is
	// quite limited.
	//
	// The spine and all blocks are allocated off-heap, which
	// allows this to be used in the memory manager and avoids the
	// need for write barriers on all of these. spanSetBlocks are
	// managed in a pool, though never freed back to the operating
	// system. We never release spine memory because there could be
	// concurrent lock-free access and we're likely to reuse it
	// anyway. (In principle, we could do this during STW.)

	spineLock mutex
	spine     unsafe.Pointer // *[N]*spanSetBlock, accessed atomically
	spineLen  uintptr        // Spine array length, accessed atomically
	spineCap  uintptr        // Spine array cap, accessed under lock

	// index is the head and tail of the spanSet in a single field.
	// The head and the tail both represent an index into the logical
	// concatenation of all blocks, with the head always behind or
	// equal to the tail (head == tail indicates an empty set). This
	// field is always accessed atomically.
	//
	// The head and the tail are only 32 bits wide, which means we
	// can only support up to 2^32 pushes before a reset. If every
	// span in the heap were stored in this set, and each span were
	// the minimum size (1 runtime page, 8 KiB), then roughly the
	// smallest heap which would be unrepresentable is 32 TiB in size.
	index headTailIndex
}

const (
	spanSetBlockEntries = 512 // 4KB on 64-bit
	spanSetInitSpineCap = 256 // Enough for 1GB heap on 64-bit
)

type spanSetBlock struct {
	// Free spanSetBlocks are managed via a lock-free stack.
	lfnode

	// popped is the number of pop operations that have occurred on
	// this block. This number is used to help determine when a block
	// may be safely recycled.
	popped uint32

	// spans is the set of spans in this block.
	spans [spanSetBlockEntries]*mspan
}

// push adds span s to buffer b. push is safe to call concurrently
// with other push and pop operations.
func (b *spanSet) push(s *mspan) {
	// Obtain our slot.
	cursor := uintptr(b.index.incTail().tail() - 1)
	top, bottom := cursor/spanSetBlockEntries, cursor%spanSetBlockEntries

	// Do we need to add a block?
	spineLen := atomic.Loaduintptr(&b.spineLen)
	var block *spanSetBlock
retry:
	if top < spineLen {
		spine := atomic.Loadp(unsafe.Pointer(&b.spine))
		blockp := add(spine, sys.PtrSize*top)
		block = (*spanSetBlock)(atomic.Loadp(blockp))
	} else {
		// Add a new block to the spine, potentially growing
		// the spine.
		lock(&b.spineLock)
		// spineLen cannot change until we release the lock,
		// but may have changed while we were waiting.
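		// For example (illustrative): with spanSetBlockEntries == 512,
		// pushing the 513th span yields cursor == 512 and top == 1. If
		// another pusher installed spine[1] while we waited on
		// spineLock, the recheck below sees top < spineLen and we take
		// the retry path instead of installing a duplicate block.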
		spineLen = atomic.Loaduintptr(&b.spineLen)
		if top < spineLen {
			unlock(&b.spineLock)
			goto retry
		}

		if spineLen == b.spineCap {
			// Grow the spine.
			newCap := b.spineCap * 2
			if newCap == 0 {
				newCap = spanSetInitSpineCap
			}
			newSpine := persistentalloc(newCap*sys.PtrSize, cpu.CacheLineSize, &memstats.gcMiscSys)
			if b.spineCap != 0 {
				// Blocks are allocated off-heap, so
				// no write barriers.
				memmove(newSpine, b.spine, b.spineCap*sys.PtrSize)
			}
			// Spine is allocated off-heap, so no write barrier.
			atomic.StorepNoWB(unsafe.Pointer(&b.spine), newSpine)
			b.spineCap = newCap
			// We can't immediately free the old spine
			// since a concurrent push with a lower index
			// could still be reading from it. We let it
			// leak because even a 1TB heap would waste
			// less than 2MB of memory on old spines. If
			// this is a problem, we could free old spines
			// during STW.
		}

		// Allocate a new block from the pool.
		block = spanSetBlockPool.alloc()

		// Add it to the spine.
		blockp := add(b.spine, sys.PtrSize*top)
		// Blocks are allocated off-heap, so no write barrier.
		atomic.StorepNoWB(blockp, unsafe.Pointer(block))
		atomic.Storeuintptr(&b.spineLen, spineLen+1)
		unlock(&b.spineLock)
	}

	// We have a block. Insert the span atomically, since there may be
	// concurrent readers via the block API.
	atomic.StorepNoWB(unsafe.Pointer(&block.spans[bottom]), unsafe.Pointer(s))
}

// pop removes and returns a span from buffer b, or nil if b is empty.
// pop is safe to call concurrently with other pop and push operations.
func (b *spanSet) pop() *mspan {
	var head, tail uint32
claimLoop:
	for {
		headtail := b.index.load()
		head, tail = headtail.split()
		if head >= tail {
			// The buf is empty, as far as we can tell.
			return nil
		}
		// Check if the head position we want to claim is actually
		// backed by a block.
		spineLen := atomic.Loaduintptr(&b.spineLen)
		if spineLen <= uintptr(head)/spanSetBlockEntries {
			// We're racing with a spine growth and the allocation of
			// a new block (and maybe a new spine!), and trying to grab
			// the span at the index which is currently being pushed.
			// Instead of spinning, let's just notify the caller that
			// there's nothing currently here. Spinning on this is
			// almost definitely not worth it.
			return nil
		}
		// Try to claim the current head by CASing in an updated head.
		// This may fail transiently due to a push which modifies the
		// tail, so keep trying while the head isn't changing.
		want := head
		for want == head {
			if b.index.cas(headtail, makeHeadTailIndex(want+1, tail)) {
				break claimLoop
			}
			headtail = b.index.load()
			head, tail = headtail.split()
		}
		// We failed to claim the spot we were after and the head changed,
		// meaning a popper got ahead of us. Try again from the top because
		// the buf may not be empty.
	}
	top, bottom := head/spanSetBlockEntries, head%spanSetBlockEntries

	// We may be reading a stale spine pointer, but because the length
	// grows monotonically and we've already verified it, we'll definitely
	// be reading from a valid block.
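	// (A grow copies every existing block pointer into the new spine
	// before publishing it, and blocks are only ever appended, never
	// moved or removed, which is why a stale spine pointer still
	// resolves our verified index to the right block.)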
	spine := atomic.Loadp(unsafe.Pointer(&b.spine))
	blockp := add(spine, sys.PtrSize*uintptr(top))

	// Given that the spine length is correct, we know we will never
	// see a nil block here, since the length is always updated after
	// the block is set.
	block := (*spanSetBlock)(atomic.Loadp(blockp))
	s := (*mspan)(atomic.Loadp(unsafe.Pointer(&block.spans[bottom])))
	for s == nil {
		// We raced with the span actually being set, but given that we
		// know a block for this span exists, the race window here is
		// extremely small. Try again.
		s = (*mspan)(atomic.Loadp(unsafe.Pointer(&block.spans[bottom])))
	}
	// Clear the pointer. This isn't strictly necessary, but defensively
	// avoids accidentally re-using blocks which could lead to memory
	// corruption. This way, we'll get a nil pointer access instead.
	atomic.StorepNoWB(unsafe.Pointer(&block.spans[bottom]), nil)

	// Increase the popped count. If we are the last possible popper
	// in the block (note that bottom need not equal spanSetBlockEntries-1
	// due to races), then it's our responsibility to free the block.
	//
	// If we increment popped to spanSetBlockEntries, we can be sure that
	// we're the last popper for this block, and it's thus safe to free it.
	// Every other popper must have crossed this barrier (and thus finished
	// popping its corresponding mspan) by the time we get here. Because
	// we're the last popper, we also don't have to worry about concurrent
	// pushers (there can't be any). Note that we may not be the popper
	// which claimed the last slot in the block, we're just the last one
	// to finish popping.
	if atomic.Xadd(&block.popped, 1) == spanSetBlockEntries {
		// Clear the block's pointer.
		atomic.StorepNoWB(blockp, nil)

		// Return the block to the block pool.
		spanSetBlockPool.free(block)
	}
	return s
}

// reset resets a spanSet which is empty. It will also clean up any
// leftover blocks.
//
// Throws if the buf is not empty.
//
// reset may not be called concurrently with any other operations
// on the span set.
func (b *spanSet) reset() {
	head, tail := b.index.load().split()
	if head < tail {
		print("head = ", head, ", tail = ", tail, "\n")
		throw("attempt to clear non-empty span set")
	}
	top := head / spanSetBlockEntries
	if uintptr(top) < b.spineLen {
		// If the head catches up to the tail and the set is empty,
		// the block containing the head and tail may not have been
		// cleaned up, since it may be pushed into again. To avoid
		// leaking memory, given that we're about to reset the head
		// and tail, clean up such a block now, if it exists.
		blockp := (**spanSetBlock)(add(b.spine, sys.PtrSize*uintptr(top)))
		block := *blockp
		if block != nil {
			// Sanity check the popped value.
			if block.popped == 0 {
				// popped should never be zero here: a non-nil block
				// pointer means at least one span was pushed into this
				// block, and since the set is empty, that span must
				// also have been popped.
				throw("span set block with unpopped elements found in reset")
			}
			if block.popped == spanSetBlockEntries {
				// popped should also never be equal to spanSetBlockEntries
				// because the last popper should have made the block pointer
				// in this slot nil.
				throw("fully empty unfreed span set block found in reset")
			}
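
			// For example (illustrative): after 700 pushes and 700 pops
			// with spanSetBlockEntries == 512, head == tail == 700 and
			// top == 1; spine[1] still holds a partially drained block
			// with popped == 188, which only this path frees.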

			// Clear the pointer to the block.
			atomic.StorepNoWB(unsafe.Pointer(blockp), nil)

			// Return the block to the block pool.
			spanSetBlockPool.free(block)
		}
	}
	b.index.reset()
	atomic.Storeuintptr(&b.spineLen, 0)
}

// spanSetBlockPool is a global pool of spanSetBlocks.
var spanSetBlockPool spanSetBlockAlloc

// spanSetBlockAlloc represents a concurrent pool of spanSetBlocks.
type spanSetBlockAlloc struct {
	stack lfstack
}

// alloc tries to grab a spanSetBlock out of the pool, and if it fails
// persistentallocs a new one and returns it.
func (p *spanSetBlockAlloc) alloc() *spanSetBlock {
	if s := (*spanSetBlock)(p.stack.pop()); s != nil {
		return s
	}
	return (*spanSetBlock)(persistentalloc(unsafe.Sizeof(spanSetBlock{}), cpu.CacheLineSize, &memstats.gcMiscSys))
}

// free returns a spanSetBlock back to the pool.
func (p *spanSetBlockAlloc) free(block *spanSetBlock) {
	atomic.Store(&block.popped, 0)
	p.stack.push(&block.lfnode)
}

// headTailIndex packs a 32-bit head and a 32-bit tail of a queue into
// a single 64-bit value.
type headTailIndex uint64

// makeHeadTailIndex creates a headTailIndex value from a separate
// head and tail.
func makeHeadTailIndex(head, tail uint32) headTailIndex {
	return headTailIndex(uint64(head)<<32 | uint64(tail))
}

// head returns the head of a headTailIndex value.
func (h headTailIndex) head() uint32 {
	return uint32(h >> 32)
}

// tail returns the tail of a headTailIndex value.
func (h headTailIndex) tail() uint32 {
	return uint32(h)
}

// split splits the headTailIndex value into its parts.
func (h headTailIndex) split() (head uint32, tail uint32) {
	return h.head(), h.tail()
}

// load atomically reads a headTailIndex value.
func (h *headTailIndex) load() headTailIndex {
	return headTailIndex(atomic.Load64((*uint64)(h)))
}

// cas atomically compares-and-swaps a headTailIndex value.
func (h *headTailIndex) cas(old, new headTailIndex) bool {
	return atomic.Cas64((*uint64)(h), uint64(old), uint64(new))
}

// incHead atomically increments the head of a headTailIndex.
func (h *headTailIndex) incHead() headTailIndex {
	return headTailIndex(atomic.Xadd64((*uint64)(h), (1 << 32)))
}

// decHead atomically decrements the head of a headTailIndex.
func (h *headTailIndex) decHead() headTailIndex {
	return headTailIndex(atomic.Xadd64((*uint64)(h), -(1 << 32)))
}

// incTail atomically increments the tail of a headTailIndex.
func (h *headTailIndex) incTail() headTailIndex {
	ht := headTailIndex(atomic.Xadd64((*uint64)(h), +1))
	// Check for overflow.
	if ht.tail() == 0 {
		print("runtime: head = ", ht.head(), ", tail = ", ht.tail(), "\n")
		throw("headTailIndex overflow")
	}
	return ht
}

// reset clears the headTailIndex to (0, 0).
func (h *headTailIndex) reset() {
	atomic.Store64((*uint64)(h), 0)
}
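
// For illustration only (not used by the runtime): headTailIndex is plain
// 64-bit shift-and-mask packing. makeHeadTailIndex(1, 2) produces
// 0x0000000100000002, whose head() is 1 and tail() is 2; incTail on it
// yields the pair (1, 3), and incHead yields (2, 2).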