// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

// Statistics.
//
// For detailed descriptions see the documentation for MemStats.
// Fields that differ from MemStats are further documented here.
//
// Many of these fields are updated on the fly, while others are only
// updated when updatememstats is called.
type mstats struct {
	// General statistics.
	alloc       uint64 // bytes allocated and not yet freed
	total_alloc uint64 // bytes allocated (even if freed)
	sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
	nlookup     uint64 // number of pointer lookups (unused)
	nmalloc     uint64 // number of mallocs
	nfree       uint64 // number of frees

	// Statistics about malloc heap.
	// Updated atomically, or with the world stopped.
	//
	// Like MemStats, heap_sys and heap_inuse do not count memory
	// in manually-managed spans.
	heap_sys      sysMemStat // virtual address space obtained from system for GC'd heap
	heap_inuse    uint64     // bytes in mSpanInUse spans
	heap_released uint64     // bytes released to the OS

	// heap_objects is not used by the runtime directly; it is instead
	// computed on the fly by updatememstats.
	heap_objects uint64 // total number of allocated objects

	// Statistics about stacks.
	stacks_inuse uint64     // bytes in manually-managed stack spans; computed by updatememstats
	stacks_sys   sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys

	// Statistics about allocation of low-level fixed-size structures.
	// Protected by FixAlloc locks.
	mspan_inuse  uint64 // mspan structures
	mspan_sys    sysMemStat
	mcache_inuse uint64 // mcache structures
	mcache_sys   sysMemStat
	buckhash_sys sysMemStat // profiling bucket hash table

	// Statistics about GC overhead.
	gcWorkBufInUse           uint64     // computed by updatememstats
	gcProgPtrScalarBitsInUse uint64     // computed by updatememstats
	gcMiscSys                sysMemStat // updated atomically or during STW

	// Miscellaneous statistics.
	other_sys sysMemStat // updated atomically or during STW

	// Statistics about the garbage collector.

	// Protected by mheap or stopping the world during GC.
	last_gc_unix    uint64 // last gc (in unix time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	numforcedgc     uint32  // number of user-forced GCs
	gc_cpu_fraction float64 // fraction of CPU time used by GC
	enablegc        bool
	debuggc         bool

	// Statistics about allocation size classes.

	by_size [_NumSizeClasses]struct {
		size    uint32
		nmalloc uint64
		nfree   uint64
	}

	// Add a uint32 of padding when there is an even number of size
	// classes, so the fields below are aligned to 64 bits for atomic
	// operations on 32-bit platforms.
	_ [1 - _NumSizeClasses%2]uint32

	last_gc_nanotime uint64 // last gc (monotonic time)
	last_heap_inuse  uint64 // heap_inuse at mark termination of the previous GC

	// heapStats is a set of statistics that are updated consistently;
	// see consistentHeapStats.
	heapStats consistentHeapStats

	// _ uint32 // ensure gcPauseDist is aligned

	// gcPauseDist represents the distribution of all GC-related
	// application pauses in the runtime.
	//
	// Each individual pause is counted separately, unlike pause_ns.
	gcPauseDist timeHistogram
}

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.

	// Alloc is bytes of allocated heap objects.
	//
	// This is the same as HeapAlloc (see below).
	Alloc uint64

	// TotalAlloc is cumulative bytes allocated for heap objects.
	//
	// TotalAlloc increases as heap objects are allocated, but
	// unlike Alloc and HeapAlloc, it does not decrease when
	// objects are freed.
	TotalAlloc uint64

	// Sys is the total bytes of memory obtained from the OS.
	//
	// Sys is the sum of the XSys fields below. Sys measures the
	// virtual address space reserved by the Go runtime for the
	// heap, stacks, and other internal data structures. It's
	// likely that not all of the virtual address space is backed
	// by physical memory at any given moment, though in general
	// it all was at some point.
	Sys uint64

	// Lookups is the number of pointer lookups performed by the
	// runtime.
	//
	// This is primarily useful for debugging runtime internals.
	Lookups uint64

	// Mallocs is the cumulative count of heap objects allocated.
	// The number of live objects is Mallocs - Frees.
	Mallocs uint64

	// Frees is the cumulative count of heap objects freed.
	Frees uint64

	// Heap memory statistics.
	//
	// Interpreting the heap statistics requires some knowledge of
	// how Go organizes memory. Go divides the virtual address
	// space of the heap into "spans", which are contiguous
	// regions of memory 8K or larger. A span may be in one of
	// three states:
	//
	// An "idle" span contains no objects or other data. The
	// physical memory backing an idle span can be released back
	// to the OS (but the virtual address space never is), or it
	// can be converted into an "in use" or "stack" span.
	//
	// An "in use" span contains at least one heap object and may
	// have free space available to allocate more heap objects.
	//
	// A "stack" span is used for goroutine stacks. Stack spans
	// are not considered part of the heap. A span can change
	// between heap and stack memory; it is never used for both
	// simultaneously.

	// HeapAlloc is bytes of allocated heap objects.
	//
	// "Allocated" heap objects include all reachable objects, as
	// well as unreachable objects that the garbage collector has
	// not yet freed. Specifically, HeapAlloc increases as heap
	// objects are allocated and decreases as the heap is swept
	// and unreachable objects are freed. Sweeping occurs
	// incrementally between GC cycles, so these two processes
	// occur simultaneously, and as a result HeapAlloc tends to
	// change smoothly (in contrast with the sawtooth that is
	// typical of stop-the-world garbage collectors).
	HeapAlloc uint64

	// HeapSys is bytes of heap memory obtained from the OS.
	//
	// HeapSys measures the amount of virtual address space
	// reserved for the heap. This includes virtual address space
	// that has been reserved but not yet used, which consumes no
	// physical memory, but tends to be small, as well as virtual
	// address space for which the physical memory has been
	// returned to the OS after it became unused (see HeapReleased
	// for a measure of the latter).
	//
	// HeapSys estimates the largest size the heap has had.
	HeapSys uint64

	// HeapIdle is bytes in idle (unused) spans.
	//
	// Idle spans have no objects in them. These spans could be
	// (and may already have been) returned to the OS, or they can
	// be reused for heap allocations, or they can be reused as
	// stack memory.
	//
	// HeapIdle minus HeapReleased estimates the amount of memory
	// that could be returned to the OS, but is being retained by
	// the runtime so it can grow the heap without requesting more
	// memory from the OS. If this difference is significantly
	// larger than the heap size, it indicates there was a recent
	// transient spike in live heap size.
	HeapIdle uint64

	// HeapInuse is bytes in in-use spans.
	//
	// In-use spans have at least one object in them. These spans
	// can only be used for other objects of roughly the same
	// size.
	//
	// HeapInuse minus HeapAlloc estimates the amount of memory
	// that has been dedicated to particular size classes, but is
	// not currently being used. This is an upper bound on
	// fragmentation, but in general this memory can be reused
	// efficiently.
	HeapInuse uint64

	// HeapReleased is bytes of physical memory returned to the OS.
	//
	// This counts heap memory from idle spans that was returned
	// to the OS and has not yet been reacquired for the heap.
	HeapReleased uint64

	// HeapObjects is the number of allocated heap objects.
	//
	// Like HeapAlloc, this increases as objects are allocated and
	// decreases as the heap is swept and unreachable objects are
	// freed.
	HeapObjects uint64

	// Stack memory statistics.
	//
	// Stacks are not considered part of the heap, but the runtime
	// can reuse a span of heap memory for stack memory, and
	// vice-versa.

	// StackInuse is bytes in stack spans.
	//
	// In-use stack spans have at least one stack in them. These
	// spans can only be used for other stacks of the same size.
	//
	// There is no StackIdle because unused stack spans are
	// returned to the heap (and hence counted toward HeapIdle).
	StackInuse uint64

	// StackSys is bytes of stack memory obtained from the OS.
	//
	// StackSys is StackInuse, plus any memory obtained directly
	// from the OS for OS thread stacks (which should be minimal).
	StackSys uint64

	// Off-heap memory statistics.
	//
	// The following statistics measure runtime-internal
	// structures that are not allocated from heap memory (usually
	// because they are part of implementing the heap). Unlike
	// heap or stack memory, any memory allocated to these
	// structures is dedicated to these structures.
	//
	// These are primarily useful for debugging runtime memory
	// overheads.

	// MSpanInuse is bytes of allocated mspan structures.
	MSpanInuse uint64

	// MSpanSys is bytes of memory obtained from the OS for mspan
	// structures.
	MSpanSys uint64

	// MCacheInuse is bytes of allocated mcache structures.
	MCacheInuse uint64

	// MCacheSys is bytes of memory obtained from the OS for
	// mcache structures.
	MCacheSys uint64

	// BuckHashSys is bytes of memory in profiling bucket hash tables.
	BuckHashSys uint64

	// GCSys is bytes of memory in garbage collection metadata.
	GCSys uint64

	// OtherSys is bytes of memory in miscellaneous off-heap
	// runtime allocations.
	OtherSys uint64

	// Garbage collector statistics.

	// NextGC is the target heap size of the next GC cycle.
	//
	// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
	// At the end of each GC cycle, the target for the next cycle
	// is computed based on the amount of reachable data and the
	// value of GOGC.
	NextGC uint64

	// LastGC is the time the last garbage collection finished, as
	// nanoseconds since 1970 (the UNIX epoch).
	LastGC uint64

	// PauseTotalNs is the cumulative nanoseconds in GC
	// stop-the-world pauses since the program started.
	//
	// During a stop-the-world pause, all goroutines are paused
	// and only the garbage collector can run.
	PauseTotalNs uint64

	// PauseNs is a circular buffer of recent GC stop-the-world
	// pause times in nanoseconds.
	//
	// The most recent pause is at PauseNs[(NumGC+255)%256]. In
	// general, PauseNs[N%256] records the time paused in the most
	// recent N%256th GC cycle. There may be multiple pauses per
	// GC cycle; this is the sum of all pauses during a cycle.
	PauseNs [256]uint64

	// PauseEnd is a circular buffer of recent GC pause end times,
	// as nanoseconds since 1970 (the UNIX epoch).
	//
	// This buffer is filled the same way as PauseNs. There may be
	// multiple pauses per GC cycle; this records the end of the
	// last pause in a cycle.
	PauseEnd [256]uint64

	// NumGC is the number of completed GC cycles.
	NumGC uint32

	// NumForcedGC is the number of GC cycles that were forced by
	// the application calling the GC function.
	NumForcedGC uint32

	// GCCPUFraction is the fraction of this program's available
	// CPU time used by the GC since the program started.
	//
	// GCCPUFraction is expressed as a number between 0 and 1,
	// where 0 means GC has consumed none of this program's CPU. A
	// program's available CPU time is defined as the integral of
	// GOMAXPROCS since the program started. That is, if
	// GOMAXPROCS is 2 and a program has been running for 10
	// seconds, its "available CPU" is 20 seconds. GCCPUFraction
	// does not include CPU time used for write barrier activity.
	//
	// This is the same as the fraction of CPU reported by
	// GODEBUG=gctrace=1.
	GCCPUFraction float64

	// EnableGC indicates that GC is enabled. It is always true,
	// even if GOGC=off.
	EnableGC bool

	// DebugGC is currently unused.
	DebugGC bool

	// BySize reports per-size class allocation statistics.
	//
	// BySize[N] gives statistics for allocations of size S where
	// BySize[N-1].Size < S ≤ BySize[N].Size.
	//
	// This does not report allocations larger than BySize[60].Size.
	BySize [61]struct {
		// Size is the maximum byte size of an object in this
		// size class.
		Size uint32

		// Mallocs is the cumulative count of heap objects
		// allocated in this size class. The cumulative bytes
		// of allocation is Size*Mallocs. The number of live
		// objects in this size class is Mallocs - Frees.
		Mallocs uint64

		// Frees is the cumulative count of heap objects freed
		// in this size class.
		Frees uint64
	}
}

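// As an illustrative sketch (not part of the API), the derived
// quantities described in the field comments above can be computed from
// a MemStats value m, assumed to have been filled in by ReadMemStats
// (i is some size class index):
//
//	recentPause := m.PauseNs[(m.NumGC+255)%256]            // duration of the most recent GC pause
//	liveObjects := m.Mallocs - m.Frees                      // heap objects not yet freed
//	liveInClass := m.BySize[i].Mallocs - m.BySize[i].Frees  // live objects in size class i
//	retainable := m.HeapIdle - m.HeapReleased               // memory held that could be returned to the OS
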
func init() {
	if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 {
		println(offset)
		throw("memstats.heapStats not aligned to 8 bytes")
	}
	if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 {
		println(offset)
		throw("memstats.gcPauseDist not aligned to 8 bytes")
	}
	// Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g.
	// [3]heapStatsDelta) to be 8-byte aligned.
	if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 {
		println(size)
		throw("heapStatsDelta not a multiple of 8 bytes in size")
	}
}

// ReadMemStats populates m with memory allocator statistics.
//
// The returned memory allocator statistics are up to date as of the
// call to ReadMemStats. This is in contrast with a heap profile,
// which is a snapshot as of the most recently completed garbage
// collection cycle.
func ReadMemStats(m *MemStats) {
	stopTheWorld("read mem stats")

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld()
}

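// A minimal caller-side sketch (hypothetical; package main, the fmt
// import, and the formatting are assumptions, not part of this file):
//
//	var m runtime.MemStats
//	runtime.ReadMemStats(&m)
//	fmt.Printf("live heap: %d B, mapped from OS: %d B, GC cycles: %d\n",
//		m.HeapAlloc, m.Sys, m.NumGC)
//
// Because ReadMemStats stops the world, callers should sample it
// sparingly in latency-sensitive programs.
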
func readmemstats_m(stats *MemStats) {
	updatememstats()

	stats.Alloc = memstats.alloc
	stats.TotalAlloc = memstats.total_alloc
	stats.Sys = memstats.sys
	stats.Mallocs = memstats.nmalloc
	stats.Frees = memstats.nfree
	stats.HeapAlloc = memstats.alloc
	stats.HeapSys = memstats.heap_sys.load()
	// By definition, HeapIdle is memory that was mapped
	// for the heap but is not currently used to hold heap
	// objects. It also specifically is memory that can be
	// used for other purposes, like stacks, but this memory
	// is subtracted out of HeapSys before it makes that
	// transition. Put another way:
	//
	// heap_sys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes
	// heap_idle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose
	//
	// or
	//
	// heap_sys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse
	// heap_idle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heap_inuse
	//
	// => heap_idle = heap_sys - heap_inuse
	stats.HeapIdle = memstats.heap_sys.load() - memstats.heap_inuse
	stats.HeapInuse = memstats.heap_inuse
	stats.HeapReleased = memstats.heap_released
	stats.HeapObjects = memstats.heap_objects
	stats.StackInuse = memstats.stacks_inuse
	// memstats.stacks_sys is only memory mapped directly for OS stacks.
	// Add in heap-allocated stack memory for user consumption.
	stats.StackSys = memstats.stacks_inuse + memstats.stacks_sys.load()
	stats.MSpanInuse = memstats.mspan_inuse
	stats.MSpanSys = memstats.mspan_sys.load()
	stats.MCacheInuse = memstats.mcache_inuse
	stats.MCacheSys = memstats.mcache_sys.load()
	stats.BuckHashSys = memstats.buckhash_sys.load()
	// MemStats defines GCSys as an aggregate of all memory related
	// to the memory management system, but we track this memory
	// at a more granular level in the runtime.
	stats.GCSys = memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse
	stats.OtherSys = memstats.other_sys.load()
	stats.NextGC = gcController.heapGoal
	stats.LastGC = memstats.last_gc_unix
	stats.PauseTotalNs = memstats.pause_total_ns
	stats.PauseNs = memstats.pause_ns
	stats.PauseEnd = memstats.pause_end
	stats.NumGC = memstats.numgc
	stats.NumForcedGC = memstats.numforcedgc
	stats.GCCPUFraction = memstats.gc_cpu_fraction
	stats.EnableGC = true

	// Handle BySize. Copy N values, where N is
	// the minimum of the lengths of the two arrays.
	// Unfortunately copy() won't work here because
	// the arrays have different structs.
	//
	// TODO(mknyszek): Consider renaming the fields
	// of by_size's elements to align so we can use
	// the copy built-in.
	bySizeLen := len(stats.BySize)
	if l := len(memstats.by_size); l < bySizeLen {
		bySizeLen = l
	}
	for i := 0; i < bySizeLen; i++ {
		stats.BySize[i].Size = memstats.by_size[i].size
		stats.BySize[i].Mallocs = memstats.by_size[i].nmalloc
		stats.BySize[i].Frees = memstats.by_size[i].nfree
	}
}

//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}

// readGCStats_m must be called on the system stack because it acquires the heap
// lock. See mheap for details.
//go:systemstack
func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)
	*pauses = p[:n+n+3]
}

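// For reference, the layout readGCStats_m packs into the returned slice
// (and which runtime/debug is expected to unpack) is, for n recorded
// cycles:
//
//	p[0:n]    recent pause durations, most recent first
//	p[n:2n]   the matching pause end times
//	p[2n]     last GC end time (nanoseconds since 1970)
//	p[2n+1]   number of GC cycles
//	p[2n+2]   total pause time in nanoseconds
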
// Updates the memstats structure.
//
// The world must be stopped.
//
//go:nowritebarrier
func updatememstats() {
	assertWorldStopped()

	// Flush mcaches to mcentral before doing anything else.
	//
	// Flushing to the mcentral may in general cause stats to
	// change as mcentral data structures are manipulated.
	systemstack(flushallmcaches)

	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
	memstats.sys = memstats.heap_sys.load() + memstats.stacks_sys.load() + memstats.mspan_sys.load() +
		memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gcMiscSys.load() +
		memstats.other_sys.load()

	// Calculate memory allocator stats.
	// During program execution we only count the number of frees and the
	// amount of freed memory. The current number of live objects in the
	// heap and the amount of live heap memory are calculated by scanning
	// all spans. The total number of mallocs is calculated as the number
	// of frees plus the number of live objects. Similarly, the total
	// amount of allocated memory is calculated as the amount of freed
	// memory plus the amount of live heap memory.
	memstats.alloc = 0
	memstats.total_alloc = 0
	memstats.nmalloc = 0
	memstats.nfree = 0
	for i := 0; i < len(memstats.by_size); i++ {
		memstats.by_size[i].nmalloc = 0
		memstats.by_size[i].nfree = 0
	}
	// Collect consistent stats, which are the source of truth in some cases.
	var consStats heapStatsDelta
	memstats.heapStats.unsafeRead(&consStats)

	// Collect large allocation stats.
	totalAlloc := consStats.largeAlloc
	memstats.nmalloc += consStats.largeAllocCount
	totalFree := consStats.largeFree
	memstats.nfree += consStats.largeFreeCount

	// Collect per-sizeclass stats.
	for i := 0; i < _NumSizeClasses; i++ {
		// Malloc stats.
		a := consStats.smallAllocCount[i]
		totalAlloc += a * uint64(class_to_size[i])
		memstats.nmalloc += a
		memstats.by_size[i].nmalloc = a

		// Free stats.
		f := consStats.smallFreeCount[i]
		totalFree += f * uint64(class_to_size[i])
		memstats.nfree += f
		memstats.by_size[i].nfree = f
	}

	// Account for tiny allocations.
	memstats.nfree += consStats.tinyAllocCount
	memstats.nmalloc += consStats.tinyAllocCount

	// Calculate derived stats.
	memstats.total_alloc = totalAlloc
	memstats.alloc = totalAlloc - totalFree
	memstats.heap_objects = memstats.nmalloc - memstats.nfree

	memstats.stacks_inuse = uint64(consStats.inStacks)
	memstats.gcWorkBufInUse = uint64(consStats.inWorkBufs)
	memstats.gcProgPtrScalarBitsInUse = uint64(consStats.inPtrScalarBits)

	// We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory.
	memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse

	// The world is stopped, so the consistent stats (after aggregation)
	// should be identical to some combination of memstats. In particular:
	//
	// * heap_inuse == inHeap
	// * heap_released == released
	// * heap_sys - heap_released == committed - inStacks - inWorkBufs - inPtrScalarBits
	//
	// Check if that's actually true.
	//
	// TODO(mknyszek): Maybe don't throw here. It would be bad if a
	// bug in otherwise benign accounting caused the whole application
	// to crash.
	if memstats.heap_inuse != uint64(consStats.inHeap) {
		print("runtime: heap_inuse=", memstats.heap_inuse, "\n")
		print("runtime: consistent value=", consStats.inHeap, "\n")
		throw("heap_inuse and consistent stats are not equal")
	}
	if memstats.heap_released != uint64(consStats.released) {
		print("runtime: heap_released=", memstats.heap_released, "\n")
		print("runtime: consistent value=", consStats.released, "\n")
		throw("heap_released and consistent stats are not equal")
	}
	globalRetained := memstats.heap_sys.load() - memstats.heap_released
	consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
	if globalRetained != consRetained {
		print("runtime: global value=", globalRetained, "\n")
		print("runtime: consistent value=", consRetained, "\n")
		throw("measures of the retained heap are not equal")
	}
}

// flushmcache flushes the mcache of allp[i].
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	assertWorldStopped()

	p := allp[i]
	c := p.mcache
	if c == nil {
		return
	}
	c.releaseAll()
	stackcache_clear(c)
}

// flushallmcaches flushes the mcaches of all Ps.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	assertWorldStopped()

	for i := 0; i < int(gomaxprocs); i++ {
		flushmcache(i)
	}
}

// sysMemStat represents a global system statistic that is managed atomically.
//
// This type must structurally be a uint64 so that mstats aligns with MemStats.
type sysMemStat uint64

// load atomically reads the value of the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) load() uint64 {
	return atomic.Load64((*uint64)(s))
}

// add atomically adds n to the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) add(n int64) {
	if s == nil {
		return
	}
	val := atomic.Xadd64((*uint64)(s), n)
	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
		print("runtime: val=", val, " n=", n, "\n")
		throw("sysMemStat overflow")
	}
}

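// Illustrative only (a sketch, not a real call site): code that maps or
// unmaps memory accounted for by one of the *_sys stats records the
// change with add, for example:
//
//	memstats.other_sys.add(int64(size))  // after obtaining size bytes from the OS
//	memstats.other_sys.add(-int64(size)) // after returning those bytes
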
// heapStatsDelta contains deltas of various runtime memory statistics
// that need to be updated together in order for them to be kept
// consistent with one another.
type heapStatsDelta struct {
	// Memory stats.
	committed       int64 // byte delta of memory committed
	released        int64 // byte delta of memory released back to the OS
	inHeap          int64 // byte delta of memory placed in the heap
	inStacks        int64 // byte delta of memory reserved for stacks
	inWorkBufs      int64 // byte delta of memory reserved for work bufs
	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits

	// Allocator stats.
	//
	// These are all uint64 because they're cumulative, and could quickly wrap
	// around otherwise.
	tinyAllocCount  uint64                  // number of tiny allocations
	largeAlloc      uint64                  // bytes allocated for large objects
	largeAllocCount uint64                  // number of large object allocations
	smallAllocCount [_NumSizeClasses]uint64 // number of allocs for small objects
	largeFree       uint64                  // bytes freed for large objects (>maxSmallSize)
	largeFreeCount  uint64                  // number of frees for large objects (>maxSmallSize)
	smallFreeCount  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxSmallSize)

	// NOTE: This struct must be a multiple of 8 bytes in size because it
	// is stored in an array. If it's not, atomic accesses to the above
	// fields may be unaligned and fail on 32-bit platforms.
}

// merge adds in the deltas from b into a.
func (a *heapStatsDelta) merge(b *heapStatsDelta) {
	a.committed += b.committed
	a.released += b.released
	a.inHeap += b.inHeap
	a.inStacks += b.inStacks
	a.inWorkBufs += b.inWorkBufs
	a.inPtrScalarBits += b.inPtrScalarBits

	a.tinyAllocCount += b.tinyAllocCount
	a.largeAlloc += b.largeAlloc
	a.largeAllocCount += b.largeAllocCount
	for i := range b.smallAllocCount {
		a.smallAllocCount[i] += b.smallAllocCount[i]
	}
	a.largeFree += b.largeFree
	a.largeFreeCount += b.largeFreeCount
	for i := range b.smallFreeCount {
		a.smallFreeCount[i] += b.smallFreeCount[i]
	}
}

// consistentHeapStats represents a set of various memory statistics
// whose updates must be viewed completely to get a consistent
// state of the world.
//
// To write updates to memory stats use the acquire and release
// methods. To obtain a consistent global snapshot of these statistics,
// use read.
type consistentHeapStats struct {
	// stats is a ring buffer of heapStatsDelta values.
	// Writers always atomically update the delta at index gen.
	//
	// Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...)
	// and synchronizing with writers by observing each P's
	// statsSeq field. If the reader observes a P not writing,
	// it can be sure that it will pick up the new gen value the
	// next time it writes.
	//
	// The reader then takes responsibility by clearing space
	// in the ring buffer for the next reader to rotate gen to
	// that space (i.e. it merges in values from index (gen-2) mod 3
	// to index (gen-1) mod 3, then clears the former).
	//
	// Note that this means only one reader can be reading at a time.
	// There is no way for readers to synchronize.
	//
	// This process is why we need a ring buffer of size 3 instead
	// of 2: one is for the writers, one contains the most recent
	// data, and the last one is clear so writers can begin writing
	// to it the moment gen is updated.
	stats [3]heapStatsDelta

	// gen represents the current index into which writers
	// are writing, and can take on the value of 0, 1, or 2.
	// This value is updated atomically.
	gen uint32

	// noPLock is intended to provide mutual exclusion for updating
	// stats when no P is available. It does not block other writers
	// with a P, only other writers without a P and the reader. Because
	// stats are usually updated when a P is available, contention on
	// this lock should be minimal.
	noPLock mutex
}

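// A sketch of the writer-side protocol (illustrative; the field and
// delta below are placeholders, not a real call site):
//
//	stats := memstats.heapStats.acquire()
//	atomic.Xadd64(&stats.tinyAllocCount, 1) // updates must be atomic
//	memstats.heapStats.release()
//
// The caller must not change Ps (nor acquire or release one) between
// acquire and release; see the method comments below.
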
// acquire returns a heapStatsDelta to be updated. In effect,
// it acquires the shard for writing. release must be called
// as soon as the relevant deltas are updated.
//
// The returned heapStatsDelta must be updated atomically.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
func (m *consistentHeapStats) acquire() *heapStatsDelta {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 == 0 {
			// Should have been incremented to odd.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		lock(&m.noPLock)
	}
	gen := atomic.Load(&m.gen) % 3
	return &m.stats[gen]
}

// release indicates that the writer is done modifying
// the delta. The value returned by the corresponding
// acquire must no longer be accessed or modified after
// release is called.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
func (m *consistentHeapStats) release() {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 != 0 {
			// Should have been incremented to even.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		unlock(&m.noPLock)
	}
}

// unsafeRead aggregates the delta for this shard into out.
//
// Unsafe because it does so without any synchronization. The
// world must be stopped.
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) {
	assertWorldStopped()

	for i := range m.stats {
		out.merge(&m.stats[i])
	}
}

// unsafeClear clears the shard.
//
// Unsafe because the world must be stopped and values should
// be donated elsewhere before clearing.
func (m *consistentHeapStats) unsafeClear() {
	assertWorldStopped()

	for i := range m.stats {
		m.stats[i] = heapStatsDelta{}
	}
}

// read takes a globally consistent snapshot of m
// and puts the aggregated value in out. Even though out is a
// heapStatsDelta, the resulting values should be complete and
// valid statistic values.
//
// Not safe to call concurrently. The world must be stopped
// or metricsSema must be held.
func (m *consistentHeapStats) read(out *heapStatsDelta) {
	// Getting preempted after this point is not safe because
	// we read allp. We need to make sure a STW can't happen
	// so it doesn't change out from under us.
	mp := acquirem()

	// Get the current generation. We can be confident that this
	// will not change since read is serialized and is the only
	// thing that modifies gen.
	currGen := atomic.Load(&m.gen)
	prevGen := currGen - 1
	if currGen == 0 {
		prevGen = 2
	}

	// Prevent writers without a P from writing while we update gen.
	lock(&m.noPLock)

	// Rotate gen, effectively taking a snapshot of the state of
	// these statistics at the point of the exchange by moving
	// writers to the next set of deltas.
	//
	// This exchange is safe to do because we won't race
	// with anyone else trying to update this value.
	atomic.Xchg(&m.gen, (currGen+1)%3)

	// Allow P-less writers to continue. They'll be writing to the
	// next generation now.
	unlock(&m.noPLock)

	for _, p := range allp {
		// Spin until there are no more writers.
		for atomic.Load(&p.statsSeq)%2 != 0 {
		}
	}

	// At this point we've observed that each sequence
	// number is even, so any future writers will observe
	// the new gen value. That means it's safe to read from
	// the other deltas in the stats buffer.

	// Perform our responsibilities and free up
	// stats[prevGen] for the next time we want to take
	// a snapshot.
	m.stats[currGen].merge(&m.stats[prevGen])
	m.stats[prevGen] = heapStatsDelta{}

	// Finally, copy out the complete delta.
	*out = m.stats[currGen]

	releasem(mp)
}