...

Source file src/runtime/mstats.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

// Statistics.
//
// For detailed descriptions see the documentation for MemStats.
// Fields that differ from MemStats are further documented here.
//
// Many of these fields are updated on the fly, while others are only
// updated when updatememstats is called.
type mstats struct {
	// General statistics.
	alloc       uint64 // bytes allocated and not yet freed
	total_alloc uint64 // bytes allocated (even if freed)
	sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
	nlookup     uint64 // number of pointer lookups (unused)
	nmalloc     uint64 // number of mallocs
	nfree       uint64 // number of frees

	// Statistics about malloc heap.
	// Updated atomically, or with the world stopped.
	//
	// Like MemStats, heap_sys and heap_inuse do not count memory
	// in manually-managed spans.
	heap_sys      sysMemStat // virtual address space obtained from system for GC'd heap
	heap_inuse    uint64     // bytes in mSpanInUse spans
	heap_released uint64     // bytes released to the os

	// heap_objects is not used by the runtime directly and instead
	// computed on the fly by updatememstats.
	heap_objects uint64 // total number of allocated objects

	// Statistics about stacks.
	stacks_inuse uint64     // bytes in manually-managed stack spans; computed by updatememstats
	stacks_sys   sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys

	// Statistics about allocation of low-level fixed-size structures.
	// Protected by FixAlloc locks.
	mspan_inuse  uint64 // mspan structures
	mspan_sys    sysMemStat
	mcache_inuse uint64 // mcache structures
	mcache_sys   sysMemStat
	buckhash_sys sysMemStat // profiling bucket hash table

	// Statistics about GC overhead.
	gcWorkBufInUse           uint64     // computed by updatememstats
	gcProgPtrScalarBitsInUse uint64     // computed by updatememstats
	gcMiscSys                sysMemStat // updated atomically or during STW

	// Miscellaneous statistics.
	other_sys sysMemStat // updated atomically or during STW

	// Statistics about the garbage collector.

	// Protected by mheap or stopping the world during GC.
	last_gc_unix    uint64 // last gc (in unix time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	numforcedgc     uint32  // number of user-forced GCs
	gc_cpu_fraction float64 // fraction of CPU time used by GC
	enablegc        bool
	debuggc         bool

	// Statistics about allocation size classes.

	by_size [_NumSizeClasses]struct {
		size    uint32
		nmalloc uint64
		nfree   uint64
	}

	// Add a uint32 for an even number of size classes so that the fields
	// below are aligned to 64 bits for atomic operations on 32-bit platforms.
	_ [1 - _NumSizeClasses%2]uint32

	last_gc_nanotime uint64 // last gc (monotonic time)
	last_heap_inuse  uint64 // heap_inuse at mark termination of the previous GC

	// heapStats is a set of statistics that are updated consistently;
	// see consistentHeapStats below for how updates and reads are kept
	// coherent.
	heapStats consistentHeapStats

	// _ uint32 // ensure gcPauseDist is aligned

	// gcPauseDist represents the distribution of all GC-related
	// application pauses in the runtime.
	//
	// Each individual pause is counted separately, unlike pause_ns.
	gcPauseDist timeHistogram
}

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.

	// Alloc is bytes of allocated heap objects.
	//
	// This is the same as HeapAlloc (see below).
	Alloc uint64

	// TotalAlloc is cumulative bytes allocated for heap objects.
	//
	// TotalAlloc increases as heap objects are allocated, but
	// unlike Alloc and HeapAlloc, it does not decrease when
	// objects are freed.
	TotalAlloc uint64

	// Sys is the total bytes of memory obtained from the OS.
	//
	// Sys is the sum of the XSys fields below. Sys measures the
	// virtual address space reserved by the Go runtime for the
	// heap, stacks, and other internal data structures. It's
	// likely that not all of the virtual address space is backed
	// by physical memory at any given moment, though in general
	// it all was at some point.
	Sys uint64

	// Lookups is the number of pointer lookups performed by the
	// runtime.
	//
	// This is primarily useful for debugging runtime internals.
	Lookups uint64

	// Mallocs is the cumulative count of heap objects allocated.
	// The number of live objects is Mallocs - Frees.
	Mallocs uint64

	// Frees is the cumulative count of heap objects freed.
	Frees uint64

	// Heap memory statistics.
	//
	// Interpreting the heap statistics requires some knowledge of
	// how Go organizes memory. Go divides the virtual address
	// space of the heap into "spans", which are contiguous
	// regions of memory 8K or larger. A span may be in one of
	// three states:
	//
	// An "idle" span contains no objects or other data. The
	// physical memory backing an idle span can be released back
	// to the OS (but the virtual address space never is), or it
	// can be converted into an "in use" or "stack" span.
	//
	// An "in use" span contains at least one heap object and may
	// have free space available to allocate more heap objects.
	//
	// A "stack" span is used for goroutine stacks. Stack spans
	// are not considered part of the heap. A span can change
	// between heap and stack memory; it is never used for both
	// simultaneously.

	// HeapAlloc is bytes of allocated heap objects.
	//
	// "Allocated" heap objects include all reachable objects, as
	// well as unreachable objects that the garbage collector has
	// not yet freed. Specifically, HeapAlloc increases as heap
	// objects are allocated and decreases as the heap is swept
	// and unreachable objects are freed. Sweeping occurs
	// incrementally between GC cycles, so these two processes
	// occur simultaneously, and as a result HeapAlloc tends to
	// change smoothly (in contrast with the sawtooth that is
	// typical of stop-the-world garbage collectors).
	HeapAlloc uint64

	// HeapSys is bytes of heap memory obtained from the OS.
	//
	// HeapSys measures the amount of virtual address space
	// reserved for the heap. This includes virtual address space
	// that has been reserved but not yet used, which consumes no
	// physical memory, but tends to be small, as well as virtual
	// address space for which the physical memory has been
	// returned to the OS after it became unused (see HeapReleased
	// for a measure of the latter).
	//
	// HeapSys estimates the largest size the heap has had.
	HeapSys uint64

	// HeapIdle is bytes in idle (unused) spans.
	//
	// Idle spans have no objects in them. These spans could be
	// (and may already have been) returned to the OS, or they can
	// be reused for heap allocations, or they can be reused as
	// stack memory.
	//
	// HeapIdle minus HeapReleased estimates the amount of memory
	// that could be returned to the OS, but is being retained by
	// the runtime so it can grow the heap without requesting more
	// memory from the OS. If this difference is significantly
	// larger than the heap size, it indicates there was a recent
	// transient spike in live heap size.
	HeapIdle uint64

	// HeapInuse is bytes in in-use spans.
	//
	// In-use spans have at least one object in them. These spans
	// can only be used for other objects of roughly the same
	// size.
	//
	// HeapInuse minus HeapAlloc estimates the amount of memory
	// that has been dedicated to particular size classes, but is
	// not currently being used. This is an upper bound on
	// fragmentation, but in general this memory can be reused
	// efficiently.
	HeapInuse uint64

	// HeapReleased is bytes of physical memory returned to the OS.
	//
	// This counts heap memory from idle spans that was returned
	// to the OS and has not yet been reacquired for the heap.
	HeapReleased uint64

	// HeapObjects is the number of allocated heap objects.
	//
	// Like HeapAlloc, this increases as objects are allocated and
	// decreases as the heap is swept and unreachable objects are
	// freed.
	HeapObjects uint64

	// Stack memory statistics.
	//
	// Stacks are not considered part of the heap, but the runtime
	// can reuse a span of heap memory for stack memory, and
	// vice-versa.

	// StackInuse is bytes in stack spans.
	//
	// In-use stack spans have at least one stack in them. These
	// spans can only be used for other stacks of the same size.
	//
	// There is no StackIdle because unused stack spans are
	// returned to the heap (and hence counted toward HeapIdle).
	StackInuse uint64

	// StackSys is bytes of stack memory obtained from the OS.
	//
	// StackSys is StackInuse, plus any memory obtained directly
	// from the OS for OS thread stacks (which should be minimal).
	StackSys uint64

	// Off-heap memory statistics.
	//
	// The following statistics measure runtime-internal
	// structures that are not allocated from heap memory (usually
	// because they are part of implementing the heap). Unlike
	// heap or stack memory, any memory allocated to these
	// structures is dedicated to these structures.
	//
	// These are primarily useful for debugging runtime memory
	// overheads.

	// MSpanInuse is bytes of allocated mspan structures.
	MSpanInuse uint64

	// MSpanSys is bytes of memory obtained from the OS for mspan
	// structures.
	MSpanSys uint64

	// MCacheInuse is bytes of allocated mcache structures.
	MCacheInuse uint64

	// MCacheSys is bytes of memory obtained from the OS for
	// mcache structures.
	MCacheSys uint64

	// BuckHashSys is bytes of memory in profiling bucket hash tables.
	BuckHashSys uint64

	// GCSys is bytes of memory in garbage collection metadata.
	GCSys uint64

	// OtherSys is bytes of memory in miscellaneous off-heap
	// runtime allocations.
	OtherSys uint64

	// Garbage collector statistics.

	// NextGC is the target heap size of the next GC cycle.
	//
	// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
	// At the end of each GC cycle, the target for the next cycle
	// is computed based on the amount of reachable data and the
	// value of GOGC.
	NextGC uint64

	// LastGC is the time the last garbage collection finished, as
	// nanoseconds since 1970 (the UNIX epoch).
	LastGC uint64

	// PauseTotalNs is the cumulative nanoseconds in GC
	// stop-the-world pauses since the program started.
	//
	// During a stop-the-world pause, all goroutines are paused
	// and only the garbage collector can run.
	PauseTotalNs uint64

	// PauseNs is a circular buffer of recent GC stop-the-world
	// pause times in nanoseconds.
	//
	// The most recent pause is at PauseNs[(NumGC+255)%256]. In
	// general, PauseNs[N%256] records the time paused in the most
	// recent N%256th GC cycle. There may be multiple pauses per
	// GC cycle; this is the sum of all pauses during a cycle.
	PauseNs [256]uint64

	// PauseEnd is a circular buffer of recent GC pause end times,
	// as nanoseconds since 1970 (the UNIX epoch).
	//
	// This buffer is filled the same way as PauseNs. There may be
	// multiple pauses per GC cycle; this records the end of the
	// last pause in a cycle.
	PauseEnd [256]uint64

	// NumGC is the number of completed GC cycles.
	NumGC uint32

	// NumForcedGC is the number of GC cycles that were forced by
	// the application calling the GC function.
	NumForcedGC uint32

	// GCCPUFraction is the fraction of this program's available
	// CPU time used by the GC since the program started.
	//
	// GCCPUFraction is expressed as a number between 0 and 1,
	// where 0 means GC has consumed none of this program's CPU. A
	// program's available CPU time is defined as the integral of
	// GOMAXPROCS since the program started. That is, if
	// GOMAXPROCS is 2 and a program has been running for 10
	// seconds, its "available CPU" is 20 seconds. GCCPUFraction
	// does not include CPU time used for write barrier activity.
	//
	// This is the same as the fraction of CPU reported by
	// GODEBUG=gctrace=1.
	GCCPUFraction float64

	// EnableGC indicates that GC is enabled. It is always true,
	// even if GOGC=off.
	EnableGC bool

	// DebugGC is currently unused.
	DebugGC bool

	// BySize reports per-size class allocation statistics.
	//
	// BySize[N] gives statistics for allocations of size S where
	// BySize[N-1].Size < S ≤ BySize[N].Size.
	//
	// This does not report allocations larger than BySize[60].Size.
	BySize [61]struct {
		// Size is the maximum byte size of an object in this
		// size class.
		Size uint32

		// Mallocs is the cumulative count of heap objects
		// allocated in this size class. The cumulative bytes
		// of allocation is Size*Mallocs. The number of live
		// objects in this size class is Mallocs - Frees.
		Mallocs uint64

		// Frees is the cumulative count of heap objects freed
		// in this size class.
		Frees uint64
	}
}
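
// NOTE: The following example is editorial and not part of mstats.go. It is a
// minimal sketch of how user code reads a MemStats snapshot and derives the
// values the field docs above describe, including the PauseNs circular-buffer
// indexing.
//
//	package main
//
//	import (
//		"fmt"
//		"runtime"
//	)
//
//	func main() {
//		var m runtime.MemStats
//		runtime.ReadMemStats(&m)
//
//		fmt.Println("live objects:", m.Mallocs-m.Frees) // same as m.HeapObjects
//		fmt.Println("live bytes:", m.HeapAlloc)         // same as m.Alloc
//
//		// The most recent stop-the-world pause, if any GC has run.
//		if m.NumGC > 0 {
//			fmt.Println("last pause (ns):", m.PauseNs[(m.NumGC+255)%256])
//		}
//	}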

func init() {
	if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 {
		println(offset)
		throw("memstats.heapStats not aligned to 8 bytes")
	}
	if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 {
		println(offset)
		throw("memstats.gcPauseDist not aligned to 8 bytes")
	}
	// Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g.
	// [3]heapStatsDelta) to be 8-byte aligned.
	if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 {
		println(size)
		throw("heapStatsDelta not a multiple of 8 bytes in size")
	}
}
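
// NOTE: Editorial sketch, not part of mstats.go. The init check above is a
// general pattern: on 32-bit platforms, 64-bit atomic operations require
// 8-byte alignment, so layout regressions can be caught at startup with
// unsafe.Offsetof. The counters type below is hypothetical.
//
//	package main
//
//	import "unsafe"
//
//	type counters struct {
//		flag uint32
//		_    uint32 // padding so hits stays 8-byte aligned on 32-bit platforms
//		hits uint64 // updated with sync/atomic
//	}
//
//	func init() {
//		if off := unsafe.Offsetof(counters{}.hits); off%8 != 0 {
//			panic("counters.hits not aligned to 8 bytes")
//		}
//	}
//
//	func main() {}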

// ReadMemStats populates m with memory allocator statistics.
//
// The returned memory allocator statistics are up to date as of the
// call to ReadMemStats. This is in contrast with a heap profile,
// which is a snapshot as of the most recently completed garbage
// collection cycle.
func ReadMemStats(m *MemStats) {
	stopTheWorld("read mem stats")

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld()
}

func readmemstats_m(stats *MemStats) {
	updatememstats()

	stats.Alloc = memstats.alloc
	stats.TotalAlloc = memstats.total_alloc
	stats.Sys = memstats.sys
	stats.Mallocs = memstats.nmalloc
	stats.Frees = memstats.nfree
	stats.HeapAlloc = memstats.alloc
	stats.HeapSys = memstats.heap_sys.load()
	// By definition, HeapIdle is memory that was mapped
	// for the heap but is not currently used to hold heap
	// objects. It also specifically is memory that can be
	// used for other purposes, like stacks, but this memory
	// is subtracted out of HeapSys before it makes that
	// transition. Put another way:
	//
	// heap_sys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes
	// heap_idle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose
	//
	// or
	//
	// heap_sys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse
	// heap_idle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heap_inuse
	//
	// => heap_idle = heap_sys - heap_inuse
	stats.HeapIdle = memstats.heap_sys.load() - memstats.heap_inuse
	stats.HeapInuse = memstats.heap_inuse
	stats.HeapReleased = memstats.heap_released
	stats.HeapObjects = memstats.heap_objects
	stats.StackInuse = memstats.stacks_inuse
	// memstats.stacks_sys is only memory mapped directly for OS stacks.
	// Add in heap-allocated stack memory for user consumption.
	stats.StackSys = memstats.stacks_inuse + memstats.stacks_sys.load()
	stats.MSpanInuse = memstats.mspan_inuse
	stats.MSpanSys = memstats.mspan_sys.load()
	stats.MCacheInuse = memstats.mcache_inuse
	stats.MCacheSys = memstats.mcache_sys.load()
	stats.BuckHashSys = memstats.buckhash_sys.load()
	// MemStats defines GCSys as an aggregate of all memory related
	// to the memory management system, but we track this memory
	// at a more granular level in the runtime.
	stats.GCSys = memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse
	stats.OtherSys = memstats.other_sys.load()
	stats.NextGC = gcController.heapGoal
	stats.LastGC = memstats.last_gc_unix
	stats.PauseTotalNs = memstats.pause_total_ns
	stats.PauseNs = memstats.pause_ns
	stats.PauseEnd = memstats.pause_end
	stats.NumGC = memstats.numgc
	stats.NumForcedGC = memstats.numforcedgc
	stats.GCCPUFraction = memstats.gc_cpu_fraction
	stats.EnableGC = true

	// Handle BySize. Copy N values, where N is
	// the minimum of the lengths of the two arrays.
	// Unfortunately copy() won't work here because
	// the arrays have different structs.
	//
	// TODO(mknyszek): Consider renaming the fields
	// of by_size's elements to align so we can use
	// the copy built-in.
	bySizeLen := len(stats.BySize)
	if l := len(memstats.by_size); l < bySizeLen {
		bySizeLen = l
	}
	for i := 0; i < bySizeLen; i++ {
		stats.BySize[i].Size = memstats.by_size[i].size
		stats.BySize[i].Mallocs = memstats.by_size[i].nmalloc
		stats.BySize[i].Frees = memstats.by_size[i].nfree
	}
}
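
// NOTE: Editorial sketch, not part of mstats.go. The heap_idle identity
// derived in the comment above is visible through the exported API: in any
// single snapshot, HeapIdle equals HeapSys minus HeapInuse, and
// HeapIdle-HeapReleased is memory the runtime retains but could return to
// the OS.
//
//	package main
//
//	import (
//		"fmt"
//		"runtime"
//	)
//
//	func main() {
//		var m runtime.MemStats
//		runtime.ReadMemStats(&m)
//		fmt.Println(m.HeapIdle == m.HeapSys-m.HeapInuse) // true
//		fmt.Println("retained bytes:", m.HeapIdle-m.HeapReleased)
//	}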

//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}

// readGCStats_m must be called on the system stack because it acquires the heap
// lock. See mheap for details.
//go:systemstack
func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)
	*pauses = p[:n+n+3]
}
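
// NOTE: Editorial sketch, not part of mstats.go. readGCStats is linknamed
// into runtime/debug, whose exported ReadGCStats unpacks the packed slice
// filled in above (pauses, pause ends, last GC time, GC count, total pause):
//
//	package main
//
//	import (
//		"fmt"
//		"runtime/debug"
//	)
//
//	func main() {
//		var s debug.GCStats
//		debug.ReadGCStats(&s)
//		fmt.Println("GCs:", s.NumGC, "total pause:", s.PauseTotal)
//		if len(s.Pause) > 0 {
//			// Most recent first, mirroring the delivery order above.
//			fmt.Println("last pause:", s.Pause[0], "ended:", s.PauseEnd[0])
//		}
//	}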

// Updates the memstats structure.
//
// The world must be stopped.
//
//go:nowritebarrier
func updatememstats() {
	assertWorldStopped()

	// Flush mcaches to mcentral before doing anything else.
	//
	// Flushing to the mcentral may in general cause stats to
	// change as mcentral data structures are manipulated.
	systemstack(flushallmcaches)

	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
	memstats.sys = memstats.heap_sys.load() + memstats.stacks_sys.load() + memstats.mspan_sys.load() +
		memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gcMiscSys.load() +
		memstats.other_sys.load()

	// Calculate memory allocator stats.
	// During program execution we only count number of frees and amount of freed memory.
	// Current number of alive objects in the heap and amount of alive heap memory
	// are calculated by scanning all spans.
	// Total number of mallocs is calculated as number of frees plus number of alive objects.
	// Similarly, total amount of allocated memory is calculated as amount of freed memory
	// plus amount of alive heap memory.
	memstats.alloc = 0
	memstats.total_alloc = 0
	memstats.nmalloc = 0
	memstats.nfree = 0
	for i := 0; i < len(memstats.by_size); i++ {
		memstats.by_size[i].nmalloc = 0
		memstats.by_size[i].nfree = 0
	}
	// Collect consistent stats, which are the source-of-truth in some cases.
	var consStats heapStatsDelta
	memstats.heapStats.unsafeRead(&consStats)

	// Collect large allocation stats.
	totalAlloc := consStats.largeAlloc
	memstats.nmalloc += consStats.largeAllocCount
	totalFree := consStats.largeFree
	memstats.nfree += consStats.largeFreeCount

	// Collect per-sizeclass stats.
	for i := 0; i < _NumSizeClasses; i++ {
		// Malloc stats.
		a := consStats.smallAllocCount[i]
		totalAlloc += a * uint64(class_to_size[i])
		memstats.nmalloc += a
		memstats.by_size[i].nmalloc = a

		// Free stats.
		f := consStats.smallFreeCount[i]
		totalFree += f * uint64(class_to_size[i])
		memstats.nfree += f
		memstats.by_size[i].nfree = f
	}

	// Account for tiny allocations.
	memstats.nfree += consStats.tinyAllocCount
	memstats.nmalloc += consStats.tinyAllocCount

	// Calculate derived stats.
	memstats.total_alloc = totalAlloc
	memstats.alloc = totalAlloc - totalFree
	memstats.heap_objects = memstats.nmalloc - memstats.nfree

	memstats.stacks_inuse = uint64(consStats.inStacks)
	memstats.gcWorkBufInUse = uint64(consStats.inWorkBufs)
	memstats.gcProgPtrScalarBitsInUse = uint64(consStats.inPtrScalarBits)

	// We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory.
	memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse

	// The world is stopped, so the consistent stats (after aggregation)
	// should be identical to some combination of memstats. In particular:
	//
	// * heap_inuse == inHeap
	// * heap_released == released
	// * heap_sys - heap_released == committed - inStacks - inWorkBufs - inPtrScalarBits
	//
	// Check if that's actually true.
	//
	// TODO(mknyszek): Maybe don't throw here. It would be bad if a
	// bug in otherwise benign accounting caused the whole application
	// to crash.
	if memstats.heap_inuse != uint64(consStats.inHeap) {
		print("runtime: heap_inuse=", memstats.heap_inuse, "\n")
		print("runtime: consistent value=", consStats.inHeap, "\n")
		throw("heap_inuse and consistent stats are not equal")
	}
	if memstats.heap_released != uint64(consStats.released) {
		print("runtime: heap_released=", memstats.heap_released, "\n")
		print("runtime: consistent value=", consStats.released, "\n")
		throw("heap_released and consistent stats are not equal")
	}
	globalRetained := memstats.heap_sys.load() - memstats.heap_released
	consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
	if globalRetained != consRetained {
		print("runtime: global value=", globalRetained, "\n")
		print("runtime: consistent value=", consRetained, "\n")
		throw("measures of the retained heap are not equal")
	}
}
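
// NOTE: Editorial sketch, not part of mstats.go. A worked instance of the
// derivation above, with made-up numbers: the runtime keeps only cumulative
// allocation and free totals, and the current (live) values fall out by
// subtraction (fragment; assumes it runs inside some function).
//
//	totalAlloc := uint64(96 << 10) // cumulative bytes allocated
//	totalFree := uint64(64 << 10)  // cumulative bytes freed
//	nmalloc := uint64(12)          // cumulative object allocations
//	nfree := uint64(8)             // cumulative object frees
//
//	alloc := totalAlloc - totalFree // 32 KiB of live heap memory
//	heapObjects := nmalloc - nfree  // 4 live objects
//	_, _ = alloc, heapObjects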

// flushmcache flushes the mcache of allp[i].
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	assertWorldStopped()

	p := allp[i]
	c := p.mcache
	if c == nil {
		return
	}
	c.releaseAll()
	stackcache_clear(c)
}

// flushallmcaches flushes the mcaches of all Ps.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	assertWorldStopped()

	for i := 0; i < int(gomaxprocs); i++ {
		flushmcache(i)
	}
}

// sysMemStat represents a global system statistic that is managed atomically.
//
// This type must structurally be a uint64 so that mstats aligns with MemStats.
type sysMemStat uint64

// load atomically reads the value of the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) load() uint64 {
	return atomic.Load64((*uint64)(s))
}

// add atomically adds the sysMemStat by n.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) add(n int64) {
	if s == nil {
		return
	}
	val := atomic.Xadd64((*uint64)(s), n)
	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
		print("runtime: val=", val, " n=", n, "\n")
		throw("sysMemStat overflow")
	}
}
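
// NOTE: Editorial sketch, not part of mstats.go. A user-level analogue of
// sysMemStat (the memStat name is hypothetical) built on sync/atomic: signed
// deltas applied to an unsigned counter, with the same underflow/overflow
// check as add above.
//
//	package main
//
//	import (
//		"fmt"
//		"sync/atomic"
//	)
//
//	type memStat uint64
//
//	func (s *memStat) load() uint64 { return atomic.LoadUint64((*uint64)(s)) }
//
//	func (s *memStat) add(n int64) {
//		val := atomic.AddUint64((*uint64)(s), uint64(n)) // two's complement handles n < 0
//		if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
//			panic("memStat overflow")
//		}
//	}
//
//	func main() {
//		var s memStat
//		s.add(4096)
//		s.add(-1024)
//		fmt.Println(s.load()) // 3072
//	}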

// heapStatsDelta contains deltas of various runtime memory statistics
// that need to be updated together in order for them to be kept
// consistent with one another.
type heapStatsDelta struct {
	// Memory stats.
	committed       int64 // byte delta of memory committed
	released        int64 // byte delta of released memory generated
	inHeap          int64 // byte delta of memory placed in the heap
	inStacks        int64 // byte delta of memory reserved for stacks
	inWorkBufs      int64 // byte delta of memory reserved for work bufs
	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits

	// Allocator stats.
	//
	// These are all uint64 because they're cumulative, and could quickly wrap
	// around otherwise.
	tinyAllocCount  uint64                  // number of tiny allocations
	largeAlloc      uint64                  // bytes allocated for large objects
	largeAllocCount uint64                  // number of large object allocations
	smallAllocCount [_NumSizeClasses]uint64 // number of allocs for small objects
	largeFree       uint64                  // bytes freed for large objects (>maxSmallSize)
	largeFreeCount  uint64                  // number of frees for large objects (>maxSmallSize)
	smallFreeCount  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxSmallSize)

	// NOTE: This struct must be a multiple of 8 bytes in size because it
	// is stored in an array. If it's not, atomic accesses to the above
	// fields may be unaligned and fail on 32-bit platforms.
}

// merge adds in the deltas from b into a.
func (a *heapStatsDelta) merge(b *heapStatsDelta) {
	a.committed += b.committed
	a.released += b.released
	a.inHeap += b.inHeap
	a.inStacks += b.inStacks
	a.inWorkBufs += b.inWorkBufs
	a.inPtrScalarBits += b.inPtrScalarBits

	a.tinyAllocCount += b.tinyAllocCount
	a.largeAlloc += b.largeAlloc
	a.largeAllocCount += b.largeAllocCount
	for i := range b.smallAllocCount {
		a.smallAllocCount[i] += b.smallAllocCount[i]
	}
	a.largeFree += b.largeFree
	a.largeFreeCount += b.largeFreeCount
	for i := range b.smallFreeCount {
		a.smallFreeCount[i] += b.smallFreeCount[i]
	}
}

// consistentHeapStats represents a set of various memory statistics
// whose updates must be viewed completely to get a consistent
// state of the world.
//
// To write updates to memory stats use the acquire and release
// methods. To obtain a consistent global snapshot of these statistics,
// use read.
type consistentHeapStats struct {
	// stats is a ring buffer of heapStatsDelta values.
	// Writers always atomically update the delta at index gen.
	//
	// Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...)
	// and synchronizing with writers by observing each P's
	// statsSeq field. If the reader observes a P not writing,
	// it can be sure that it will pick up the new gen value the
	// next time it writes.
	//
	// The reader then takes responsibility by clearing space
	// in the ring buffer for the next reader to rotate gen to
	// that space (i.e. it merges in values from index (gen-2) mod 3
	// to index (gen-1) mod 3, then clears the former).
	//
	// Note that this means only one reader can be reading at a time.
	// There is no way for readers to synchronize.
	//
	// This process is why we need a ring buffer of size 3 instead
	// of 2: one is for the writers, one contains the most recent
	// data, and the last one is clear so writers can begin writing
	// to it the moment gen is updated.
	stats [3]heapStatsDelta

	// gen represents the current index into which writers
	// are writing, and can take on the value of 0, 1, or 2.
	// This value is updated atomically.
	gen uint32

	// noPLock is intended to provide mutual exclusion for updating
	// stats when no P is available. It does not block other writers
	// with a P, only other writers without a P and the reader. Because
	// stats are usually updated when a P is available, contention on
	// this lock should be minimal.
	noPLock mutex
}
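
// NOTE: Editorial walkthrough, not part of mstats.go. One read cycle with
// gen == 0 looks like this:
//
//	before: writers add deltas to stats[0]; stats[2] holds the previous
//	        snapshot; stats[1] was cleared by the previous read.
//	step 1: the reader sets gen = 1, so new writers land in stats[1].
//	step 2: the reader waits until every P's statsSeq is even, so no
//	        writer is still mid-update on stats[0].
//	step 3: the reader merges stats[2] into stats[0] and zeroes stats[2],
//	        leaving it clear for the next read to rotate gen onto (1 -> 2).
//	after:  stats[0] is the new cumulative snapshot, stats[1] is the
//	        active writer slot, and stats[2] is empty.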

// acquire returns a heapStatsDelta to be updated. In effect,
// it acquires the shard for writing. release must be called
// as soon as the relevant deltas are updated.
//
// The returned heapStatsDelta must be updated atomically.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
func (m *consistentHeapStats) acquire() *heapStatsDelta {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 == 0 {
			// Should have been incremented to odd.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		lock(&m.noPLock)
	}
	gen := atomic.Load(&m.gen) % 3
	return &m.stats[gen]
}

// release indicates that the writer is done modifying
// the delta. The value returned by the corresponding
// acquire must no longer be accessed or modified after
// release is called.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
func (m *consistentHeapStats) release() {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 != 0 {
			// Should have been incremented to even.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		unlock(&m.noPLock)
	}
}

// unsafeRead aggregates the delta for this shard into out.
//
// Unsafe because it does so without any synchronization. The
// world must be stopped.
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) {
	assertWorldStopped()

	for i := range m.stats {
		out.merge(&m.stats[i])
	}
}

// unsafeClear clears the shard.
//
// Unsafe because the world must be stopped and values should
// be donated elsewhere before clearing.
func (m *consistentHeapStats) unsafeClear() {
	assertWorldStopped()

	for i := range m.stats {
		m.stats[i] = heapStatsDelta{}
	}
}

// read takes a globally consistent snapshot of m
// and puts the aggregated value in out. Even though out is a
// heapStatsDelta, the resulting values should be complete and
// valid statistic values.
//
// Not safe to call concurrently. The world must be stopped
// or metricsSema must be held.
func (m *consistentHeapStats) read(out *heapStatsDelta) {
	// Getting preempted after this point is not safe because
	// we read allp. We need to make sure a STW can't happen
	// so it doesn't change out from under us.
	mp := acquirem()

	// Get the current generation. We can be confident that this
	// will not change since read is serialized and is the only
	// one that modifies currGen.
	currGen := atomic.Load(&m.gen)
	prevGen := currGen - 1
	if currGen == 0 {
		prevGen = 2
	}

	// Prevent writers without a P from writing while we update gen.
	lock(&m.noPLock)

	// Rotate gen, effectively taking a snapshot of the state of
	// these statistics at the point of the exchange by moving
	// writers to the next set of deltas.
	//
	// This exchange is safe to do because we won't race
	// with anyone else trying to update this value.
	atomic.Xchg(&m.gen, (currGen+1)%3)

	// Allow P-less writers to continue. They'll be writing to the
	// next generation now.
	unlock(&m.noPLock)

	for _, p := range allp {
		// Spin until there are no more writers.
		for atomic.Load(&p.statsSeq)%2 != 0 {
		}
	}

	// At this point we've observed that each sequence
	// number is even, so any future writers will observe
	// the new gen value. That means it's safe to read from
	// the other deltas in the stats buffer.

	// Perform our responsibilities and free up
	// stats[prevGen] for the next time we want to take
	// a snapshot.
	m.stats[currGen].merge(&m.stats[prevGen])
	m.stats[prevGen] = heapStatsDelta{}

	// Finally, copy out the complete delta.
	*out = m.stats[currGen]

	releasem(mp)
}
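
// NOTE: Editorial sketch, not part of mstats.go. A simplified, self-contained
// version of the acquire/release/read protocol above, with a fixed set of
// writer slots standing in for Ps and plain sync/atomic standing in for the
// runtime's atomics. Like the real code, it assumes only one reader runs at
// a time; the noPLock path for P-less writers is omitted.
//
//	package main
//
//	import (
//		"fmt"
//		"sync"
//		"sync/atomic"
//	)
//
//	type delta struct{ bytes int64 } // stand-in for heapStatsDelta
//
//	func (a *delta) merge(b *delta) { a.bytes += b.bytes }
//
//	type consistent struct {
//		stats [3]delta
//		gen   uint32
//		seq   []uint32 // one even/odd sequence counter per writer
//	}
//
//	// add plays the role of acquire+update+release for writer w.
//	func (c *consistent) add(w int, n int64) {
//		atomic.AddUint32(&c.seq[w], 1) // now odd: mid-update
//		g := atomic.LoadUint32(&c.gen) % 3
//		atomic.AddInt64(&c.stats[g].bytes, n)
//		atomic.AddUint32(&c.seq[w], 1) // now even: done
//	}
//
//	// read rotates gen and aggregates, mirroring read above.
//	func (c *consistent) read(out *delta) {
//		cur := atomic.LoadUint32(&c.gen)
//		prev := (cur + 2) % 3
//		atomic.StoreUint32(&c.gen, (cur+1)%3) // writers move to a clear slot
//		for i := range c.seq {
//			for atomic.LoadUint32(&c.seq[i])%2 != 0 {
//			} // spin until no writer is mid-update
//		}
//		c.stats[cur].merge(&c.stats[prev]) // fold in the previous snapshot
//		c.stats[prev] = delta{}            // leave a clear slot behind
//		*out = c.stats[cur]
//	}
//
//	func main() {
//		c := &consistent{seq: make([]uint32, 4)}
//		var wg sync.WaitGroup
//		for w := 0; w < 4; w++ {
//			w := w
//			wg.Add(1)
//			go func() {
//				defer wg.Done()
//				for i := 0; i < 1000; i++ {
//					c.add(w, 8)
//				}
//			}()
//		}
//		wg.Wait()
//		var out delta
//		c.read(&out)
//		fmt.Println("total bytes:", out.bytes) // 32000
//	}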
