...

Source file src/runtime/mgcmark.go

Documentation: runtime

		 1  // Copyright 2009 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  // Garbage collector: marking and scanning
		 6  
		 7  package runtime
		 8  
		 9  import (
		10  	"runtime/internal/atomic"
		11  	"runtime/internal/sys"
		12  	"unsafe"
		13  )
		14  
		15  const (
		16  	fixedRootFinalizers = iota
		17  	fixedRootFreeGStacks
		18  	fixedRootCount
		19  
		20  	// rootBlockBytes is the number of bytes to scan per data or
		21  	// BSS root.
		22  	rootBlockBytes = 256 << 10
		23  
		24  	// maxObletBytes is the maximum bytes of an object to scan at
		25  	// once. Larger objects will be split up into "oblets" of at
		26  	// most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
		27  	// scan preemption at ~100 µs.
		28  	//
		29  	// This must be > _MaxSmallSize so that the object base is the
		30  	// span base.
		31  	maxObletBytes = 128 << 10
		32  
		33  	// drainCheckThreshold specifies how many units of work to do
		34  	// between self-preemption checks in gcDrain. Assuming a scan
		35  	// rate of 1 MB/ms, this is ~100 µs. Lower values have higher
		36  	// overhead in the scan loop (the scheduler check may perform
		37  	// a syscall, so its overhead is nontrivial). Higher values
		38  	// make the system less responsive to incoming work.
		39  	drainCheckThreshold = 100000
		40  
		41  	// pagesPerSpanRoot indicates how many pages to scan from a span root
		42  	// at a time. Used by special root marking.
		43  	//
		44  	// Higher values improve throughput by increasing locality, but
		45  	// increase the minimum latency of a marking operation.
		46  	//
		47  	// Must be a multiple of the pageInUse bitmap element size and
		48  	// must also evenly divide pagesPerArena.
		49  	pagesPerSpanRoot = 512
		50  )
		51  
		52  // gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
		53  // some miscellany) and initializes scanning-related state.
		54  //
		55  // The world must be stopped.
		56  func gcMarkRootPrepare() {
		57  	assertWorldStopped()
		58  
		59  	// Compute how many data and BSS root blocks there are.
		60  	nBlocks := func(bytes uintptr) int {
		61  		return int(divRoundUp(bytes, rootBlockBytes))
		62  	}
		63  
		64  	work.nDataRoots = 0
		65  	work.nBSSRoots = 0
		66  
		67  	// Scan globals.
		68  	for _, datap := range activeModules() {
		69  		nDataRoots := nBlocks(datap.edata - datap.data)
		70  		if nDataRoots > work.nDataRoots {
		71  			work.nDataRoots = nDataRoots
		72  		}
		73  	}
		74  
		75  	for _, datap := range activeModules() {
		76  		nBSSRoots := nBlocks(datap.ebss - datap.bss)
		77  		if nBSSRoots > work.nBSSRoots {
		78  			work.nBSSRoots = nBSSRoots
		79  		}
		80  	}
		81  
		82  	// Scan span roots for finalizer specials.
		83  	//
		84  	// We depend on addfinalizer to mark objects that get
		85  	// finalizers after root marking.
		86  	//
		87  	// We're going to scan the whole heap (that was available at the time the
		88  	// mark phase started, i.e. markArenas) for in-use spans which have specials.
		89  	//
		90  	// Break up the work into arenas, and further into chunks.
		91  	//
		92  	// Snapshot allArenas as markArenas. This snapshot is safe because allArenas
		93  	// is append-only.
		94  	mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)]
		95  	work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot)
		96  
		97  	// Scan stacks.
		98  	//
		99  	// Gs may be created after this point, but it's okay that we
	 100  	// ignore them because they begin life without any roots, so
	 101  	// there's nothing to scan, and any roots they create during
	 102  	// the concurrent phase will be caught by the write barrier.
	 103  	work.nStackRoots = int(atomic.Loaduintptr(&allglen))
	 104  
	 105  	work.markrootNext = 0
	 106  	work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)
	 107  
	 108  	// Calculate base indexes of each root type
	 109  	work.baseData = uint32(fixedRootCount)
	 110  	work.baseBSS = work.baseData + uint32(work.nDataRoots)
	 111  	work.baseSpans = work.baseBSS + uint32(work.nBSSRoots)
	 112  	work.baseStacks = work.baseSpans + uint32(work.nSpanRoots)
	 113  	work.baseEnd = work.baseStacks + uint32(work.nStackRoots)
	 114  }
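
The counts computed above carve out a single flat index space of markroot jobs, which markroot dispatches on below. A minimal sketch of that layout with made-up root counts (illustration only; the real counts come from the module data, markArenas, and allglen):

// Illustration only; not part of mgcmark.go. Hypothetical root counts.
package main

import "fmt"

func main() {
	const fixedRootCount = 2 // fixedRootFinalizers, fixedRootFreeGStacks
	nDataRoots, nBSSRoots, nSpanRoots, nStackRoots := 3, 1, 4, 10

	baseData := uint32(fixedRootCount)
	baseBSS := baseData + uint32(nDataRoots)
	baseSpans := baseBSS + uint32(nBSSRoots)
	baseStacks := baseSpans + uint32(nSpanRoots)
	baseEnd := baseStacks + uint32(nStackRoots)

	// Jobs [0,2) are the fixed roots, [2,5) data blocks, [5,6) BSS blocks,
	// [6,10) span roots, and [10,20) goroutine stacks.
	fmt.Println(baseData, baseBSS, baseSpans, baseStacks, baseEnd) // 2 5 6 10 20
}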
	 115  
	 116  // gcMarkRootCheck checks that all roots have been scanned. It is
	 117  // purely for debugging.
	 118  func gcMarkRootCheck() {
	 119  	if work.markrootNext < work.markrootJobs {
	 120  		print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n")
	 121  		throw("left over markroot jobs")
	 122  	}
	 123  
	 124  	// Check that stacks have been scanned.
	 125  	//
	 126  	// We only check the first nStackRoots Gs that we should have scanned.
	 127  	// Since we don't care about newer Gs (see comment in
	 128  	// gcMarkRootPrepare), no locking is required.
	 129  	i := 0
	 130  	forEachGRace(func(gp *g) {
	 131  		if i >= work.nStackRoots {
	 132  			return
	 133  		}
	 134  
	 135  		if !gp.gcscandone {
	 136  			println("gp", gp, "goid", gp.goid,
	 137  				"status", readgstatus(gp),
	 138  				"gcscandone", gp.gcscandone)
	 139  			throw("scan missed a g")
	 140  		}
	 141  
	 142  		i++
	 143  	})
	 144  }
	 145  
	 146  // ptrmask for an allocation containing a single pointer.
	 147  var oneptrmask = [...]uint8{1}
	 148  
	 149  // markroot scans the i'th root.
	 150  //
	 151  // Preemption must be disabled (because this uses a gcWork).
	 152  //
	 153  // nowritebarrier is only advisory here.
	 154  //
	 155  //go:nowritebarrier
	 156  func markroot(gcw *gcWork, i uint32) {
	 157  	// Note: if you add a case here, please also update heapdump.go:dumproots.
	 158  	switch {
	 159  	case work.baseData <= i && i < work.baseBSS:
	 160  		for _, datap := range activeModules() {
	 161  			markrootBlock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, gcw, int(i-work.baseData))
	 162  		}
	 163  
	 164  	case work.baseBSS <= i && i < work.baseSpans:
	 165  		for _, datap := range activeModules() {
	 166  			markrootBlock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, gcw, int(i-work.baseBSS))
	 167  		}
	 168  
	 169  	case i == fixedRootFinalizers:
	 170  		for fb := allfin; fb != nil; fb = fb.alllink {
	 171  			cnt := uintptr(atomic.Load(&fb.cnt))
	 172  			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw, nil)
	 173  		}
	 174  
	 175  	case i == fixedRootFreeGStacks:
	 176  		// Switch to the system stack so we can call
	 177  		// stackfree.
	 178  		systemstack(markrootFreeGStacks)
	 179  
	 180  	case work.baseSpans <= i && i < work.baseStacks:
	 181  		// mark mspan.specials
	 182  		markrootSpans(gcw, int(i-work.baseSpans))
	 183  
	 184  	default:
	 185  		// the rest is scanning goroutine stacks
	 186  		var gp *g
	 187  		if work.baseStacks <= i && i < work.baseEnd {
	 188  			// N.B. Atomic read of allglen in gcMarkRootPrepare
	 189  			// acts as a barrier to ensure that allgs must be large
	 190  			// enough to contain all relevant Gs.
	 191  			gp = allgs[i-work.baseStacks]
	 192  		} else {
	 193  			throw("markroot: bad index")
	 194  		}
	 195  
	 196  		// Remember when we first observed the G blocked;
	 197  		// this is needed only for traceback output.
	 198  		status := readgstatus(gp) // We are not in a scan state
	 199  		if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
	 200  			gp.waitsince = work.tstart
	 201  		}
	 202  
	 203  		// scanstack must be done on the system stack in case
	 204  		// we're trying to scan our own stack.
	 205  		systemstack(func() {
	 206  			// If this is a self-scan, put the user G in
	 207  			// _Gwaiting to prevent self-deadlock. It may
	 208  			// already be in _Gwaiting if this is a mark
	 209  			// worker or we're in mark termination.
	 210  			userG := getg().m.curg
	 211  			selfScan := gp == userG && readgstatus(userG) == _Grunning
	 212  			if selfScan {
	 213  				casgstatus(userG, _Grunning, _Gwaiting)
	 214  				userG.waitreason = waitReasonGarbageCollectionScan
	 215  			}
	 216  
	 217  			// TODO: suspendG blocks (and spins) until gp
	 218  			// stops, which may take a while for
	 219  			// running goroutines. Consider doing this in
	 220  			// two phases where the first is non-blocking:
	 221  			// we scan the stacks we can and ask running
	 222  			// goroutines to scan themselves; and the
	 223  			// second blocks.
	 224  			stopped := suspendG(gp)
	 225  			if stopped.dead {
	 226  				gp.gcscandone = true
	 227  				return
	 228  			}
	 229  			if gp.gcscandone {
	 230  				throw("g already scanned")
	 231  			}
	 232  			scanstack(gp, gcw)
	 233  			gp.gcscandone = true
	 234  			resumeG(stopped)
	 235  
	 236  			if selfScan {
	 237  				casgstatus(userG, _Gwaiting, _Grunning)
	 238  			}
	 239  		})
	 240  	}
	 241  }
	 242  
	 243  // markrootBlock scans the shard'th shard of the block of memory [b0,
	 244  // b0+n0), with the given pointer mask.
	 245  //
	 246  //go:nowritebarrier
	 247  func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) {
	 248  	if rootBlockBytes%(8*sys.PtrSize) != 0 {
	 249  		// This is necessary to pick byte offsets in ptrmask0.
	 250  		throw("rootBlockBytes must be a multiple of 8*ptrSize")
	 251  	}
	 252  
	 253  	// Note that if b0 is toward the end of the address space,
	 254  	// then b0 + rootBlockBytes might wrap around.
	 255  	// These tests are written to avoid any possible overflow.
	 256  	off := uintptr(shard) * rootBlockBytes
	 257  	if off >= n0 {
	 258  		return
	 259  	}
	 260  	b := b0 + off
	 261  	ptrmask := (*uint8)(add(unsafe.Pointer(ptrmask0), uintptr(shard)*(rootBlockBytes/(8*sys.PtrSize))))
	 262  	n := uintptr(rootBlockBytes)
	 263  	if off+n > n0 {
	 264  		n = n0 - off
	 265  	}
	 266  
	 267  	// Scan this shard.
	 268  	scanblock(b, n, ptrmask, gcw, nil)
	 269  }
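
For example, a hypothetical 600 KiB data segment is covered by three shards; each shard's byte range and the byte offset into its pointer mask follow directly from the arithmetic above (illustration only):

// Illustration only; not part of mgcmark.go. Shard layout for a hypothetical
// 600 KiB root block scanned in rootBlockBytes-sized pieces.
package main

import (
	"fmt"
	"unsafe"
)

func main() {
	const rootBlockBytes = 256 << 10
	ptrSize := unsafe.Sizeof(uintptr(0)) // 8 on 64-bit platforms

	n0 := uintptr(600 << 10)
	for shard := uintptr(0); shard*rootBlockBytes < n0; shard++ {
		off := shard * rootBlockBytes
		n := uintptr(rootBlockBytes)
		if off+n > n0 {
			n = n0 - off
		}
		maskOff := shard * (rootBlockBytes / (8 * ptrSize)) // one mask bit per word
		fmt.Printf("shard %d: bytes [%d, %d), ptrmask byte offset %d\n", shard, off, off+n, maskOff)
	}
}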
	 270  
	 271  // markrootFreeGStacks frees stacks of dead Gs.
	 272  //
	 273  // This does not free stacks of dead Gs cached on Ps, but having a few
	 274  // cached stacks around isn't a problem.
	 275  func markrootFreeGStacks() {
	 276  	// Take list of dead Gs with stacks.
	 277  	lock(&sched.gFree.lock)
	 278  	list := sched.gFree.stack
	 279  	sched.gFree.stack = gList{}
	 280  	unlock(&sched.gFree.lock)
	 281  	if list.empty() {
	 282  		return
	 283  	}
	 284  
	 285  	// Free stacks.
	 286  	q := gQueue{list.head, list.head}
	 287  	for gp := list.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
	 288  		stackfree(gp.stack)
	 289  		gp.stack.lo = 0
	 290  		gp.stack.hi = 0
	 291  		// Manipulate the queue directly since the Gs are
	 292  		// already all linked the right way.
	 293  		q.tail.set(gp)
	 294  	}
	 295  
	 296  	// Put Gs back on the free list.
	 297  	lock(&sched.gFree.lock)
	 298  	sched.gFree.noStack.pushAll(q)
	 299  	unlock(&sched.gFree.lock)
	 300  }
	 301  
	 302  // markrootSpans marks roots for one shard of markArenas.
	 303  //
	 304  //go:nowritebarrier
	 305  func markrootSpans(gcw *gcWork, shard int) {
	 306  	// Objects with finalizers have two GC-related invariants:
	 307  	//
	 308  	// 1) Everything reachable from the object must be marked.
	 309  	// This ensures that when we pass the object to its finalizer,
	 310  	// everything the finalizer can reach will be retained.
	 311  	//
	 312  	// 2) Finalizer specials (which are not in the garbage
	 313  	// collected heap) are roots. In practice, this means the fn
	 314  	// field must be scanned.
	 315  	sg := mheap_.sweepgen
	 316  
	 317  	// Find the arena and page index into that arena for this shard.
	 318  	ai := mheap_.markArenas[shard/(pagesPerArena/pagesPerSpanRoot)]
	 319  	ha := mheap_.arenas[ai.l1()][ai.l2()]
	 320  	arenaPage := uint(uintptr(shard) * pagesPerSpanRoot % pagesPerArena)
	 321  
	 322  	// Construct slice of bitmap which we'll iterate over.
	 323  	specialsbits := ha.pageSpecials[arenaPage/8:]
	 324  	specialsbits = specialsbits[:pagesPerSpanRoot/8]
	 325  	for i := range specialsbits {
	 326  		// Find set bits, which correspond to spans with specials.
	 327  		specials := atomic.Load8(&specialsbits[i])
	 328  		if specials == 0 {
	 329  			continue
	 330  		}
	 331  		for j := uint(0); j < 8; j++ {
	 332  			if specials&(1<<j) == 0 {
	 333  				continue
	 334  			}
	 335  			// Find the span for this bit.
	 336  			//
	 337  			// This value is guaranteed to be non-nil because having
	 338  			// specials implies that the span is in-use, and since we're
	 339  			// currently marking we can be sure that we don't have to worry
	 340  			// about the span being freed and re-used.
	 341  			s := ha.spans[arenaPage+uint(i)*8+j]
	 342  
	 343  			// The state must be mSpanInUse if the specials bit is set, so
	 344  			// sanity check that.
	 345  			if state := s.state.get(); state != mSpanInUse {
	 346  				print("s.state = ", state, "\n")
	 347  				throw("non in-use span found with specials bit set")
	 348  			}
	 349  			// Check that this span was swept (it may be cached or uncached).
	 350  			if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
	 351  				// sweepgen was updated (+2) during non-checkmark GC pass
	 352  				print("sweep ", s.sweepgen, " ", sg, "\n")
	 353  				throw("gc: unswept span")
	 354  			}
	 355  
	 356  			// Lock the specials to prevent a special from being
	 357  			// removed from the list while we're traversing it.
	 358  			lock(&s.speciallock)
	 359  			for sp := s.specials; sp != nil; sp = sp.next {
	 360  				if sp.kind != _KindSpecialFinalizer {
	 361  					continue
	 362  				}
	 363  				// don't mark finalized object, but scan it so we
	 364  				// retain everything it points to.
	 365  				spf := (*specialfinalizer)(unsafe.Pointer(sp))
	 366  				// A finalizer can be set for an inner byte of an object; find the object's beginning.
	 367  				p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize
	 368  
	 369  				// Mark everything that can be reached from
	 370  				// the object (but *not* the object itself or
	 371  				// we'll never collect it).
	 372  				scanobject(p, gcw)
	 373  
	 374  				// The special itself is a root.
	 375  				scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
	 376  			}
	 377  			unlock(&s.speciallock)
	 378  		}
	 379  	}
	 380  }
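
To make the shard arithmetic above concrete, here is a sketch of how shard numbers map to arenas and page ranges, assuming the common 64-bit configuration of 64 MiB arenas with 8 KiB pages (pagesPerArena = 8192). This is an illustration, not part of mgcmark.go:

// Illustration only; not part of mgcmark.go. Assumes pagesPerArena = 8192
// (64 MiB arenas, 8 KiB pages), the common 64-bit configuration.
package main

import "fmt"

func main() {
	const (
		pagesPerArena    = 8192
		pagesPerSpanRoot = 512
	)
	shardsPerArena := pagesPerArena / pagesPerSpanRoot // 16

	for _, shard := range []int{0, 1, 15, 16, 17} {
		arena := shard / shardsPerArena
		arenaPage := uint(shard) * pagesPerSpanRoot % pagesPerArena
		fmt.Printf("shard %2d -> markArenas[%d], arena pages [%d, %d)\n",
			shard, arena, arenaPage, arenaPage+pagesPerSpanRoot)
	}
}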
	 381  
	 382  // gcAssistAlloc performs GC work to make gp's assist debt positive.
	 383  // gp must be the calling user goroutine.
	 384  //
	 385  // This must be called with preemption enabled.
	 386  func gcAssistAlloc(gp *g) {
	 387  	// Don't assist in non-preemptible contexts. These are
	 388  	// generally fragile and won't allow the assist to block.
	 389  	if getg() == gp.m.g0 {
	 390  		return
	 391  	}
	 392  	if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" {
	 393  		return
	 394  	}
	 395  
	 396  	traced := false
	 397  retry:
	 398  	// Compute the amount of scan work we need to do to make the
	 399  	// balance positive. When the required amount of work is low,
	 400  	// we over-assist to build up credit for future allocations
	 401  	// and amortize the cost of assisting.
	 402  	assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte))
	 403  	assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork))
	 404  	debtBytes := -gp.gcAssistBytes
	 405  	scanWork := int64(assistWorkPerByte * float64(debtBytes))
	 406  	if scanWork < gcOverAssistWork {
	 407  		scanWork = gcOverAssistWork
	 408  		debtBytes = int64(assistBytesPerWork * float64(scanWork))
	 409  	}
	 410  
	 411  	// Steal as much credit as we can from the background GC's
	 412  	// scan credit. This is racy and may drop the background
	 413  	// credit below 0 if two mutators steal at the same time. This
	 414  	// will just cause steals to fail until credit is accumulated
	 415  	// again, so in the long run it doesn't really matter, but we
	 416  	// do have to handle the negative credit case.
	 417  	bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
	 418  	stolen := int64(0)
	 419  	if bgScanCredit > 0 {
	 420  		if bgScanCredit < scanWork {
	 421  			stolen = bgScanCredit
	 422  			gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(stolen))
	 423  		} else {
	 424  			stolen = scanWork
	 425  			gp.gcAssistBytes += debtBytes
	 426  		}
	 427  		atomic.Xaddint64(&gcController.bgScanCredit, -stolen)
	 428  
	 429  		scanWork -= stolen
	 430  
	 431  		if scanWork == 0 {
	 432  			// We were able to steal all of the credit we
	 433  			// needed.
	 434  			if traced {
	 435  				traceGCMarkAssistDone()
	 436  			}
	 437  			return
	 438  		}
	 439  	}
	 440  
	 441  	if trace.enabled && !traced {
	 442  		traced = true
	 443  		traceGCMarkAssistStart()
	 444  	}
	 445  
	 446  	// Perform assist work
	 447  	systemstack(func() {
	 448  		gcAssistAlloc1(gp, scanWork)
	 449  		// The user stack may have moved, so this can't touch
	 450  		// anything on it until it returns from systemstack.
	 451  	})
	 452  
	 453  	completed := gp.param != nil
	 454  	gp.param = nil
	 455  	if completed {
	 456  		gcMarkDone()
	 457  	}
	 458  
	 459  	if gp.gcAssistBytes < 0 {
	 460  		// We were unable to steal enough credit or perform
	 461  		// enough work to pay off the assist debt. We need to
	 462  		// do one of these before letting the mutator allocate
	 463  		// more to prevent over-allocation.
	 464  		//
	 465  		// If this is because we were preempted, reschedule
	 466  		// and try some more.
	 467  		if gp.preempt {
	 468  			Gosched()
	 469  			goto retry
	 470  		}
	 471  
	 472  		// Add this G to an assist queue and park. When the GC
	 473  		// has more background credit, it will satisfy queued
	 474  		// assists before flushing to the global credit pool.
	 475  		//
	 476  		// Note that this does *not* get woken up when more
	 477  		// work is added to the work list. The theory is that
	 478  		// there wasn't enough work to do anyway, so we might
	 479  		// as well let background marking take care of the
	 480  		// work that is available.
	 481  		if !gcParkAssist() {
	 482  			goto retry
	 483  		}
	 484  
	 485  		// At this point either background GC has satisfied
	 486  		// this G's assist debt, or the GC cycle is over.
	 487  	}
	 488  	if traced {
	 489  		traceGCMarkAssistDone()
	 490  	}
	 491  }
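
A worked example of the debt-to-scan-work conversion at the top of the retry loop. The numbers are made up, and gcOverAssistWork (which is defined elsewhere in the runtime, not in this file) is assumed to be 64 << 10 for this sketch:

// Illustration only; not part of mgcmark.go. assistWorkPerByte and
// gcOverAssistWork are assumed values, chosen to show the over-assist path.
package main

import "fmt"

func main() {
	const (
		assistWorkPerByte  = 0.5                   // assumed controller ratio
		assistBytesPerWork = 1 / assistWorkPerByte // its reciprocal
		gcOverAssistWork   = 64 << 10              // assumed for this sketch; the real constant lives in mgc.go
	)

	debtBytes := int64(4096) // the G owes 4 KiB of allocation debt
	scanWork := int64(assistWorkPerByte * float64(debtBytes))
	if scanWork < gcOverAssistWork {
		// Over-assist: do extra work now to build credit so that many
		// small allocations don't each pay the fixed cost of an assist.
		scanWork = gcOverAssistWork
		debtBytes = int64(assistBytesPerWork * float64(scanWork))
	}
	fmt.Println(scanWork, debtBytes) // 65536 131072
}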
	 492  
	 493  // gcAssistAlloc1 is the part of gcAssistAlloc that runs on the system
	 494  // stack. This is a separate function to make it easier to see that
	 495  // we're not capturing anything from the user stack, since the user
	 496  // stack may move while we're in this function.
	 497  //
	 498  // gcAssistAlloc1 indicates whether this assist completed the mark
	 499  // phase by setting gp.param to non-nil. This can't be communicated on
	 500  // the stack since it may move.
	 501  //
	 502  //go:systemstack
	 503  func gcAssistAlloc1(gp *g, scanWork int64) {
	 504  	// Clear the flag indicating that this assist completed the
	 505  	// mark phase.
	 506  	gp.param = nil
	 507  
	 508  	if atomic.Load(&gcBlackenEnabled) == 0 {
	 509  		// The gcBlackenEnabled check in malloc races with the
	 510  		// store that clears it but an atomic check in every malloc
	 511  		// would be a performance hit.
	 512  		// Instead we recheck it here on the non-preemptible system
	 513  		// stack to determine if we should perform an assist.
	 514  
	 515  		// GC is done, so ignore any remaining debt.
	 516  		gp.gcAssistBytes = 0
	 517  		return
	 518  	}
	 519  	// Track time spent in this assist. Since we're on the
	 520  	// system stack, this is non-preemptible, so we can
	 521  	// just measure start and end time.
	 522  	startTime := nanotime()
	 523  
	 524  	decnwait := atomic.Xadd(&work.nwait, -1)
	 525  	if decnwait == work.nproc {
	 526  		println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
	 527  		throw("work.nwait > work.nproc")
	 528  	}
	 529  
	 530  	// gcDrainN requires the caller to be preemptible.
	 531  	casgstatus(gp, _Grunning, _Gwaiting)
	 532  	gp.waitreason = waitReasonGCAssistMarking
	 533  
	 534  	// drain own cached work first in the hopes that it
	 535  	// will be more cache friendly.
	 536  	gcw := &getg().m.p.ptr().gcw
	 537  	workDone := gcDrainN(gcw, scanWork)
	 538  
	 539  	casgstatus(gp, _Gwaiting, _Grunning)
	 540  
	 541  	// Record that we did this much scan work.
	 542  	//
	 543  	// Back out the number of bytes of assist credit that
	 544  	// this scan work counts for. The "1+" is a poor man's
	 545  	// round-up, to ensure this adds credit even if
	 546  	// assistBytesPerWork is very low.
	 547  	assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork))
	 548  	gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(workDone))
	 549  
	 550  	// If this is the last worker and we ran out of work,
	 551  	// signal a completion point.
	 552  	incnwait := atomic.Xadd(&work.nwait, +1)
	 553  	if incnwait > work.nproc {
	 554  		println("runtime: work.nwait=", incnwait,
	 555  			"work.nproc=", work.nproc)
	 556  		throw("work.nwait > work.nproc")
	 557  	}
	 558  
	 559  	if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
	 560  		// This has reached a background completion point. Set
	 561  		// gp.param to a non-nil value to indicate this. It
	 562  		// doesn't matter what we set it to (it just has to be
	 563  		// a valid pointer).
	 564  		gp.param = unsafe.Pointer(gp)
	 565  	}
	 566  	duration := nanotime() - startTime
	 567  	_p_ := gp.m.p.ptr()
	 568  	_p_.gcAssistTime += duration
	 569  	if _p_.gcAssistTime > gcAssistTimeSlack {
	 570  		atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
	 571  		_p_.gcAssistTime = 0
	 572  	}
	 573  }
	 574  
	 575  // gcWakeAllAssists wakes all currently blocked assists. This is used
	 576  // at the end of a GC cycle. gcBlackenEnabled must be false to prevent
	 577  // new assists from going to sleep after this point.
	 578  func gcWakeAllAssists() {
	 579  	lock(&work.assistQueue.lock)
	 580  	list := work.assistQueue.q.popList()
	 581  	injectglist(&list)
	 582  	unlock(&work.assistQueue.lock)
	 583  }
	 584  
	 585  // gcParkAssist puts the current goroutine on the assist queue and parks.
	 586  //
	 587  // gcParkAssist reports whether the assist is now satisfied. If it
	 588  // returns false, the caller must retry the assist.
	 589  //
	 590  //go:nowritebarrier
	 591  func gcParkAssist() bool {
	 592  	lock(&work.assistQueue.lock)
	 593  	// If the GC cycle finished while we were getting the lock,
	 594  	// exit the assist. The cycle can't finish while we hold the
	 595  	// lock.
	 596  	if atomic.Load(&gcBlackenEnabled) == 0 {
	 597  		unlock(&work.assistQueue.lock)
	 598  		return true
	 599  	}
	 600  
	 601  	gp := getg()
	 602  	oldList := work.assistQueue.q
	 603  	work.assistQueue.q.pushBack(gp)
	 604  
	 605  	// Recheck for background credit now that this G is in
	 606  	// the queue, but can still back out. This avoids a
	 607  	// race in case background marking has flushed more
	 608  	// credit since we checked above.
	 609  	if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
	 610  		work.assistQueue.q = oldList
	 611  		if oldList.tail != 0 {
	 612  			oldList.tail.ptr().schedlink.set(nil)
	 613  		}
	 614  		unlock(&work.assistQueue.lock)
	 615  		return false
	 616  	}
	 617  	// Park.
	 618  	goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceEvGoBlockGC, 2)
	 619  	return true
	 620  }
	 621  
	 622  // gcFlushBgCredit flushes scanWork units of background scan work
	 623  // credit. This first satisfies blocked assists on the
	 624  // work.assistQueue and then flushes any remaining credit to
	 625  // gcController.bgScanCredit.
	 626  //
	 627  // Write barriers are disallowed because this is used by gcDrain after
	 628  // it has ensured that all work is drained and this must preserve that
	 629  // condition.
	 630  //
	 631  //go:nowritebarrierrec
	 632  func gcFlushBgCredit(scanWork int64) {
	 633  	if work.assistQueue.q.empty() {
	 634  		// Fast path; there are no blocked assists. There's a
	 635  		// small window here where an assist may add itself to
	 636  		// the blocked queue and park. If that happens, we'll
	 637  		// just get it on the next flush.
	 638  		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
	 639  		return
	 640  	}
	 641  
	 642  	assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork))
	 643  	scanBytes := int64(float64(scanWork) * assistBytesPerWork)
	 644  
	 645  	lock(&work.assistQueue.lock)
	 646  	for !work.assistQueue.q.empty() && scanBytes > 0 {
	 647  		gp := work.assistQueue.q.pop()
	 648  		// Note that gp.gcAssistBytes is negative because gp
	 649  		// is in debt. Think carefully about the signs below.
	 650  		if scanBytes+gp.gcAssistBytes >= 0 {
	 651  			// Satisfy this entire assist debt.
	 652  			scanBytes += gp.gcAssistBytes
	 653  			gp.gcAssistBytes = 0
	 654  			// It's important that we *not* put gp in
	 655  			// runnext. Otherwise, it's possible for user
	 656  			// code to exploit the GC worker's high
	 657  			// scheduler priority to get itself always run
	 658  			// before other goroutines and always in the
	 659  			// fresh quantum started by GC.
	 660  			ready(gp, 0, false)
	 661  		} else {
	 662  			// Partially satisfy this assist.
	 663  			gp.gcAssistBytes += scanBytes
	 664  			scanBytes = 0
	 665  			// As a heuristic, we move this assist to the
	 666  			// back of the queue so that large assists
	 667  			// can't clog up the assist queue and
	 668  			// substantially delay small assists.
	 669  			work.assistQueue.q.pushBack(gp)
	 670  			break
	 671  		}
	 672  	}
	 673  
	 674  	if scanBytes > 0 {
	 675  		// Convert from scan bytes back to work.
	 676  		assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte))
	 677  		scanWork = int64(float64(scanBytes) * assistWorkPerByte)
	 678  		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
	 679  	}
	 680  	unlock(&work.assistQueue.lock)
	 681  }
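
A small worked example of the distribution loop above, with made-up debts: 96 KiB of newly earned scan bytes fully satisfies the first parked assist and partially satisfies the second, leaving nothing to flush to the global pool (illustration only):

// Illustration only; not part of mgcmark.go. Mirrors the credit-distribution
// loop above with two hypothetical parked assists.
package main

import "fmt"

func main() {
	scanBytes := int64(96 << 10)
	debts := []int64{-32 << 10, -128 << 10} // gcAssistBytes is negative while in debt

	for i := range debts {
		if scanBytes+debts[i] >= 0 {
			// Fully satisfy this assist (it would be readied here).
			scanBytes += debts[i]
			debts[i] = 0
		} else {
			// Partially satisfy it and stop; the credit is used up.
			debts[i] += scanBytes
			scanBytes = 0
			break
		}
	}
	fmt.Println(debts, scanBytes) // [0 -65536] 0
}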
	 682  
	 683  // scanstack scans gp's stack, greying all pointers found on the stack.
	 684  //
	 685  // scanstack will also shrink the stack if it is safe to do so. If it
	 686  // is not, it schedules a stack shrink for the next synchronous safe
	 687  // point.
	 688  //
	 689  // scanstack is marked go:systemstack because it must not be preempted
	 690  // while using a workbuf.
	 691  //
	 692  //go:nowritebarrier
	 693  //go:systemstack
	 694  func scanstack(gp *g, gcw *gcWork) {
	 695  	if readgstatus(gp)&_Gscan == 0 {
	 696  		print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
	 697  		throw("scanstack - bad status")
	 698  	}
	 699  
	 700  	switch readgstatus(gp) &^ _Gscan {
	 701  	default:
	 702  		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
	 703  		throw("mark - bad status")
	 704  	case _Gdead:
	 705  		return
	 706  	case _Grunning:
	 707  		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
	 708  		throw("scanstack: goroutine not stopped")
	 709  	case _Grunnable, _Gsyscall, _Gwaiting:
	 710  		// ok
	 711  	}
	 712  
	 713  	if gp == getg() {
	 714  		throw("can't scan our own stack")
	 715  	}
	 716  
	 717  	if isShrinkStackSafe(gp) {
	 718  		// Shrink the stack if not much of it is being used.
	 719  		shrinkstack(gp)
	 720  	} else {
	 721  		// Otherwise, shrink the stack at the next sync safe point.
	 722  		gp.preemptShrink = true
	 723  	}
	 724  
	 725  	var state stackScanState
	 726  	state.stack = gp.stack
	 727  
	 728  	if stackTraceDebug {
	 729  		println("stack trace goroutine", gp.goid)
	 730  	}
	 731  
	 732  	if debugScanConservative && gp.asyncSafePoint {
	 733  		print("scanning async preempted goroutine ", gp.goid, " stack [", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
	 734  	}
	 735  
	 736  	// Scan the saved context register. This is effectively a live
	 737  	// register that gets moved back and forth between the
	 738  	// register and sched.ctxt without a write barrier.
	 739  	if gp.sched.ctxt != nil {
	 740  		scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw, &state)
	 741  	}
	 742  
	 743  	// Scan the stack. Accumulate a list of stack objects.
	 744  	scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
	 745  		scanframeworker(frame, &state, gcw)
	 746  		return true
	 747  	}
	 748  	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
	 749  
	 750  	// Find additional pointers that point into the stack from the heap.
	 751  	// Currently this includes defers and panics. See also function copystack.
	 752  
	 753  	// Find and trace all defer arguments.
	 754  	tracebackdefers(gp, scanframe, nil)
	 755  
	 756  	// Find and trace other pointers in defer records.
	 757  	for d := gp._defer; d != nil; d = d.link {
	 758  		if d.fn != nil {
	 759  			// tracebackdefers above does not scan the func value, which could
	 760  			// be a stack allocated closure. See issue 30453.
	 761  			scanblock(uintptr(unsafe.Pointer(&d.fn)), sys.PtrSize, &oneptrmask[0], gcw, &state)
	 762  		}
	 763  		if d.link != nil {
	 764  			// The link field of a stack-allocated defer record might point
	 765  			// to a heap-allocated defer record. Keep that heap record live.
	 766  			scanblock(uintptr(unsafe.Pointer(&d.link)), sys.PtrSize, &oneptrmask[0], gcw, &state)
	 767  		}
	 768  		// Retain defers records themselves.
	 769  		// Defer records might not be reachable from the G through regular heap
	 770  		// tracing because the defer linked list might weave between the stack and the heap.
	 771  		if d.heap {
	 772  			scanblock(uintptr(unsafe.Pointer(&d)), sys.PtrSize, &oneptrmask[0], gcw, &state)
	 773  		}
	 774  	}
	 775  	if gp._panic != nil {
	 776  		// Panics are always stack allocated.
	 777  		state.putPtr(uintptr(unsafe.Pointer(gp._panic)), false)
	 778  	}
	 779  
	 780  	// Find and scan all reachable stack objects.
	 781  	//
	 782  	// The state's pointer queue prioritizes precise pointers over
	 783  	// conservative pointers so that we'll prefer scanning stack
	 784  	// objects precisely.
	 785  	state.buildIndex()
	 786  	for {
	 787  		p, conservative := state.getPtr()
	 788  		if p == 0 {
	 789  			break
	 790  		}
	 791  		obj := state.findObject(p)
	 792  		if obj == nil {
	 793  			continue
	 794  		}
	 795  		r := obj.r
	 796  		if r == nil {
	 797  			// We've already scanned this object.
	 798  			continue
	 799  		}
	 800  		obj.setRecord(nil) // Don't scan it again.
	 801  		if stackTraceDebug {
	 802  			printlock()
	 803  			print("	live stkobj at ", hex(state.stack.lo+uintptr(obj.off)), " of size ", obj.size)
	 804  			if conservative {
	 805  				print(" (conservative)")
	 806  			}
	 807  			println()
	 808  			printunlock()
	 809  		}
	 810  		gcdata := r.gcdata
	 811  		var s *mspan
	 812  		if r.useGCProg() {
	 813  			// This path is pretty unlikely: an object large enough
	 814  			// to have a GC program, allocated on the stack.
	 815  			// We need some space to unpack the program into a straight
	 816  			// bitmask, which we allocate/free here.
	 817  			// TODO: it would be nice if there were a way to run a GC
	 818  			// program without having to store all its bits. We'd have
	 819  			// to change from a Lempel-Ziv style program to something else.
	 820  			// Or we can forbid putting objects on stacks if they require
	 821  			// a gc program (see issue 27447).
	 822  			s = materializeGCProg(r.ptrdata(), gcdata)
	 823  			gcdata = (*byte)(unsafe.Pointer(s.startAddr))
	 824  		}
	 825  
	 826  		b := state.stack.lo + uintptr(obj.off)
	 827  		if conservative {
	 828  			scanConservative(b, r.ptrdata(), gcdata, gcw, &state)
	 829  		} else {
	 830  			scanblock(b, r.ptrdata(), gcdata, gcw, &state)
	 831  		}
	 832  
	 833  		if s != nil {
	 834  			dematerializeGCProg(s)
	 835  		}
	 836  	}
	 837  
	 838  	// Deallocate object buffers.
	 839  	// (Pointer buffers were all deallocated in the loop above.)
	 840  	for state.head != nil {
	 841  		x := state.head
	 842  		state.head = x.next
	 843  		if stackTraceDebug {
	 844  			for i := 0; i < x.nobj; i++ {
	 845  				obj := &x.obj[i]
	 846  				if obj.r == nil { // reachable
	 847  					continue
	 848  				}
	 849  				println("	dead stkobj at", hex(gp.stack.lo+uintptr(obj.off)), "of size", obj.r.size)
	 850  				// Note: not necessarily really dead - only reachable-from-ptr dead.
	 851  			}
	 852  		}
	 853  		x.nobj = 0
	 854  		putempty((*workbuf)(unsafe.Pointer(x)))
	 855  	}
	 856  	if state.buf != nil || state.cbuf != nil || state.freeBuf != nil {
	 857  		throw("remaining pointer buffers")
	 858  	}
	 859  }
	 860  
	 861  // Scan a stack frame: local variables and function arguments/results.
	 862  //go:nowritebarrier
	 863  func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
	 864  	if _DebugGC > 1 && frame.continpc != 0 {
	 865  		print("scanframe ", funcname(frame.fn), "\n")
	 866  	}
	 867  
	 868  	isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == funcID_asyncPreempt
	 869  	isDebugCall := frame.fn.valid() && frame.fn.funcID == funcID_debugCallV2
	 870  	if state.conservative || isAsyncPreempt || isDebugCall {
	 871  		if debugScanConservative {
	 872  			println("conservatively scanning function", funcname(frame.fn), "at PC", hex(frame.continpc))
	 873  		}
	 874  
	 875  		// Conservatively scan the frame. Unlike the precise
	 876  		// case, this includes the outgoing argument space
	 877  		// since we may have stopped while this function was
	 878  		// setting up a call.
	 879  		//
	 880  		// TODO: We could narrow this down if the compiler
	 881  		// produced a single map per function of stack slots
	 882  		// and registers that ever contain a pointer.
	 883  		if frame.varp != 0 {
	 884  			size := frame.varp - frame.sp
	 885  			if size > 0 {
	 886  				scanConservative(frame.sp, size, nil, gcw, state)
	 887  			}
	 888  		}
	 889  
	 890  		// Scan arguments to this frame.
	 891  		if frame.arglen != 0 {
	 892  			// TODO: We could pass the entry argument map
	 893  			// to narrow this down further.
	 894  			scanConservative(frame.argp, frame.arglen, nil, gcw, state)
	 895  		}
	 896  
	 897  		if isAsyncPreempt || isDebugCall {
	 898  			// This function's frame contained the
	 899  			// registers for the asynchronously stopped
	 900  			// parent frame. Scan the parent
	 901  			// conservatively.
	 902  			state.conservative = true
	 903  		} else {
	 904  			// We only wanted to scan those two frames
	 905  			// conservatively. Clear the flag for future
	 906  			// frames.
	 907  			state.conservative = false
	 908  		}
	 909  		return
	 910  	}
	 911  
	 912  	locals, args, objs := getStackMap(frame, &state.cache, false)
	 913  
	 914  	// Scan local variables if stack frame has been allocated.
	 915  	if locals.n > 0 {
	 916  		size := uintptr(locals.n) * sys.PtrSize
	 917  		scanblock(frame.varp-size, size, locals.bytedata, gcw, state)
	 918  	}
	 919  
	 920  	// Scan arguments.
	 921  	if args.n > 0 {
	 922  		scanblock(frame.argp, uintptr(args.n)*sys.PtrSize, args.bytedata, gcw, state)
	 923  	}
	 924  
	 925  	// Add all stack objects to the stack object list.
	 926  	if frame.varp != 0 {
	 927  		// varp is 0 for defers, where there are no locals.
	 928  		// In that case, there can't be a pointer to its args, either.
	 929  		// (And all args would be scanned above anyway.)
	 930  		for i, obj := range objs {
	 931  			off := obj.off
	 932  			base := frame.varp // locals base pointer
	 933  			if off >= 0 {
	 934  				base = frame.argp // arguments and return values base pointer
	 935  			}
	 936  			ptr := base + uintptr(off)
	 937  			if ptr < frame.sp {
	 938  				// object hasn't been allocated in the frame yet.
	 939  				continue
	 940  			}
	 941  			if stackTraceDebug {
	 942  				println("stkobj at", hex(ptr), "of size", obj.size)
	 943  			}
	 944  			state.addObject(ptr, &objs[i])
	 945  		}
	 946  	}
	 947  }
	 948  
	 949  type gcDrainFlags int
	 950  
	 951  const (
	 952  	gcDrainUntilPreempt gcDrainFlags = 1 << iota
	 953  	gcDrainFlushBgCredit
	 954  	gcDrainIdle
	 955  	gcDrainFractional
	 956  )
	 957  
	 958  // gcDrain scans roots and objects in work buffers, blackening grey
	 959  // objects until it is unable to get more work. It may return before
	 960  // GC is done; it's the caller's responsibility to balance work from
	 961  // other Ps.
	 962  //
	 963  // If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
	 964  // is set.
	 965  //
	 966  // If flags&gcDrainIdle != 0, gcDrain returns when there is other work
	 967  // to do.
	 968  //
	 969  // If flags&gcDrainFractional != 0, gcDrain self-preempts when
	 970  // pollFractionalWorkerExit() returns true. This implies
	 971  // gcDrainNoBlock.
	 972  //
	 973  // If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
	 974  // credit to gcController.bgScanCredit every gcCreditSlack units of
	 975  // scan work.
	 976  //
	 977  // gcDrain will always return if there is a pending STW.
	 978  //
	 979  //go:nowritebarrier
	 980  func gcDrain(gcw *gcWork, flags gcDrainFlags) {
	 981  	if !writeBarrier.needed {
	 982  		throw("gcDrain phase incorrect")
	 983  	}
	 984  
	 985  	gp := getg().m.curg
	 986  	preemptible := flags&gcDrainUntilPreempt != 0
	 987  	flushBgCredit := flags&gcDrainFlushBgCredit != 0
	 988  	idle := flags&gcDrainIdle != 0
	 989  
	 990  	initScanWork := gcw.scanWork
	 991  
	 992  	// checkWork is the scan work before performing the next
	 993  	// self-preempt check.
	 994  	checkWork := int64(1<<63 - 1)
	 995  	var check func() bool
	 996  	if flags&(gcDrainIdle|gcDrainFractional) != 0 {
	 997  		checkWork = initScanWork + drainCheckThreshold
	 998  		if idle {
	 999  			check = pollWork
	1000  		} else if flags&gcDrainFractional != 0 {
	1001  			check = pollFractionalWorkerExit
	1002  		}
	1003  	}
	1004  
	1005  	// Drain root marking jobs.
	1006  	if work.markrootNext < work.markrootJobs {
	1007  		// Stop if we're preemptible or if someone wants to STW.
	1008  		for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
	1009  			job := atomic.Xadd(&work.markrootNext, +1) - 1
	1010  			if job >= work.markrootJobs {
	1011  				break
	1012  			}
	1013  			markroot(gcw, job)
	1014  			if check != nil && check() {
	1015  				goto done
	1016  			}
	1017  		}
	1018  	}
	1019  
	1020  	// Drain heap marking jobs.
	1021  	// Stop if we're preemptible or if someone wants to STW.
	1022  	for !(gp.preempt && (preemptible || atomic.Load(&sched.gcwaiting) != 0)) {
	1023  		// Try to keep work available on the global queue. We used to
	1024  		// check if there were waiting workers, but it's better to
	1025  		// just keep work available than to make workers wait. In the
	1026  		// worst case, we'll do O(log(_WorkbufSize)) unnecessary
	1027  		// balances.
	1028  		if work.full == 0 {
	1029  			gcw.balance()
	1030  		}
	1031  
	1032  		b := gcw.tryGetFast()
	1033  		if b == 0 {
	1034  			b = gcw.tryGet()
	1035  			if b == 0 {
	1036  				// Flush the write barrier
	1037  				// buffer; this may create
	1038  				// more work.
	1039  				wbBufFlush(nil, 0)
	1040  				b = gcw.tryGet()
	1041  			}
	1042  		}
	1043  		if b == 0 {
	1044  			// Unable to get work.
	1045  			break
	1046  		}
	1047  		scanobject(b, gcw)
	1048  
	1049  		// Flush background scan work credit to the global
	1050  		// account if we've accumulated enough locally so
	1051  		// mutator assists can draw on it.
	1052  		if gcw.scanWork >= gcCreditSlack {
	1053  			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
	1054  			if flushBgCredit {
	1055  				gcFlushBgCredit(gcw.scanWork - initScanWork)
	1056  				initScanWork = 0
	1057  			}
	1058  			checkWork -= gcw.scanWork
	1059  			gcw.scanWork = 0
	1060  
	1061  			if checkWork <= 0 {
	1062  				checkWork += drainCheckThreshold
	1063  				if check != nil && check() {
	1064  					break
	1065  				}
	1066  			}
	1067  		}
	1068  	}
	1069  
	1070  done:
	1071  	// Flush remaining scan work credit.
	1072  	if gcw.scanWork > 0 {
	1073  		atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
	1074  		if flushBgCredit {
	1075  			gcFlushBgCredit(gcw.scanWork - initScanWork)
	1076  		}
	1077  		gcw.scanWork = 0
	1078  	}
	1079  }
	1080  
	1081  // gcDrainN blackens grey objects until it has performed roughly
	1082  // scanWork units of scan work or the G is preempted. This is
	1083  // best-effort, so it may perform less work if it fails to get a work
	1084  // buffer. Otherwise, it will perform at least n units of work, but
	1085  // may perform more because scanning is always done in whole object
	1086  // increments. It returns the amount of scan work performed.
	1087  //
	1088  // The caller goroutine must be in a preemptible state (e.g.,
	1089  // _Gwaiting) to prevent deadlocks during stack scanning. As a
	1090  // consequence, this must be called on the system stack.
	1091  //
	1092  //go:nowritebarrier
	1093  //go:systemstack
	1094  func gcDrainN(gcw *gcWork, scanWork int64) int64 {
	1095  	if !writeBarrier.needed {
	1096  		throw("gcDrainN phase incorrect")
	1097  	}
	1098  
	1099  	// There may already be scan work on the gcw, which we don't
	1100  	// want to claim was done by this call.
	1101  	workFlushed := -gcw.scanWork
	1102  
	1103  	gp := getg().m.curg
	1104  	for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
	1105  		// See gcDrain comment.
	1106  		if work.full == 0 {
	1107  			gcw.balance()
	1108  		}
	1109  
	1110  		// This might be a good place to add prefetch code...
	1111  		// if(wbuf.nobj > 4) {
	1112  		//     PREFETCH(wbuf->obj[wbuf.nobj - 3]);
	1113  		// }
	1114  		//
	1115  		b := gcw.tryGetFast()
	1116  		if b == 0 {
	1117  			b = gcw.tryGet()
	1118  			if b == 0 {
	1119  				// Flush the write barrier buffer;
	1120  				// this may create more work.
	1121  				wbBufFlush(nil, 0)
	1122  				b = gcw.tryGet()
	1123  			}
	1124  		}
	1125  
	1126  		if b == 0 {
	1127  			// Try to do a root job.
	1128  			//
	1129  			// TODO: Assists should get credit for this
	1130  			// work.
	1131  			if work.markrootNext < work.markrootJobs {
	1132  				job := atomic.Xadd(&work.markrootNext, +1) - 1
	1133  				if job < work.markrootJobs {
	1134  					markroot(gcw, job)
	1135  					continue
	1136  				}
	1137  			}
	1138  			// No heap or root jobs.
	1139  			break
	1140  		}
	1141  		scanobject(b, gcw)
	1142  
	1143  		// Flush background scan work credit.
	1144  		if gcw.scanWork >= gcCreditSlack {
	1145  			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
	1146  			workFlushed += gcw.scanWork
	1147  			gcw.scanWork = 0
	1148  		}
	1149  	}
	1150  
	1151  	// Unlike gcDrain, there's no need to flush remaining work
	1152  	// here because this never flushes to bgScanCredit and
	1153  	// gcw.dispose will flush any remaining work to scanWork.
	1154  
	1155  	return workFlushed + gcw.scanWork
	1156  }
	1157  
	1158  // scanblock scans b as scanobject would, but using an explicit
	1159  // pointer bitmap instead of the heap bitmap.
	1160  //
	1161  // This is used to scan non-heap roots, so it does not update
	1162  // gcw.bytesMarked or gcw.scanWork.
	1163  //
	1164  // If stk != nil, possible stack pointers are also reported to stk.putPtr.
	1165  //go:nowritebarrier
	1166  func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork, stk *stackScanState) {
	1167  	// Use local copies of original parameters, so that a stack trace
	1168  	// due to one of the throws below shows the original block
	1169  	// base and extent.
	1170  	b := b0
	1171  	n := n0
	1172  
	1173  	for i := uintptr(0); i < n; {
	1174  		// Find bits for the next word.
	1175  		bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8)))
	1176  		if bits == 0 {
	1177  			i += sys.PtrSize * 8
	1178  			continue
	1179  		}
	1180  		for j := 0; j < 8 && i < n; j++ {
	1181  			if bits&1 != 0 {
	1182  				// Same work as in scanobject; see comments there.
	1183  				p := *(*uintptr)(unsafe.Pointer(b + i))
	1184  				if p != 0 {
	1185  					if obj, span, objIndex := findObject(p, b, i); obj != 0 {
	1186  						greyobject(obj, b, i, span, gcw, objIndex)
	1187  					} else if stk != nil && p >= stk.stack.lo && p < stk.stack.hi {
	1188  						stk.putPtr(p, false)
	1189  					}
	1190  				}
	1191  			}
	1192  			bits >>= 1
	1193  			i += sys.PtrSize
	1194  		}
	1195  	}
	1196  }
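
The pointer mask is one bit per word, packed eight words to a byte. A sketch of how those bits are consumed (illustration only; the mask values are invented):

// Illustration only; not part of mgcmark.go. Bit k of ptrmask byte k/8 says
// whether word k of the block holds a pointer.
package main

import (
	"fmt"
	"unsafe"
)

func main() {
	ptrSize := unsafe.Sizeof(uintptr(0))

	// Invented mask for a 10-word block: words 0, 2, and 9 hold pointers.
	ptrmask := []uint8{0b00000101, 0b00000010}

	for i := uintptr(0); i < 10*ptrSize; i += ptrSize {
		word := i / ptrSize
		if ptrmask[word/8]>>(word%8)&1 != 0 {
			fmt.Println("word", word, "at byte offset", i, "holds a pointer")
		}
	}
}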
	1197  
	1198  // scanobject scans the object starting at b, adding pointers to gcw.
	1199  // b must point to the beginning of a heap object or an oblet.
	1200  // scanobject consults the GC bitmap for the pointer mask and the
	1201  // spans for the size of the object.
	1202  //
	1203  //go:nowritebarrier
	1204  func scanobject(b uintptr, gcw *gcWork) {
	1205  	// Find the bits for b and the size of the object at b.
	1206  	//
	1207  	// b is either the beginning of an object, in which case this
	1208  	// is the size of the object to scan, or it points to an
	1209  	// oblet, in which case we compute the size to scan below.
	1210  	hbits := heapBitsForAddr(b)
	1211  	s := spanOfUnchecked(b)
	1212  	n := s.elemsize
	1213  	if n == 0 {
	1214  		throw("scanobject n == 0")
	1215  	}
	1216  
	1217  	if n > maxObletBytes {
	1218  		// Large object. Break into oblets for better
	1219  		// parallelism and lower latency.
	1220  		if b == s.base() {
	1221  			// It's possible this is a noscan object (not
	1222  			// from greyobject, but from other code
	1223  			// paths), in which case we must *not* enqueue
	1224  			// oblets since their bitmaps will be
	1225  			// uninitialized.
	1226  			if s.spanclass.noscan() {
	1227  				// Bypass the whole scan.
	1228  				gcw.bytesMarked += uint64(n)
	1229  				return
	1230  			}
	1231  
	1232  			// Enqueue the other oblets to scan later.
	1233  			// Some oblets may be in b's scalar tail, but
	1234  			// these will be marked as "no more pointers",
	1235  			// so we'll drop out immediately when we go to
	1236  			// scan those.
	1237  			for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes {
	1238  				if !gcw.putFast(oblet) {
	1239  					gcw.put(oblet)
	1240  				}
	1241  			}
	1242  		}
	1243  
	1244  		// Compute the size of the oblet. Since this object
	1245  		// must be a large object, s.base() is the beginning
	1246  		// of the object.
	1247  		n = s.base() + s.elemsize - b
	1248  		if n > maxObletBytes {
	1249  			n = maxObletBytes
	1250  		}
	1251  	}
	1252  
	1253  	var i uintptr
	1254  	for i = 0; i < n; i, hbits = i+sys.PtrSize, hbits.next() {
	1255  		// Load bits once. See CL 22712 and issue 16973 for discussion.
	1256  		bits := hbits.bits()
	1257  		if bits&bitScan == 0 {
	1258  			break // no more pointers in this object
	1259  		}
	1260  		if bits&bitPointer == 0 {
	1261  			continue // not a pointer
	1262  		}
	1263  
	1264  		// Work here is duplicated in scanblock and above.
	1265  		// If you make changes here, make changes there too.
	1266  		obj := *(*uintptr)(unsafe.Pointer(b + i))
	1267  
	1268  		// At this point we have extracted the next potential pointer.
	1269  		// Quickly filter out nil and pointers back to the current object.
	1270  		if obj != 0 && obj-b >= n {
	1271  			// Test if obj points into the Go heap and, if so,
	1272  			// mark the object.
	1273  			//
	1274  			// Note that it's possible for findObject to
	1275  			// fail if obj points to a just-allocated heap
	1276  			// object because of a race with growing the
	1277  			// heap. In this case, we know the object was
	1278  			// just allocated and hence will be marked by
	1279  			// allocation itself.
	1280  			if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
	1281  				greyobject(obj, b, i, span, gcw, objIndex)
	1282  			}
	1283  		}
	1284  	}
	1285  	gcw.bytesMarked += uint64(n)
	1286  	gcw.scanWork += int64(i)
	1287  }
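
For instance, a hypothetical 1 MiB object is handled as eight oblets: the first 128 KiB is scanned in the current call, and the remaining seven oblet start addresses are queued, mirroring the loop above (illustration only):

// Illustration only; not part of mgcmark.go. Oblet start addresses queued for
// a hypothetical 1 MiB object at an arbitrary base address.
package main

import "fmt"

func main() {
	const maxObletBytes = 128 << 10

	base := uintptr(0x1000000) // hypothetical object base (span base)
	elemsize := uintptr(1 << 20)

	var queued []uintptr
	for oblet := base + maxObletBytes; oblet < base+elemsize; oblet += maxObletBytes {
		queued = append(queued, oblet)
	}
	fmt.Printf("%d oblets queued; first at base+%#x\n", len(queued), queued[0]-base)
}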
	1288  
	1289  // scanConservative scans block [b, b+n) conservatively, treating any
	1290  // pointer-like value in the block as a pointer.
	1291  //
	1292  // If ptrmask != nil, only words that are marked in ptrmask are
	1293  // considered as potential pointers.
	1294  //
	1295  // If state != nil, it's assumed that [b, b+n) is a block in the stack
	1296  // and may contain pointers to stack objects.
	1297  func scanConservative(b, n uintptr, ptrmask *uint8, gcw *gcWork, state *stackScanState) {
	1298  	if debugScanConservative {
	1299  		printlock()
	1300  		print("conservatively scanning [", hex(b), ",", hex(b+n), ")\n")
	1301  		hexdumpWords(b, b+n, func(p uintptr) byte {
	1302  			if ptrmask != nil {
	1303  				word := (p - b) / sys.PtrSize
	1304  				bits := *addb(ptrmask, word/8)
	1305  				if (bits>>(word%8))&1 == 0 {
	1306  					return '$'
	1307  				}
	1308  			}
	1309  
	1310  			val := *(*uintptr)(unsafe.Pointer(p))
	1311  			if state != nil && state.stack.lo <= val && val < state.stack.hi {
	1312  				return '@'
	1313  			}
	1314  
	1315  			span := spanOfHeap(val)
	1316  			if span == nil {
	1317  				return ' '
	1318  			}
	1319  			idx := span.objIndex(val)
	1320  			if span.isFree(idx) {
	1321  				return ' '
	1322  			}
	1323  			return '*'
	1324  		})
	1325  		printunlock()
	1326  	}
	1327  
	1328  	for i := uintptr(0); i < n; i += sys.PtrSize {
	1329  		if ptrmask != nil {
	1330  			word := i / sys.PtrSize
	1331  			bits := *addb(ptrmask, word/8)
	1332  			if bits == 0 {
	1333  				// Skip 8 words (the loop increment will do the 8th)
	1334  				//
	1335  				// This must be the first time we've
	1336  				// seen this word of ptrmask, so i
	1337  				// must be 8-word-aligned, but check
	1338  				// our reasoning just in case.
	1339  				if i%(sys.PtrSize*8) != 0 {
	1340  					throw("misaligned mask")
	1341  				}
	1342  				i += sys.PtrSize*8 - sys.PtrSize
	1343  				continue
	1344  			}
	1345  			if (bits>>(word%8))&1 == 0 {
	1346  				continue
	1347  			}
	1348  		}
	1349  
	1350  		val := *(*uintptr)(unsafe.Pointer(b + i))
	1351  
	1352  		// Check if val points into the stack.
	1353  		if state != nil && state.stack.lo <= val && val < state.stack.hi {
	1354  			// val may point to a stack object. This
	1355  			// object may be dead from last cycle and
	1356  			// hence may contain pointers to unallocated
	1357  			// objects, but unlike heap objects we can't
	1358  			// tell if it's already dead. Hence, if all
	1359  			// pointers to this object are from
	1360  			// conservative scanning, we have to scan it
	1361  			// defensively, too.
	1362  			state.putPtr(val, true)
	1363  			continue
	1364  		}
	1365  
	1366  		// Check if val points to a heap span.
	1367  		span := spanOfHeap(val)
	1368  		if span == nil {
	1369  			continue
	1370  		}
	1371  
	1372  		// Check if val points to an allocated object.
	1373  		idx := span.objIndex(val)
	1374  		if span.isFree(idx) {
	1375  			continue
	1376  		}
	1377  
	1378  		// val points to an allocated object. Mark it.
	1379  		obj := span.base() + idx*span.elemsize
	1380  		greyobject(obj, b, i, span, gcw, idx)
	1381  	}
	1382  }
	1383  
	1384  // Shade the object if it isn't already.
	1385  // The object is not nil and known to be in the heap.
	1386  // Preemption must be disabled.
	1387  //go:nowritebarrier
	1388  func shade(b uintptr) {
	1389  	if obj, span, objIndex := findObject(b, 0, 0); obj != 0 {
	1390  		gcw := &getg().m.p.ptr().gcw
	1391  		greyobject(obj, 0, 0, span, gcw, objIndex)
	1392  	}
	1393  }
	1394  
	1395  // obj is the start of an object with mark mbits.
	1396  // If it isn't already marked, mark it and enqueue into gcw.
	1397  // base and off are for debugging only and could be removed.
	1398  //
	1399  // See also wbBufFlush1, which partially duplicates this logic.
	1400  //
	1401  //go:nowritebarrierrec
	1402  func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintptr) {
	1403  	// obj should be start of allocation, and so must be at least pointer-aligned.
	1404  	if obj&(sys.PtrSize-1) != 0 {
	1405  		throw("greyobject: obj not pointer-aligned")
	1406  	}
	1407  	mbits := span.markBitsForIndex(objIndex)
	1408  
	1409  	if useCheckmark {
	1410  		if setCheckmark(obj, base, off, mbits) {
	1411  			// Already marked.
	1412  			return
	1413  		}
	1414  	} else {
	1415  		if debug.gccheckmark > 0 && span.isFree(objIndex) {
	1416  			print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n")
	1417  			gcDumpObject("base", base, off)
	1418  			gcDumpObject("obj", obj, ^uintptr(0))
	1419  			getg().m.traceback = 2
	1420  			throw("marking free object")
	1421  		}
	1422  
	1423  		// If marked we have nothing to do.
	1424  		if mbits.isMarked() {
	1425  			return
	1426  		}
	1427  		mbits.setMarked()
	1428  
	1429  		// Mark span.
	1430  		arena, pageIdx, pageMask := pageIndexOf(span.base())
	1431  		if arena.pageMarks[pageIdx]&pageMask == 0 {
	1432  			atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
	1433  		}
	1434  
	1435  		// If this is a noscan object, fast-track it to black
	1436  		// instead of greying it.
	1437  		if span.spanclass.noscan() {
	1438  			gcw.bytesMarked += uint64(span.elemsize)
	1439  			return
	1440  		}
	1441  	}
	1442  
	1443  	// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
	1444  	// seems like a nice optimization that can be added back in.
	1445  	// There needs to be time between the PREFETCH and the use.
	1446  	// Previously we put the obj in an 8 element buffer that is drained at a rate
	1447  	// to give the PREFETCH time to do its work.
	1448  	// Use of PREFETCHNTA might be more appropriate than PREFETCH
	1449  	if !gcw.putFast(obj) {
	1450  		gcw.put(obj)
	1451  	}
	1452  }
	1453  
	1454  // gcDumpObject dumps the contents of obj for debugging and marks the
	1455  // field at byte offset off in obj.
	1456  func gcDumpObject(label string, obj, off uintptr) {
	1457  	s := spanOf(obj)
	1458  	print(label, "=", hex(obj))
	1459  	if s == nil {
	1460  		print(" s=nil\n")
	1461  		return
	1462  	}
	1463  	print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.spanclass=", s.spanclass, " s.elemsize=", s.elemsize, " s.state=")
	1464  	if state := s.state.get(); 0 <= state && int(state) < len(mSpanStateNames) {
	1465  		print(mSpanStateNames[state], "\n")
	1466  	} else {
	1467  		print("unknown(", state, ")\n")
	1468  	}
	1469  
	1470  	skipped := false
	1471  	size := s.elemsize
	1472  	if s.state.get() == mSpanManual && size == 0 {
	1473  		// We're printing something from a stack frame. We
	1474  		// don't know how big it is, so just show up to and
	1475  		// including off.
	1476  		size = off + sys.PtrSize
	1477  	}
	1478  	for i := uintptr(0); i < size; i += sys.PtrSize {
	1479  		// For big objects, just print the beginning (because
	1480  		// that usually hints at the object's type) and the
	1481  		// fields around off.
	1482  		if !(i < 128*sys.PtrSize || off-16*sys.PtrSize < i && i < off+16*sys.PtrSize) {
	1483  			skipped = true
	1484  			continue
	1485  		}
	1486  		if skipped {
	1487  			print(" ...\n")
	1488  			skipped = false
	1489  		}
	1490  		print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + i))))
	1491  		if i == off {
	1492  			print(" <==")
	1493  		}
	1494  		print("\n")
	1495  	}
	1496  	if skipped {
	1497  		print(" ...\n")
	1498  	}
	1499  }
	1500  
	1501  // gcmarknewobject marks a newly allocated object black. obj must
	1502  // not contain any non-nil pointers.
	1503  //
	1504  // This is nosplit so it can manipulate a gcWork without preemption.
	1505  //
	1506  //go:nowritebarrier
	1507  //go:nosplit
	1508  func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) {
	1509  	if useCheckmark { // The world should be stopped so this should not happen.
	1510  		throw("gcmarknewobject called while doing checkmark")
	1511  	}
	1512  
	1513  	// Mark object.
	1514  	objIndex := span.objIndex(obj)
	1515  	span.markBitsForIndex(objIndex).setMarked()
	1516  
	1517  	// Mark span.
	1518  	arena, pageIdx, pageMask := pageIndexOf(span.base())
	1519  	if arena.pageMarks[pageIdx]&pageMask == 0 {
	1520  		atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
	1521  	}
	1522  
	1523  	gcw := &getg().m.p.ptr().gcw
	1524  	gcw.bytesMarked += uint64(size)
	1525  	gcw.scanWork += int64(scanSize)
	1526  }
	1527  
	1528  // gcMarkTinyAllocs greys all active tiny alloc blocks.
	1529  //
	1530  // The world must be stopped.
	1531  func gcMarkTinyAllocs() {
	1532  	assertWorldStopped()
	1533  
	1534  	for _, p := range allp {
	1535  		c := p.mcache
	1536  		if c == nil || c.tiny == 0 {
	1537  			continue
	1538  		}
	1539  		_, span, objIndex := findObject(c.tiny, 0, 0)
	1540  		gcw := &p.gcw
	1541  		greyobject(c.tiny, 0, 0, span, gcw, objIndex)
	1542  	}
	1543  }
	1544  
