...

Text file src/runtime/asm_amd64.s

Documentation: runtime

		 1// Copyright 2009 The Go Authors. All rights reserved.
		 2// Use of this source code is governed by a BSD-style
		 3// license that can be found in the LICENSE file.
		 4
		 5#include "go_asm.h"
		 6#include "go_tls.h"
		 7#include "funcdata.h"
		 8#include "textflag.h"
		 9#include "cgo/abi_amd64.h"
		10
		11// _rt0_amd64 is common startup code for most amd64 systems when using
		12// internal linking. This is the entry point for the program from the
		13// kernel for an ordinary -buildmode=exe program. The stack holds the
		14// number of arguments and the C-style argv.
		15TEXT _rt0_amd64(SB),NOSPLIT,$-8
		16	MOVQ	0(SP), DI	// DI = argc, the word at the top of the stack
		17	LEAQ	8(SP), SI	// SI = argv, the address of the word after argc
		18	JMP	runtime·rt0_go(SB)	// rt0_go picks up argc/argv from DI/SI
		19
		20// main is common startup code for most amd64 systems when using
		21// external linking. The C startup code will call the symbol "main"
		22// passing argc and argv in the usual C ABI registers DI and SI.
		23TEXT main(SB),NOSPLIT,$-8
		24	JMP	runtime·rt0_go(SB)	// DI and SI are passed through untouched
		25
		26// _rt0_amd64_lib is common startup code for most amd64 systems when
		27// using -buildmode=c-archive or -buildmode=c-shared. The linker will
		28// arrange to invoke this function as a global constructor (for
		29// c-archive) or when the shared library is loaded (for c-shared).
		30// We expect argc and argv to be passed in the usual C ABI registers
		31// DI and SI.
		32TEXT _rt0_amd64_lib(SB),NOSPLIT,$0
		33	// Transition from C ABI to Go ABI.
		34	PUSH_REGS_HOST_TO_ABI0()
		35
		36	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)	// stash argc for _rt0_amd64_lib_go
		37	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)	// stash argv for _rt0_amd64_lib_go
		38
		39	// Synchronous initialization.
		40	CALL	runtime·libpreinit(SB)
		41
		42	// Create a new thread to finish Go runtime initialization.
		43	MOVQ	_cgo_sys_thread_create(SB), AX
		44	TESTQ	AX, AX
		45	JZ	nocgo	// zero pointer: no thread-create hook, use newosproc0 instead
		46
		47	// We're calling back to C.
		48	// Align stack per ELF ABI requirements.
		49	MOVQ	SP, BX	// Callee-save in C ABI
		50	ANDQ	$~15, SP
		51	MOVQ	$_rt0_amd64_lib_go(SB), DI	// first C argument: address of _rt0_amd64_lib_go
		52	MOVQ	$0, SI	// second C argument: 0
		53	CALL	AX
		54	MOVQ	BX, SP	// undo the alignment using the SP saved in BX
		55	JMP	restore
		56
		57nocgo:
		58	ADJSP	$16	// make room for the two words written below
		59	MOVQ	$0x800000, 0(SP)		// stacksize (0x800000 bytes = 8 MiB)
		60	MOVQ	$_rt0_amd64_lib_go(SB), AX
		61	MOVQ	AX, 8(SP)			// fn
		62	CALL	runtime·newosproc0(SB)
		63	ADJSP	$-16
		64
		65restore:
		66	POP_REGS_HOST_TO_ABI0()	// pairs with PUSH_REGS_HOST_TO_ABI0 at entry
		67	RET
		68
		69// _rt0_amd64_lib_go finishes Go runtime start-up for the library build
		70// modes. _rt0_amd64_lib arranges for it to run on a separate thread.
		71TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
		72	MOVQ	_rt0_amd64_lib_argv<>(SB), SI	// SI = argv saved by _rt0_amd64_lib
		73	MOVQ	_rt0_amd64_lib_argc<>(SB), DI	// DI = argc saved by _rt0_amd64_lib
		74	JMP	runtime·rt0_go(SB)	// continue in the common bootstrap
		75
		76DATA _rt0_amd64_lib_argc<>(SB)/8, $0	// argc captured by _rt0_amd64_lib; initially 0
		77GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
		78DATA _rt0_amd64_lib_argv<>(SB)/8, $0	// argv captured by _rt0_amd64_lib; initially 0
		79GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
		80
		81TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0	// entered with DI = argc, SI = argv
		82	// copy arguments forward on an even stack
		83	MOVQ	DI, AX		// argc
		84	MOVQ	SI, BX		// argv
		85	SUBQ	$(4*8+7), SP		// 2args 2auto
		86	ANDQ	$~15, SP	// round SP down to a multiple of 16
		87	MOVQ	AX, 16(SP)	// park argc; copied to 0(SP) before runtime·args
		88	MOVQ	BX, 24(SP)	// park argv; copied to 8(SP) before runtime·args
		89
		90	// create istack out of the given (operating system) stack.
		91	// _cgo_init may update stackguard.
		92	MOVQ	$runtime·g0(SB), DI
		93	LEAQ	(-64*1024+104)(SP), BX	// provisional low bound: SP - 64KB + 104 bytes
		94	MOVQ	BX, g_stackguard0(DI)
		95	MOVQ	BX, g_stackguard1(DI)
		96	MOVQ	BX, (g_stack+stack_lo)(DI)
		97	MOVQ	SP, (g_stack+stack_hi)(DI)
		98
		99	// find out information about the processor we're on
	 100	MOVL	$0, AX
	 101	CPUID
	 102	MOVL	AX, SI
	 103	CMPL	AX, $0
	 104	JE	nocpuinfo	// CPUID leaf 0 returned AX == 0: skip the vendor and leaf-1 queries
	 105
	 106	// Figure out how to serialize RDTSC.
	 107	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	 108	// Don't know about the rest, so let's do MFENCE.
	 109	CMPL	BX, $0x756E6547	// "Genu"
	 110	JNE	notintel
	 111	CMPL	DX, $0x49656E69	// "ineI"
	 112	JNE	notintel
	 113	CMPL	CX, $0x6C65746E	// "ntel"
	 114	JNE	notintel
	 115	MOVB	$1, runtime·isIntel(SB)
	 116	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)	// consulted by runtime·cputicks
	 117notintel:
	 118
	 119	// Load EAX=1 cpuid flags
	 120	MOVL	$1, AX
	 121	CPUID
	 122	MOVL	AX, runtime·processorVersionInfo(SB)
	 123
	 124nocpuinfo:
	 125	// if there is an _cgo_init, call it.
	 126	MOVQ	_cgo_init(SB), AX
	 127	TESTQ	AX, AX
	 128	JZ	needtls	// zero pointer: no _cgo_init, go set up TLS ourselves
	 129	// arg 1: g0, already in DI
	 130	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
	 131#ifdef GOOS_android
	 132	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
	 133	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	 134	// Compensate for tls_g (+16).
	 135	MOVQ	-16(TLS), CX
	 136#else
	 137	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
	 138	MOVQ	$0, CX
	 139#endif
	 140#ifdef GOOS_windows
	 141	// Adjust for the Win64 calling convention.
	 142	MOVQ	CX, R9 // arg 4
	 143	MOVQ	DX, R8 // arg 3
	 144	MOVQ	SI, DX // arg 2
	 145	MOVQ	DI, CX // arg 1
	 146#endif
	 147	CALL	AX	// call _cgo_init
	 148
	 149	// update stackguard after _cgo_init
	 150	MOVQ	$runtime·g0(SB), CX
	 151	MOVQ	(g_stack+stack_lo)(CX), AX
	 152	ADDQ	$const__StackGuard, AX	// guard = g0.stack.lo + _StackGuard
	 153	MOVQ	AX, g_stackguard0(CX)
	 154	MOVQ	AX, g_stackguard1(CX)
	 155
	 156#ifndef GOOS_windows
	 157	JMP ok
	 158#endif
	 159needtls:
	 160#ifdef GOOS_plan9
	 161	// skip TLS setup on Plan 9
	 162	JMP ok
	 163#endif
	 164#ifdef GOOS_solaris
	 165	// skip TLS setup on Solaris
	 166	JMP ok
	 167#endif
	 168#ifdef GOOS_illumos
	 169	// skip TLS setup on illumos
	 170	JMP ok
	 171#endif
	 172#ifdef GOOS_darwin
	 173	// skip TLS setup on Darwin
	 174	JMP ok
	 175#endif
	 176#ifdef GOOS_openbsd
	 177	// skip TLS setup on OpenBSD
	 178	JMP ok
	 179#endif
	 180
	 181	LEAQ	runtime·m0+m_tls(SB), DI	// DI = &m0.tls, passed to settls
	 182	CALL	runtime·settls(SB)
	 183
	 184	// store through it, to make sure it works
	 185	get_tls(BX)
	 186	MOVQ	$0x123, g(BX)	// write a marker through the TLS g slot
	 187	MOVQ	runtime·m0+m_tls(SB), AX	// read it back directly from m0.tls
	 188	CMPQ	AX, $0x123
	 189	JEQ 2(PC)	// marker round-tripped: skip the abort
	 190	CALL	runtime·abort(SB)
	 191ok:
	 192	// set the per-goroutine and per-mach "registers"
	 193	get_tls(BX)
	 194	LEAQ	runtime·g0(SB), CX
	 195	MOVQ	CX, g(BX)	// current g = g0
	 196	LEAQ	runtime·m0(SB), AX
	 197
	 198	// save m->g0 = g0
	 199	MOVQ	CX, m_g0(AX)
	 200	// save m0 to g0->m
	 201	MOVQ	AX, g_m(CX)
	 202
	 203	CLD				// convention is D is always left cleared
	 204	CALL	runtime·check(SB)
	 205
	 206	MOVL	16(SP), AX		// copy argc
	 207	MOVL	AX, 0(SP)
	 208	MOVQ	24(SP), AX		// copy argv
	 209	MOVQ	AX, 8(SP)
	 210	CALL	runtime·args(SB)
	 211	CALL	runtime·osinit(SB)
	 212	CALL	runtime·schedinit(SB)
	 213
	 214	// create a new goroutine to start program
	 215	MOVQ	$runtime·mainPC(SB), AX		// entry
	 216	PUSHQ	AX
	 217	PUSHQ	$0			// arg size
	 218	CALL	runtime·newproc(SB)
	 219	POPQ	AX	// discard the two words pushed for newproc
	 220	POPQ	AX
	 221
	 222	// start this M
	 223	CALL	runtime·mstart(SB)
	 224
	 225	CALL	runtime·abort(SB)	// mstart should never return
	 226	RET
	 227
	 228	// Prevent dead-code elimination of debugCallV2, which is
	 229	// intended to be called by debuggers.
	 230	MOVQ	$runtime·debugCallV2<ABIInternal>(SB), AX	// placed after RET, so never executed
	 231	RET
	 232
	 233// mainPC is a function value for runtime.main, to be passed to newproc.
	 234// The reference to runtime.main is made via ABIInternal, since the
	 235// actual function (not the ABI0 wrapper) is needed by newproc.
	 236DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
	 237GLOBL	runtime·mainPC(SB),RODATA,$8	// read-only, 8 bytes: the single word initialized above
	 238
	 239TEXT runtime·breakpoint(SB),NOSPLIT,$0-0	// executes a breakpoint instruction, then returns
	 240	BYTE	$0xcc	// 0xCC is the one-byte x86 INT3 opcode, emitted as a raw byte
	 241	RET
	 242
	 243TEXT runtime·asminit(SB),NOSPLIT,$0-0	// intentionally empty on amd64
	 244	// No per-thread init.
	 245	RET
	 246
	 247TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0	// assembly entry point that hands off to mstart0
	 248	CALL	runtime·mstart0(SB)
	 249	RET // not reached
	 250
	 251/*
	 252 *	go-routine
	 253 */
	 254
	 255// func gogo(buf *gobuf)
	 256// restore state from Gobuf; longjmp
	 257TEXT runtime·gogo(SB), NOSPLIT, $0-8
	 258	MOVQ	buf+0(FP), BX		// gobuf
	 259	MOVQ	gobuf_g(BX), DX	// DX = buf.g, consumed by gogo<>
	 260	MOVQ	0(DX), CX		// make sure g != nil: this load goes through DX, the result in CX is unused
	 261	JMP	gogo<>(SB)	// continue with BX = gobuf, DX = g
	 262
	 263TEXT gogo<>(SB), NOSPLIT, $0	// expects BX = gobuf and DX = g, as set up by runtime·gogo
	 264	get_tls(CX)
	 265	MOVQ	DX, g(CX)	// make g the current goroutine in TLS
	 266	MOVQ	DX, R14		// set the g register
	 267	MOVQ	gobuf_sp(BX), SP	// restore SP
	 268	MOVQ	gobuf_ret(BX), AX	// AX = saved ret value
	 269	MOVQ	gobuf_ctxt(BX), DX	// DX = saved ctxt (overwrites the g pointer)
	 270	MOVQ	gobuf_bp(BX), BP	// restore BP
	 271	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
	 272	MOVQ	$0, gobuf_ret(BX)
	 273	MOVQ	$0, gobuf_ctxt(BX)
	 274	MOVQ	$0, gobuf_bp(BX)
	 275	MOVQ	gobuf_pc(BX), BX	// BX now holds the target PC; the gobuf pointer is no longer needed
	 276	JMP	BX	// resume at the saved PC
	 277
	 278// func mcall(fn func(*g))
	 279// Switch to m->g0's stack, call fn(g).
	 280// Fn must never return. It should gogo(&g->sched)
	 281// to keep running g.
	 282#ifdef GOEXPERIMENT_regabiargs
	 283TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
	 284	MOVQ	AX, DX	// DX = fn
	 285
	 286	// save state in g->sched
	 287	MOVQ	0(SP), BX	// caller's PC
	 288	MOVQ	BX, (g_sched+gobuf_pc)(R14)
	 289	LEAQ	fn+0(FP), BX	// caller's SP
	 290	MOVQ	BX, (g_sched+gobuf_sp)(R14)
	 291	MOVQ	BP, (g_sched+gobuf_bp)(R14)
	 292
	 293	// switch to m->g0 & its stack, call fn
	 294	MOVQ	g_m(R14), BX
	 295	MOVQ	m_g0(BX), SI	// SI = g.m.g0
	 296	CMPQ	SI, R14	// if g == m->g0 call badmcall
	 297	JNE	goodm
	 298	JMP	runtime·badmcall(SB)
	 299goodm:
	 300	MOVQ	R14, AX		// AX (and arg 0) = g
	 301	MOVQ	SI, R14		// g = g.m.g0
	 302	get_tls(CX)		// Set G in TLS
	 303	MOVQ	R14, g(CX)
	 304	MOVQ	(g_sched+gobuf_sp)(R14), SP	// sp = g0.sched.sp
	 305	PUSHQ	AX	// open up space for fn's arg spill slot
	 306	MOVQ	0(DX), R12	// R12 = code pointer from the first word of the func value
	 307	CALL	R12		// fn(g)
	 308	POPQ	AX	// only reached if fn returned, which the contract forbids
	 309	JMP	runtime·badmcall2(SB)
	 310	RET
	 311#else
	 312TEXT runtime·mcall(SB), NOSPLIT, $0-8
	 313	MOVQ	fn+0(FP), DI	// DI = fn
	 314
	 315	get_tls(CX)
	 316	MOVQ	g(CX), AX	// save state in g->sched
	 317	MOVQ	0(SP), BX	// caller's PC
	 318	MOVQ	BX, (g_sched+gobuf_pc)(AX)
	 319	LEAQ	fn+0(FP), BX	// caller's SP
	 320	MOVQ	BX, (g_sched+gobuf_sp)(AX)
	 321	MOVQ	BP, (g_sched+gobuf_bp)(AX)
	 322
	 323	// switch to m->g0 & its stack, call fn
	 324	MOVQ	g(CX), BX
	 325	MOVQ	g_m(BX), BX
	 326	MOVQ	m_g0(BX), SI	// SI = g.m.g0
	 327	CMPQ	SI, AX	// if g == m->g0 call badmcall
	 328	JNE	3(PC)	// g != g0: skip the two-instruction jump to badmcall
	 329	MOVQ	$runtime·badmcall(SB), AX
	 330	JMP	AX
	 331	MOVQ	SI, g(CX)	// g = m->g0
	 332	MOVQ	SI, R14	// set the g register
	 333	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	 334	PUSHQ	AX	// push the old g as fn's stack argument
	 335	MOVQ	DI, DX	// DX = fn (the func value)
	 336	MOVQ	0(DI), DI	// DI = code pointer from the first word of the func value
	 337	CALL	DI
	 338	POPQ	AX	// only reached if fn returned, which the contract forbids
	 339	MOVQ	$runtime·badmcall2(SB), AX
	 340	JMP	AX
	 341	RET
	 342#endif
	 343
	 344// systemstack_switch is a dummy routine that systemstack leaves at the bottom
	 345// of the G stack. We need to distinguish the routine that
	 346// lives at the bottom of the G stack from the one that lives
	 347// at the top of the system stack because the one at the top of
	 348// the system stack terminates the stack walk (see topofstack()).
	 349TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	 350	RET	// no body: gosave_systemstack_switch<> records this symbol's address as a saved PC
	 351
	 352// func systemstack(fn func())
	 353TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	 354	MOVQ	fn+0(FP), DI	// DI = fn
	 355	get_tls(CX)
	 356	MOVQ	g(CX), AX	// AX = g
	 357	MOVQ	g_m(AX), BX	// BX = m
	 358
	 359	CMPQ	AX, m_gsignal(BX)
	 360	JEQ	noswitch	// running on gsignal: call fn without switching
	 361
	 362	MOVQ	m_g0(BX), DX	// DX = g0
	 363	CMPQ	AX, DX
	 364	JEQ	noswitch	// running on g0: call fn without switching
	 365
	 366	CMPQ	AX, m_curg(BX)
	 367	JNE	bad	// g is none of gsignal, g0 or curg
	 368
	 369	// switch stacks
	 370	// save our state in g->sched. Pretend to
	 371	// be systemstack_switch if the G stack is scanned.
	 372	CALL	gosave_systemstack_switch<>(SB)
	 373
	 374	// switch to g0
	 375	MOVQ	DX, g(CX)
	 376	MOVQ	DX, R14 // set the g register
	 377	MOVQ	(g_sched+gobuf_sp)(DX), BX
	 378	MOVQ	BX, SP	// SP = g0.sched.sp
	 379
	 380	// call target function
	 381	MOVQ	DI, DX	// DX = fn (the func value)
	 382	MOVQ	0(DI), DI	// DI = code pointer from the first word of the func value
	 383	CALL	DI
	 384
	 385	// switch back to g
	 386	get_tls(CX)
	 387	MOVQ	g(CX), AX
	 388	MOVQ	g_m(AX), BX
	 389	MOVQ	m_curg(BX), AX	// AX = m.curg, the goroutine to return to
	 390	MOVQ	AX, g(CX)
	 391	MOVQ	(g_sched+gobuf_sp)(AX), SP	// back on the SP saved by gosave_systemstack_switch<>
	 392	MOVQ	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been consumed
	 393	RET
	 394
	 395noswitch:
	 396	// already on m stack; tail call the function
	 397	// Using a tail call here cleans up tracebacks since we won't stop
	 398	// at an intermediate systemstack.
	 399	MOVQ	DI, DX
	 400	MOVQ	0(DI), DI
	 401	JMP	DI
	 402
	 403bad:
	 404	// Bad: g is not gsignal, not g0, not curg. What is it?
	 405	MOVQ	$runtime·badsystemstack(SB), AX
	 406	CALL	AX
	 407	INT	$3	// trap if badsystemstack returns
	 408
	 409
	 410/*
	 411 * support for morestack
	 412 */
	 413
	 414// Called during function prolog when more stack is needed.
	 415//
	 416// The traceback routines see morestack on a g0 as being
	 417// the top of a stack (for example, morestack calling newstack
	 418// calling the scheduler calling newm calling gc), so we must
	 419// record an argument size. For that purpose, it has no arguments.
	 420TEXT runtime·morestack(SB),NOSPLIT,$0-0
	 421	// Cannot grow scheduler stack (m->g0).
	 422	get_tls(CX)
	 423	MOVQ	g(CX), BX
	 424	MOVQ	g_m(BX), BX	// BX = m for the rest of the function
	 425	MOVQ	m_g0(BX), SI
	 426	CMPQ	g(CX), SI
	 427	JNE	3(PC)	// not on g0: skip the two error calls
	 428	CALL	runtime·badmorestackg0(SB)
	 429	CALL	runtime·abort(SB)
	 430
	 431	// Cannot grow signal stack (m->gsignal).
	 432	MOVQ	m_gsignal(BX), SI
	 433	CMPQ	g(CX), SI
	 434	JNE	3(PC)	// not on gsignal: skip the two error calls
	 435	CALL	runtime·badmorestackgsignal(SB)
	 436	CALL	runtime·abort(SB)
	 437
	 438	// Called from f.
	 439	// Set m->morebuf to f's caller.
	 440	NOP	SP	// tell vet SP changed - stop checking offsets
	 441	MOVQ	8(SP), AX	// f's caller's PC
	 442	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	 443	LEAQ	16(SP), AX	// f's caller's SP
	 444	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	 445	get_tls(CX)
	 446	MOVQ	g(CX), SI	// SI = current g
	 447	MOVQ	SI, (m_morebuf+gobuf_g)(BX)
	 448
	 449	// Set g->sched to context in f.
	 450	MOVQ	0(SP), AX // f's PC
	 451	MOVQ	AX, (g_sched+gobuf_pc)(SI)
	 452	LEAQ	8(SP), AX // f's SP
	 453	MOVQ	AX, (g_sched+gobuf_sp)(SI)
	 454	MOVQ	BP, (g_sched+gobuf_bp)(SI)
	 455	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)	// DX is saved as ctxt; morestack_noctxt zeroes it first
	 456
	 457	// Call newstack on m->g0's stack.
	 458	MOVQ	m_g0(BX), BX
	 459	MOVQ	BX, g(CX)	// current g = g0
	 460	MOVQ	(g_sched+gobuf_sp)(BX), SP	// SP = g0.sched.sp
	 461	CALL	runtime·newstack(SB)
	 462	CALL	runtime·abort(SB)	// crash if newstack returns
	 463	RET
	 464
	 465// morestack but not preserving ctxt.
	 466TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	 467	MOVL	$0, DX	// zero DX, which morestack stores as g->sched.ctxt
	 468	JMP	runtime·morestack(SB)
	 469
	 470#ifdef GOEXPERIMENT_regabireflect
	 471// spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
	 472TEXT ·spillArgs<ABIInternal>(SB),NOSPLIT,$0-0
	 473	MOVQ AX, 0(R12)	// nine integer registers go to offsets 0..64
	 474	MOVQ BX, 8(R12)
	 475	MOVQ CX, 16(R12)
	 476	MOVQ DI, 24(R12)
	 477	MOVQ SI, 32(R12)
	 478	MOVQ R8, 40(R12)
	 479	MOVQ R9, 48(R12)
	 480	MOVQ R10, 56(R12)
	 481	MOVQ R11, 64(R12)
	 482	MOVQ X0, 72(R12)	// X0..X14 go to offsets 72..184, one 8-byte slot each
	 483	MOVQ X1, 80(R12)
	 484	MOVQ X2, 88(R12)
	 485	MOVQ X3, 96(R12)
	 486	MOVQ X4, 104(R12)
	 487	MOVQ X5, 112(R12)
	 488	MOVQ X6, 120(R12)
	 489	MOVQ X7, 128(R12)
	 490	MOVQ X8, 136(R12)
	 491	MOVQ X9, 144(R12)
	 492	MOVQ X10, 152(R12)
	 493	MOVQ X11, 160(R12)
	 494	MOVQ X12, 168(R12)
	 495	MOVQ X13, 176(R12)
	 496	MOVQ X14, 184(R12)
	 497	RET
	 498
	 499// unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
	 500TEXT ·unspillArgs<ABIInternal>(SB),NOSPLIT,$0-0
	 501	MOVQ 0(R12), AX	// same layout as spillArgs, in the opposite direction
	 502	MOVQ 8(R12), BX
	 503	MOVQ 16(R12), CX
	 504	MOVQ 24(R12), DI
	 505	MOVQ 32(R12), SI
	 506	MOVQ 40(R12), R8
	 507	MOVQ 48(R12), R9
	 508	MOVQ 56(R12), R10
	 509	MOVQ 64(R12), R11
	 510	MOVQ 72(R12), X0
	 511	MOVQ 80(R12), X1
	 512	MOVQ 88(R12), X2
	 513	MOVQ 96(R12), X3
	 514	MOVQ 104(R12), X4
	 515	MOVQ 112(R12), X5
	 516	MOVQ 120(R12), X6
	 517	MOVQ 128(R12), X7
	 518	MOVQ 136(R12), X8
	 519	MOVQ 144(R12), X9
	 520	MOVQ 152(R12), X10
	 521	MOVQ 160(R12), X11
	 522	MOVQ 168(R12), X12
	 523	MOVQ 176(R12), X13
	 524	MOVQ 184(R12), X14
	 525	RET
	 526#else
	 527// spillArgs stores return values from registers to a pointer in R12.
	 528TEXT ·spillArgs<ABIInternal>(SB),NOSPLIT,$0-0
	 529	RET	// empty in this configuration: nothing is stored
	 530
	 531// unspillArgs loads args into registers from a pointer in R12.
	 532TEXT ·unspillArgs<ABIInternal>(SB),NOSPLIT,$0-0
	 533	RET	// empty in this configuration: nothing is loaded
	 534#endif
	 535
	 536// reflectcall: call a function with the given argument list
	 537// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
	 538// we don't have variable-sized frames, so we use a small number
	 539// of constant-sized-frame functions to encode a few bits of size in the pc.
	 540// Caution: ugly multiline assembly macros in your future!
	 541// DISPATCH jumps to NAME when CX <= MAXSIZE; otherwise JA 3(PC) skips the MOVQ/JMP pair.
	 542#define DISPATCH(NAME,MAXSIZE)		\
	 543	CMPQ	CX, $MAXSIZE;		\
	 544	JA	3(PC);			\
	 545	MOVQ	$NAME(SB), AX;		\
	 546	JMP	AX
	 547// Note: can't just "JMP NAME(SB)" - bad inlining results.
	 548
	 549TEXT ·reflectcall(SB), NOSPLIT, $0-48
	 550	MOVLQZX frameSize+32(FP), CX	// CX = frameSize, zero-extended to 64 bits
	 551	DISPATCH(runtime·call16, 16)	// sizes are tried in ascending order, so the first that fits wins
	 552	DISPATCH(runtime·call32, 32)
	 553	DISPATCH(runtime·call64, 64)
	 554	DISPATCH(runtime·call128, 128)
	 555	DISPATCH(runtime·call256, 256)
	 556	DISPATCH(runtime·call512, 512)
	 557	DISPATCH(runtime·call1024, 1024)
	 558	DISPATCH(runtime·call2048, 2048)
	 559	DISPATCH(runtime·call4096, 4096)
	 560	DISPATCH(runtime·call8192, 8192)
	 561	DISPATCH(runtime·call16384, 16384)
	 562	DISPATCH(runtime·call32768, 32768)
	 563	DISPATCH(runtime·call65536, 65536)
	 564	DISPATCH(runtime·call131072, 131072)
	 565	DISPATCH(runtime·call262144, 262144)
	 566	DISPATCH(runtime·call524288, 524288)
	 567	DISPATCH(runtime·call1048576, 1048576)
	 568	DISPATCH(runtime·call2097152, 2097152)
	 569	DISPATCH(runtime·call4194304, 4194304)
	 570	DISPATCH(runtime·call8388608, 8388608)
	 571	DISPATCH(runtime·call16777216, 16777216)
	 572	DISPATCH(runtime·call33554432, 33554432)
	 573	DISPATCH(runtime·call67108864, 67108864)
	 574	DISPATCH(runtime·call134217728, 134217728)
	 575	DISPATCH(runtime·call268435456, 268435456)
	 576	DISPATCH(runtime·call536870912, 536870912)
	 577	DISPATCH(runtime·call1073741824, 1073741824)
	 578	MOVQ	$runtime·badreflectcall(SB), AX	// frameSize is larger than 1073741824: no callNNN fits
	 579	JMP	AX
	 580
	 581#define CALLFN(NAME,MAXSIZE)			\
	 582TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
	 583	NO_LOCAL_POINTERS;			\
	 584	/* copy stackArgsSize bytes from stackArgs to the frame at SP */		\
	 585	MOVQ	stackArgs+16(FP), SI;		\
	 586	MOVLQZX stackArgsSize+24(FP), CX;		\
	 587	MOVQ	SP, DI;				\
	 588	REP;MOVSB;				\
	 589	/* load argument registers from regArgs, passed to unspillArgs in R12 */		\
	 590	MOVQ		regArgs+40(FP), R12;		\
	 591	CALL		·unspillArgs<ABIInternal>(SB);		\
	 592	/* call the code pointer in the first word of f, with DX = f */			\
	 593	MOVQ	f+8(FP), DX;			\
	 594	PCDATA	$PCDATA_StackMapIndex, $0;	\
	 595	MOVQ	(DX), R12;			\
	 596	CALL	R12;				\
	 597	/* spill register results to regArgs, then hand the stack results to callRet */		\
	 598	MOVQ		regArgs+40(FP), R12;		\
	 599	CALL		·spillArgs<ABIInternal>(SB);		\
	 600	MOVLQZX	stackArgsSize+24(FP), CX;		\
	 601	MOVLQZX	stackRetOffset+28(FP), BX;		\
	 602	MOVQ	stackArgs+16(FP), DI;		\
	 603	MOVQ	stackArgsType+0(FP), DX;		\
	 604	MOVQ	SP, SI;				\
	 605	ADDQ	BX, DI;				\
	 606	ADDQ	BX, SI;				\
	 607	SUBQ	BX, CX;				\
	 608	CALL	callRet<>(SB);			\
	 609	RET
	 610
	 611// callRet copies return values back at the end of call*. This is a
	 612// separate function so it can allocate stack space for the arguments
	 613// to reflectcallmove. It does not follow the Go ABI; it expects its
	 614// arguments in registers.
	 615TEXT callRet<>(SB), NOSPLIT, $40-0
	 616	NO_LOCAL_POINTERS
	 617	MOVQ	DX, 0(SP)	// CALLFN sets DX = stackArgsType
	 618	MOVQ	DI, 8(SP)	// CALLFN sets DI = stackArgs + stackRetOffset
	 619	MOVQ	SI, 16(SP)	// CALLFN sets SI = its SP + stackRetOffset
	 620	MOVQ	CX, 24(SP)	// CALLFN sets CX = stackArgsSize - stackRetOffset
	 621	MOVQ	R12, 32(SP)	// CALLFN leaves R12 = regArgs
	 622	CALL	runtime·reflectcallmove(SB)
	 623	RET
	 624
	 625CALLFN(·call16, 16)	// one function per frame size, doubling from 16 up to 1073741824 (1<<30)
	 626CALLFN(·call32, 32)
	 627CALLFN(·call64, 64)
	 628CALLFN(·call128, 128)
	 629CALLFN(·call256, 256)
	 630CALLFN(·call512, 512)
	 631CALLFN(·call1024, 1024)
	 632CALLFN(·call2048, 2048)
	 633CALLFN(·call4096, 4096)
	 634CALLFN(·call8192, 8192)
	 635CALLFN(·call16384, 16384)
	 636CALLFN(·call32768, 32768)
	 637CALLFN(·call65536, 65536)
	 638CALLFN(·call131072, 131072)
	 639CALLFN(·call262144, 262144)
	 640CALLFN(·call524288, 524288)
	 641CALLFN(·call1048576, 1048576)
	 642CALLFN(·call2097152, 2097152)
	 643CALLFN(·call4194304, 4194304)
	 644CALLFN(·call8388608, 8388608)
	 645CALLFN(·call16777216, 16777216)
	 646CALLFN(·call33554432, 33554432)
	 647CALLFN(·call67108864, 67108864)
	 648CALLFN(·call134217728, 134217728)
	 649CALLFN(·call268435456, 268435456)
	 650CALLFN(·call536870912, 536870912)
	 651CALLFN(·call1073741824, 1073741824)	// largest size; reflectcall jumps to badreflectcall beyond it
	 652
	 653TEXT runtime·procyield(SB),NOSPLIT,$0-0	// executes PAUSE `cycles` times
	 654	MOVL	cycles+0(FP), AX	// AX = remaining iterations (32-bit)
	 655again:
	 656	PAUSE
	 657	SUBL	$1, AX
	 658	JNZ	again	// NOTE(review): cycles == 0 wraps to 0xFFFFFFFF here and loops ~2^32 times
	 659	RET
	 660
	 661
	 662TEXT ·publicationBarrier(SB),NOSPLIT,$0-0	// emits no instructions besides RET
	 663	// Stores are already ordered on x86, so this is just a
	 664	// compile barrier.
	 665	RET
	 666
	 667// func jmpdefer(fv *funcval, argp uintptr)
	 668// argp is a caller SP.
	 669// called from deferreturn.
	 670// 1. pop the caller
	 671// 2. sub 5 bytes from the caller's return address
	 672// 3. jmp to the argument
	 673TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
	 674	MOVQ	fv+0(FP), DX	// fn
	 675	MOVQ	argp+8(FP), BX	// caller sp
	 676	LEAQ	-8(BX), SP	// caller sp after CALL
	 677	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
	 678	SUBQ	$5, (SP)	// return to CALL again (presumably 5 is the length of that CALL instruction - confirm)
	 679	MOVQ	0(DX), BX	// BX = code pointer from the first word of the funcval
	 680	JMP	BX	// but first run the deferred function
	 681
	 682// Save state of caller into g->sched,
	 683// but using fake PC from systemstack_switch.
	 684// Must only be called from functions with no locals ($0)
	 685// or else unwinding from systemstack_switch is incorrect.
	 686// Smashes R9.
	 687TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
	 688#ifndef GOEXPERIMENT_regabig
	 689	get_tls(R14)
	 690	MOVQ	g(R14), R14	// without the regabig experiment, load g from TLS into R14 first
	 691#endif
	 692	MOVQ	$runtime·systemstack_switch(SB), R9
	 693	MOVQ	R9, (g_sched+gobuf_pc)(R14)	// saved PC = address of systemstack_switch
	 694	LEAQ	8(SP), R9
	 695	MOVQ	R9, (g_sched+gobuf_sp)(R14)	// saved SP = SP + 8, i.e. just above our return address
	 696	MOVQ	$0, (g_sched+gobuf_ret)(R14)
	 697	MOVQ	BP, (g_sched+gobuf_bp)(R14)
	 698	// Assert ctxt is zero. See func save.
	 699	MOVQ	(g_sched+gobuf_ctxt)(R14), R9
	 700	TESTQ	R9, R9
	 701	JZ	2(PC)	// ctxt is zero as required: skip the abort
	 702	CALL	runtime·abort(SB)
	 703	RET
	 704
	 705// func asmcgocall_no_g(fn, arg unsafe.Pointer)
	 706// Call fn(arg) aligned appropriately for the gcc ABI.
	 707// Called on a system stack, and there may be no g yet (during needm).
	 708TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
	 709	MOVQ	fn+0(FP), AX
	 710	MOVQ	arg+8(FP), BX
	 711	MOVQ	SP, DX	// DX = original SP, saved below
	 712	SUBQ	$32, SP
	 713	ANDQ	$~15, SP	// alignment
	 714	MOVQ	DX, 8(SP)	// keep the original SP in the new frame across the call
	 715	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	 716	MOVQ	BX, CX		// CX = first argument in Win64
	 717	CALL	AX
	 718	MOVQ	8(SP), DX
	 719	MOVQ	DX, SP	// restore the original SP
	 720	RET
	 721
	 722// func asmcgocall(fn, arg unsafe.Pointer) int32
	 723// Call fn(arg) on the scheduler stack,
	 724// aligned appropriately for the gcc ABI.
	 725// See cgocall.go for more details.
	 726TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	 727	MOVQ	fn+0(FP), AX
	 728	MOVQ	arg+8(FP), BX
	 729
	 730	MOVQ	SP, DX	// DX = SP on entry, used later to compute the stack depth
	 731
	 732	// Figure out if we need to switch to m->g0 stack.
	 733	// We get called to create new OS threads too, and those
	 734	// come in on the m->g0 stack already.
	 735	get_tls(CX)
	 736	MOVQ	g(CX), R8
	 737	CMPQ	R8, $0
	 738	JEQ	nosave	// no g at all
	 739	MOVQ	g_m(R8), R8	// R8 = m
	 740	MOVQ	m_g0(R8), SI
	 741	MOVQ	g(CX), DI	// DI = current g
	 742	CMPQ	SI, DI
	 743	JEQ	nosave	// already on g0
	 744	MOVQ	m_gsignal(R8), SI
	 745	CMPQ	SI, DI
	 746	JEQ	nosave	// already on gsignal
	 747
	 748	// Switch to system stack.
	 749	MOVQ	m_g0(R8), SI
	 750	CALL	gosave_systemstack_switch<>(SB)
	 751	MOVQ	SI, g(CX)	// current g = g0
	 752	MOVQ	(g_sched+gobuf_sp)(SI), SP
	 753
	 754	// Now on a scheduling stack (a pthread-created stack).
	 755	// Make sure we have enough room for 4 stack-backed fast-call
	 756	// registers as per windows amd64 calling convention.
	 757	SUBQ	$64, SP
	 758	ANDQ	$~15, SP	// alignment for gcc ABI
	 759	MOVQ	DI, 48(SP)	// save g
	 760	MOVQ	(g_stack+stack_hi)(DI), DI
	 761	SUBQ	DX, DI	// DI = g.stack.hi - entry SP
	 762	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	 763	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	 764	MOVQ	BX, CX		// CX = first argument in Win64
	 765	CALL	AX
	 766
	 767	// Restore registers, g, stack pointer.
	 768	get_tls(CX)
	 769	MOVQ	48(SP), DI	// DI = saved g
	 770	MOVQ	(g_stack+stack_hi)(DI), SI
	 771	SUBQ	40(SP), SI	// SI = g.stack.hi - saved depth = SP to return to
	 772	MOVQ	DI, g(CX)
	 773	MOVQ	SI, SP
	 774
	 775	MOVL	AX, ret+16(FP)	// store the low 32 bits of AX as the int32 result
	 776	RET
	 777
	 778nosave:
	 779	// Running on a system stack, perhaps even without a g.
	 780	// Having no g can happen during thread creation or thread teardown
	 781	// (see needm/dropm on Solaris, for example).
	 782	// This code is like the above sequence but without saving/restoring g
	 783	// and without worrying about the stack moving out from under us
	 784	// (because we're on a system stack, not a goroutine stack).
	 785	// The above code could be used directly if already on a system stack,
	 786	// but then the only path through this code would be a rare case on Solaris.
	 787	// Using this code for all "already on system stack" calls exercises it more,
	 788	// which should help keep it correct.
	 789	SUBQ	$64, SP
	 790	ANDQ	$~15, SP
	 791	MOVQ	$0, 48(SP)		// where above code stores g, in case someone looks during debugging
	 792	MOVQ	DX, 40(SP)	// save original stack pointer
	 793	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	 794	MOVQ	BX, CX		// CX = first argument in Win64
	 795	CALL	AX
	 796	MOVQ	40(SP), SI	// restore original stack pointer
	 797	MOVQ	SI, SP
	 798	MOVL	AX, ret+16(FP)	// store the low 32 bits of AX as the int32 result
	 799	RET
	 800
	 801#ifdef GOOS_windows
	 802// Dummy TLS that's used on Windows so that we don't crash trying
	 803// to restore the G register in needm. needm and its callees are
	 804// very careful never to actually use the G, the TLS just can't be
	 805// unset since we're in Go code.
	 806GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize	// read-only, const_tlsSize bytes; no DATA initializer, so presumably all zero - confirm
	 807#endif
	 808
	 809// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
	 810// See cgocall.go for more details.
	 811TEXT ·cgocallback(SB),NOSPLIT,$24-24
	 812	NO_LOCAL_POINTERS
	 813
	 814	// If g is nil, Go did not create the current thread.
	 815	// Call needm to obtain one m for temporary use.
	 816	// In this case, we're running on the thread stack, so there's
	 817	// lots of space, but the linker doesn't know. Hide the call from
	 818	// the linker analysis by using an indirect call through AX.
	 819	get_tls(CX)
	 820#ifdef GOOS_windows
	 821	MOVL	$0, BX
	 822	CMPQ	CX, $0
	 823	JEQ	2(PC)	// TLS base is zero: skip the g load and keep BX = 0
	 824#endif
	 825	MOVQ	g(CX), BX
	 826	CMPQ	BX, $0
	 827	JEQ	needm
	 828	MOVQ	g_m(BX), BX
	 829	MOVQ	BX, savedm-8(SP)	// saved copy of oldm
	 830	JMP	havem
	 831needm:
	 832#ifdef GOOS_windows
	 833	// Set up a dummy TLS value. needm is careful not to use it,
	 834	// but it needs to be there to prevent autogenerated code from
	 835	// crashing when it loads from it.
	 836	// We don't need to clear it or anything later because needm
	 837	// will set up TLS properly.
	 838	MOVQ	$zeroTLS<>(SB), DI
	 839	CALL	runtime·settls(SB)
	 840#endif
	 841	// On some platforms (Windows) we cannot call needm through
	 842	// an ABI wrapper because there's no TLS set up, and the ABI
	 843	// wrapper will try to restore the G register (R14) from TLS.
	 844	// Clear X15 because Go expects it and we're not calling
	 845	// through a wrapper, but otherwise avoid setting the G
	 846	// register in the wrapper and call needm directly. It
	 847	// takes no arguments and doesn't return any values so
	 848	// there's no need to handle that. Clear R14 so that there's
	 849	// a bad value in there, in case needm tries to use it.
	 850	XORPS	X15, X15
	 851	XORQ		R14, R14
	 852	MOVQ	$runtime·needm<ABIInternal>(SB), AX
	 853	CALL	AX
	 854	MOVQ	$0, savedm-8(SP) // dropm on return
	 855	get_tls(CX)
	 856	MOVQ	g(CX), BX
	 857	MOVQ	g_m(BX), BX	// BX = the m obtained by needm
	 858
	 859	// Set m->g0->sched.sp = SP, so that if a panic happens
	 860	// during the function we are about to execute, it will
	 861	// have a valid SP to run on the g0 stack.
	 862	// The next few lines (after the havem label)
	 863	// will save this SP onto the stack and then write
	 864	// the same SP back to m->g0->sched.sp. That seems redundant,
	 865	// but if an unrecovered panic happens, unwindm will
	 866	// restore the g->sched.sp from the stack location
	 867	// and then systemstack will try to use it. If we don't set it here,
	 868	// that restored SP will be uninitialized (typically 0) and
	 869	// will not be usable.
	 870	MOVQ	m_g0(BX), SI
	 871	MOVQ	SP, (g_sched+gobuf_sp)(SI)
	 872
	 873havem:
	 874	// Now there's a valid m, and we're running on its m->g0.
	 875	// Save current m->g0->sched.sp on stack and then set it to SP.
	 876	// Save current sp in m->g0->sched.sp in preparation for
	 877	// switch back to m->curg stack.
	 878	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	 879	MOVQ	m_g0(BX), SI
	 880	MOVQ	(g_sched+gobuf_sp)(SI), AX
	 881	MOVQ	AX, 0(SP)
	 882	MOVQ	SP, (g_sched+gobuf_sp)(SI)
	 883
	 884	// Switch to m->curg stack and call runtime.cgocallbackg.
	 885	// Because we are taking over the execution of m->curg
	 886	// but *not* resuming what had been running, we need to
	 887	// save that information (m->curg->sched) so we can restore it.
	 888	// We can restore m->curg->sched.sp easily, because calling
	 889	// runtime.cgocallbackg leaves SP unchanged upon return.
	 890	// To save m->curg->sched.pc, we push it onto the curg stack and
	 891	// open a frame the same size as cgocallback's g0 frame.
	 892	// Once we switch to the curg stack, the pushed PC will appear
	 893	// to be the return PC of cgocallback, so that the traceback
	 894	// will seamlessly trace back into the earlier calls.
	 895	MOVQ	m_curg(BX), SI
	 896	MOVQ	SI, g(CX)	// current g = m->curg
	 897	MOVQ	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	 898	MOVQ	(g_sched+gobuf_pc)(SI), BX
	 899	MOVQ	BX, -8(DI)	// "push" return PC on the g stack
	 900	// Gather our arguments into registers.
	 901	MOVQ	fn+0(FP), BX
	 902	MOVQ	frame+8(FP), CX
	 903	MOVQ	ctxt+16(FP), DX
	 904	// Compute the size of the frame, including return PC and, if
	 905	// GOEXPERIMENT=framepointer, the saved base pointer
	 906	LEAQ	fn+0(FP), AX
	 907	SUBQ	SP, AX	 // AX is our actual frame size
	 908	SUBQ	AX, DI	 // Allocate the same frame size on the g stack
	 909	MOVQ	DI, SP
	 910
	 911	MOVQ	BX, 0(SP)	// cgocallbackg argument 1: fn
	 912	MOVQ	CX, 8(SP)	// cgocallbackg argument 2: frame
	 913	MOVQ	DX, 16(SP)	// cgocallbackg argument 3: ctxt
	 914	MOVQ	$runtime·cgocallbackg(SB), AX
	 915	CALL	AX	// indirect call to bypass nosplit check. We're on a different stack now.
	 916
	 917	// Compute the size of the frame again. FP and SP have
	 918	// completely different values here than they did above,
	 919	// but only their difference matters.
	 920	LEAQ	fn+0(FP), AX
	 921	SUBQ	SP, AX
	 922
	 923	// Restore g->sched (== m->curg->sched) from saved values.
	 924	get_tls(CX)
	 925	MOVQ	g(CX), SI
	 926	MOVQ	SP, DI
	 927	ADDQ	AX, DI	// DI = SP + frame size = curg's sched.sp from before the call
	 928	MOVQ	-8(DI), BX	// BX = the PC "pushed" at -8(DI) earlier
	 929	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	 930	MOVQ	DI, (g_sched+gobuf_sp)(SI)
	 931
	 932	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	 933	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	 934	// so we do not have to restore it.)
	 935	MOVQ	g(CX), BX
	 936	MOVQ	g_m(BX), BX
	 937	MOVQ	m_g0(BX), SI
	 938	MOVQ	SI, g(CX)
	 939	MOVQ	(g_sched+gobuf_sp)(SI), SP
	 940	MOVQ	0(SP), AX	// AX = the old g0.sched.sp stored at 0(SP) after havem
	 941	MOVQ	AX, (g_sched+gobuf_sp)(SI)
	 942
	 943	// If the m on entry was nil, we called needm above to borrow an m
	 944	// for the duration of the call. Since the call is over, return it with dropm.
	 945	MOVQ	savedm-8(SP), BX
	 946	CMPQ	BX, $0
	 947	JNE	done	// savedm != 0: the thread already had an m, so keep it
	 948	MOVQ	$runtime·dropm(SB), AX
	 949	CALL	AX
	 950#ifdef GOOS_windows
	 951	// We need to clear the TLS pointer in case the next
	 952	// thread that comes into Go tries to reuse that space
	 953	// but uses the same M.
	 954	XORQ	DI, DI
	 955	CALL	runtime·settls(SB)
	 956#endif
	 957done:
	 958
	 959	// Done!
	 960	RET
	 961
	 962// func setg(gg *g)
	 963// Installs gg as the current g in thread-local storage; used by needm.
	 964TEXT runtime·setg(SB), NOSPLIT, $0-8
	 965	get_tls(CX)	// CX = TLS base
	 966	MOVQ	gg+0(FP), BX	// BX = the g to install
	 967	MOVQ	BX, g(CX)	// TLS g slot = gg
	 968	RET
	 969
	 970// void setg_gcc(G*); set g called from gcc.
	 971TEXT setg_gcc<>(SB),NOSPLIT,$0
	 972	get_tls(AX)
	 973	MOVQ	DI, g(AX)	// the G* arrives in DI; store it in the TLS g slot
	 974	MOVQ	DI, R14 // set the g register
	 975	RET
	 976
	 977TEXT runtime·abort(SB),NOSPLIT,$0-0	// never returns: trap, then spin
	 978	INT	$3	// raise a breakpoint trap
	 979loop:
	 980	JMP	loop	// spin forever if execution continues past the trap
	 981
	 982// check that SP is strictly between g->stack.lo and g->stack.hi; abort otherwise
	 983TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	 984	get_tls(CX)
	 985	MOVQ	g(CX), AX
	 986	CMPQ	(g_stack+stack_hi)(AX), SP
	 987	JHI	2(PC)	// stack.hi > SP: upper bound holds, skip the abort
	 988	CALL	runtime·abort(SB)
	 989	CMPQ	SP, (g_stack+stack_lo)(AX)
	 990	JHI	2(PC)	// SP > stack.lo: lower bound holds (SP == stack.lo aborts)
	 991	CALL	runtime·abort(SB)
	 992	RET
	 993
	 994// func cputicks() int64
	 995TEXT runtime·cputicks(SB),NOSPLIT,$0-0	// NOTE(review): argsize is declared 0 although ret+0(FP) (8 bytes) is written below - confirm this is intended
	 996	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	 997	JNE	mfence	// lfenceBeforeRdtsc != 1: fence with MFENCE instead of LFENCE
	 998	LFENCE
	 999	JMP	done
	1000mfence:
	1001	MFENCE
	1002done:
	1003	RDTSC	// timestamp counter: high 32 bits in DX, low 32 bits in AX
	1004	SHLQ	$32, DX
	1005	ADDQ	DX, AX	// AX = DX<<32 + AX, the full 64-bit counter
	1006	MOVQ	AX, ret+0(FP)
	1007	RET
	1008
	1009// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
	1010// hash function using AES hardware instructions; jumps to memhashFallback when runtime·useAeshash is 0
	1011TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
	1012#ifdef GOEXPERIMENT_regabiargs
	1013	// AX = ptr to data
	1014	// BX = seed
	1015	// CX = size
	1016#endif
	1017	CMPB	runtime·useAeshash(SB), $0
	1018	JEQ	noaes	// useAeshash == 0: take the fallback
	1019#ifndef GOEXPERIMENT_regabiargs
	1020	MOVQ	p+0(FP), AX	// ptr to data
	1021	MOVQ	s+16(FP), CX	// size
	1022	LEAQ	ret+24(FP), DX	// DX = address of the result slot, where aeshashbody stores the hash
	1023#endif
	1024	JMP	aeshashbody<>(SB)	// tail jump: aeshashbody's RET returns to our caller
	1025noaes:
	1026	JMP	runtime·memhashFallback<ABIInternal>(SB)
	1027
	1028// func strhash(p unsafe.Pointer, h uintptr) uintptr
	1029TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24	// hashes the string at p with aeshashbody, or strhashFallback when useAeshash is 0
	1030#ifdef GOEXPERIMENT_regabiargs
	1031	// AX = ptr to string struct
	1032	// BX = seed
	1033#endif
	1034	CMPB	runtime·useAeshash(SB), $0
	1035	JEQ	noaes	// useAeshash == 0: take the fallback
	1036#ifndef GOEXPERIMENT_regabiargs
	1037	MOVQ	p+0(FP), AX	// ptr to string struct
	1038#endif
	1039	MOVQ	8(AX), CX	// length of string
	1040	MOVQ	(AX), AX	// string data (loaded last because it overwrites the struct pointer in AX)
	1041#ifndef GOEXPERIMENT_regabiargs
	1042	LEAQ	ret+16(FP), DX	// DX = address of the result slot, where aeshashbody stores the hash
	1043#endif
	1044	JMP	aeshashbody<>(SB)	// tail jump: aeshashbody's RET returns to our caller
	1045noaes:
	1046	JMP	runtime·strhashFallback<ABIInternal>(SB)
	1047
	1048// AX: data
	1049#ifdef GOEXPERIMENT_regabiargs
	1050// BX: hash seed
	1051#else
	1052// h+8(FP): hash seed
	1053#endif
	1054// CX: length
	1055#ifdef GOEXPERIMENT_regabiargs
	1056// At return: AX = return value
	1057#else
	1058// DX: address to put return value
	1059#endif
	1060TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	1061	// Fill an SSE register with our seeds.
	1062#ifdef GOEXPERIMENT_regabiargs
	1063	MOVQ	BX, X0				// 64 bits of per-table hash seed
	1064#else
	1065	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
	1066#endif
	1067	PINSRW	$4, CX, X0			// 16 bits of length
	1068	PSHUFHW $0, X0, X0			// repeat length 4 times total
	1069	MOVO	X0, X1				// save unscrambled seed
	1070	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	1071	AESENC	X0, X0				// scramble seed
	1072
	1073	CMPQ	CX, $16
	1074	JB	aes0to15	// length < 16
	1075	JE	aes16	// length == 16
	1076	CMPQ	CX, $32
	1077	JBE	aes17to32	// length 17..32
	1078	CMPQ	CX, $64
	1079	JBE	aes33to64	// length 33..64
	1080	CMPQ	CX, $128
	1081	JBE	aes65to128	// length 65..128
	1082	JMP	aes129plus	// length > 128
	1083
	1084aes0to15:
	1085	TESTQ	CX, CX
	1086	JE	aes0	// length == 0
	1087
	1088	ADDQ	$16, AX
	1089	TESTW	$0xff0, AX
	1090	JE	endofpage	// bits 4-11 of data+16 are all zero: a 16-byte load at data might cross a page
	1091
	1092	// 16 bytes loaded at this address won't cross
	1093	// a page boundary, so we can load it directly.
	1094	MOVOU	-16(AX), X1
	1095	ADDQ	CX, CX	// CX = 2*length, so (AX)(CX*8) below addresses masks + 16*length
	1096	MOVQ	$masks<>(SB), AX
	1097	PAND	(AX)(CX*8), X1	// keep only the first length bytes of the load
	1098final1:
	1099	PXOR	X0, X1	// xor data with seed
	1100	AESENC	X1, X1	// scramble combo 3 times
	1101	AESENC	X1, X1
	1102	AESENC	X1, X1
	1103#ifdef GOEXPERIMENT_regabiargs
	1104	MOVQ	X1, AX	// return X1
	1105#else
	1106	MOVQ	X1, (DX)
	1107#endif
	1108	RET
	1109
	1110endofpage:
	1111	// address ends in 1111xxxx. Might be up against
	1112	// a page boundary, so load ending at last byte.
	1113	// Then shift bytes down using pshufb.
	1114	MOVOU	-32(AX)(CX*1), X1	// AX is data+16 here, so this reads the 16 bytes ending at data+length
	1115	ADDQ	CX, CX	// CX = 2*length, so (AX)(CX*8) below addresses shifts + 16*length
	1116	MOVQ	$shifts<>(SB), AX
	1117	PSHUFB	(AX)(CX*8), X1
	1118	JMP	final1
	1119
	1120aes0:
	1121	// Return scrambled input seed
	1122	AESENC	X0, X0
	1123#ifdef GOEXPERIMENT_regabiargs
	1124	MOVQ	X0, AX	// return X0
	1125#else
	1126	MOVQ	X0, (DX)
	1127#endif
	1128	RET
	1129
	1130aes16:
	1131	MOVOU	(AX), X1	// exactly 16 bytes: load them all, no masking needed
	1132	JMP	final1
	1133
	1134aes17to32:
	1135	// make second starting seed
	1136	PXOR	runtime·aeskeysched+16(SB), X1
	1137	AESENC	X1, X1
	1138
	1139	// load data to be hashed
	1140	MOVOU	(AX), X2	// first 16 bytes
	1141	MOVOU	-16(AX)(CX*1), X3	// last 16 bytes (overlaps the first load when length < 32)
	1142
	1143	// xor with seed
	1144	PXOR	X0, X2
	1145	PXOR	X1, X3
	1146
	1147	// scramble 3 times
	1148	AESENC	X2, X2
	1149	AESENC	X3, X3
	1150	AESENC	X2, X2
	1151	AESENC	X3, X3
	1152	AESENC	X2, X2
	1153	AESENC	X3, X3
	1154
	1155	// combine results
	1156	PXOR	X3, X2
	1157#ifdef GOEXPERIMENT_regabiargs
	1158	MOVQ	X2, AX	// return X2
	1159#else
	1160	MOVQ	X2, (DX)
	1161#endif
	1162	RET
	1163
	1164aes33to64:
	1165	// make 3 more starting seeds
	1166	MOVO	X1, X2
	1167	MOVO	X1, X3
	1168	PXOR	runtime·aeskeysched+16(SB), X1
	1169	PXOR	runtime·aeskeysched+32(SB), X2
	1170	PXOR	runtime·aeskeysched+48(SB), X3
	1171	AESENC	X1, X1
	1172	AESENC	X2, X2
	1173	AESENC	X3, X3
	1174
	1175	MOVOU	(AX), X4	// first 32 bytes in X4, X5
	1176	MOVOU	16(AX), X5
	1177	MOVOU	-32(AX)(CX*1), X6	// last 32 bytes in X6, X7 (may overlap the first 32)
	1178	MOVOU	-16(AX)(CX*1), X7
	1179
	1180	PXOR	X0, X4
	1181	PXOR	X1, X5
	1182	PXOR	X2, X6
	1183	PXOR	X3, X7
	1184
	1185	AESENC	X4, X4
	1186	AESENC	X5, X5
	1187	AESENC	X6, X6
	1188	AESENC	X7, X7
	1189
	1190	AESENC	X4, X4
	1191	AESENC	X5, X5
	1192	AESENC	X6, X6
	1193	AESENC	X7, X7
	1194
	1195	AESENC	X4, X4
	1196	AESENC	X5, X5
	1197	AESENC	X6, X6
	1198	AESENC	X7, X7
	1199
	1200	PXOR	X6, X4
	1201	PXOR	X7, X5
	1202	PXOR	X5, X4
	1203#ifdef GOEXPERIMENT_regabiargs
	1204	MOVQ	X4, AX	// return X4
	1205#else
	1206	MOVQ	X4, (DX)
	1207#endif
	1208	RET
	1209
	1210aes65to128:
	1211	// make 7 more starting seeds
	1212	MOVO	X1, X2
	1213	MOVO	X1, X3
	1214	MOVO	X1, X4
	1215	MOVO	X1, X5
	1216	MOVO	X1, X6
	1217	MOVO	X1, X7
	1218	PXOR	runtime·aeskeysched+16(SB), X1
	1219	PXOR	runtime·aeskeysched+32(SB), X2
	1220	PXOR	runtime·aeskeysched+48(SB), X3
	1221	PXOR	runtime·aeskeysched+64(SB), X4
	1222	PXOR	runtime·aeskeysched+80(SB), X5
	1223	PXOR	runtime·aeskeysched+96(SB), X6
	1224	PXOR	runtime·aeskeysched+112(SB), X7
	1225	AESENC	X1, X1
	1226	AESENC	X2, X2
	1227	AESENC	X3, X3
	1228	AESENC	X4, X4
	1229	AESENC	X5, X5
	1230	AESENC	X6, X6
	1231	AESENC	X7, X7
	1232
	1233	// load data
	1234	MOVOU	(AX), X8	// first 64 bytes in X8..X11
	1235	MOVOU	16(AX), X9
	1236	MOVOU	32(AX), X10
	1237	MOVOU	48(AX), X11
	1238	MOVOU	-64(AX)(CX*1), X12	// last 64 bytes in X12..X15 (may overlap the first 64)
	1239	MOVOU	-48(AX)(CX*1), X13
	1240	MOVOU	-32(AX)(CX*1), X14
	1241	MOVOU	-16(AX)(CX*1), X15
	1242
	1243	// xor with seed
	1244	PXOR	X0, X8
	1245	PXOR	X1, X9
	1246	PXOR	X2, X10
	1247	PXOR	X3, X11
	1248	PXOR	X4, X12
	1249	PXOR	X5, X13
	1250	PXOR	X6, X14
	1251	PXOR	X7, X15
	1252
	1253	// scramble 3 times
	1254	AESENC	X8, X8
	1255	AESENC	X9, X9
	1256	AESENC	X10, X10
	1257	AESENC	X11, X11
	1258	AESENC	X12, X12
	1259	AESENC	X13, X13
	1260	AESENC	X14, X14
	1261	AESENC	X15, X15
	1262
	1263	AESENC	X8, X8
	1264	AESENC	X9, X9
	1265	AESENC	X10, X10
	1266	AESENC	X11, X11
	1267	AESENC	X12, X12
	1268	AESENC	X13, X13
	1269	AESENC	X14, X14
	1270	AESENC	X15, X15
	1271
	1272	AESENC	X8, X8
	1273	AESENC	X9, X9
	1274	AESENC	X10, X10
	1275	AESENC	X11, X11
	1276	AESENC	X12, X12
	1277	AESENC	X13, X13
	1278	AESENC	X14, X14
	1279	AESENC	X15, X15
	1280
	1281	// combine results
	1282	PXOR	X12, X8
	1283	PXOR	X13, X9
	1284	PXOR	X14, X10
	1285	PXOR	X15, X11
	1286	PXOR	X10, X8
	1287	PXOR	X11, X9
	1288	PXOR	X9, X8
	1289#ifdef GOEXPERIMENT_regabig
	1290	// X15 must be zero on return (it was used as scratch above)
	1291	PXOR	X15, X15
	1292#endif
	1293#ifdef GOEXPERIMENT_regabiargs
	1294	MOVQ	X8, AX	// return X8
	1295#else
	1296	MOVQ	X8, (DX)
	1297#endif
	1298	RET
	1299
	1300aes129plus:
	1301	// make 7 more starting seeds
	1302	MOVO	X1, X2
	1303	MOVO	X1, X3
	1304	MOVO	X1, X4
	1305	MOVO	X1, X5
	1306	MOVO	X1, X6
	1307	MOVO	X1, X7
	1308	PXOR	runtime·aeskeysched+16(SB), X1
	1309	PXOR	runtime·aeskeysched+32(SB), X2
	1310	PXOR	runtime·aeskeysched+48(SB), X3
	1311	PXOR	runtime·aeskeysched+64(SB), X4
	1312	PXOR	runtime·aeskeysched+80(SB), X5
	1313	PXOR	runtime·aeskeysched+96(SB), X6
	1314	PXOR	runtime·aeskeysched+112(SB), X7
	1315	AESENC	X1, X1
	1316	AESENC	X2, X2
	1317	AESENC	X3, X3
	1318	AESENC	X4, X4
	1319	AESENC	X5, X5
	1320	AESENC	X6, X6
	1321	AESENC	X7, X7
	1322
	1323	// start with last (possibly overlapping) block
	1324	MOVOU	-128(AX)(CX*1), X8
	1325	MOVOU	-112(AX)(CX*1), X9
	1326	MOVOU	-96(AX)(CX*1), X10
	1327	MOVOU	-80(AX)(CX*1), X11
	1328	MOVOU	-64(AX)(CX*1), X12
	1329	MOVOU	-48(AX)(CX*1), X13
	1330	MOVOU	-32(AX)(CX*1), X14
	1331	MOVOU	-16(AX)(CX*1), X15
	1332
	1333	// xor in seed
	1334	PXOR	X0, X8
	1335	PXOR	X1, X9
	1336	PXOR	X2, X10
	1337	PXOR	X3, X11
	1338	PXOR	X4, X12
	1339	PXOR	X5, X13
	1340	PXOR	X6, X14
	1341	PXOR	X7, X15
	1342
	1343	// compute number of remaining 128-byte blocks
	1344	DECQ	CX
	1345	SHRQ	$7, CX	// CX = (length-1)/128, at least 1 because length > 128 here
	1346
	1347aesloop:
	1348	// scramble state
	1349	AESENC	X8, X8
	1350	AESENC	X9, X9
	1351	AESENC	X10, X10
	1352	AESENC	X11, X11
	1353	AESENC	X12, X12
	1354	AESENC	X13, X13
	1355	AESENC	X14, X14
	1356	AESENC	X15, X15
	1357
	1358	// scramble state, xor in a block
	1359	MOVOU	(AX), X0
	1360	MOVOU	16(AX), X1
	1361	MOVOU	32(AX), X2
	1362	MOVOU	48(AX), X3
	1363	AESENC	X0, X8
	1364	AESENC	X1, X9
	1365	AESENC	X2, X10
	1366	AESENC	X3, X11
	1367	MOVOU	64(AX), X4
	1368	MOVOU	80(AX), X5
	1369	MOVOU	96(AX), X6
	1370	MOVOU	112(AX), X7
	1371	AESENC	X4, X12
	1372	AESENC	X5, X13
	1373	AESENC	X6, X14
	1374	AESENC	X7, X15
	1375
	1376	ADDQ	$128, AX	// advance to the next 128-byte block
	1377	DECQ	CX
	1378	JNE	aesloop	// loop until the block count reaches zero
	1379
	1380	// 3 more scrambles to finish
	1381	AESENC	X8, X8
	1382	AESENC	X9, X9
	1383	AESENC	X10, X10
	1384	AESENC	X11, X11
	1385	AESENC	X12, X12
	1386	AESENC	X13, X13
	1387	AESENC	X14, X14
	1388	AESENC	X15, X15
	1389	AESENC	X8, X8
	1390	AESENC	X9, X9
	1391	AESENC	X10, X10
	1392	AESENC	X11, X11
	1393	AESENC	X12, X12
	1394	AESENC	X13, X13
	1395	AESENC	X14, X14
	1396	AESENC	X15, X15
	1397	AESENC	X8, X8
	1398	AESENC	X9, X9
	1399	AESENC	X10, X10
	1400	AESENC	X11, X11
	1401	AESENC	X12, X12
	1402	AESENC	X13, X13
	1403	AESENC	X14, X14
	1404	AESENC	X15, X15
	1405
	1406	PXOR	X12, X8	// fold the eight lanes down into X8
	1407	PXOR	X13, X9
	1408	PXOR	X14, X10
	1409	PXOR	X15, X11
	1410	PXOR	X10, X8
	1411	PXOR	X11, X9
	1412	PXOR	X9, X8
	1413#ifdef GOEXPERIMENT_regabig
	1414	// X15 must be zero on return (it was used as scratch above)
	1415	PXOR	X15, X15
	1416#endif
	1417#ifdef GOEXPERIMENT_regabiargs
	1418	MOVQ	X8, AX	// return X8
	1419#else
	1420	MOVQ	X8, (DX)
	1421#endif
	1422	RET
	1423
	1424// func memhash32(p unsafe.Pointer, h uintptr) uintptr
	1425// ABIInternal for performance. Hashes the 4 bytes at p with seed h; jumps to memhash32Fallback when useAeshash is 0.
	1426TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
	1427#ifdef GOEXPERIMENT_regabiargs
	1428	// AX = ptr to data
	1429	// BX = seed
	1430#endif
	1431	CMPB	runtime·useAeshash(SB), $0
	1432	JEQ	noaes	// useAeshash == 0: take the fallback
	1433#ifdef GOEXPERIMENT_regabiargs
	1434	MOVQ	BX, X0	// X0 = seed
	1435#else
	1436	MOVQ	p+0(FP), AX	// ptr to data
	1437	MOVQ	h+8(FP), X0	// seed
	1438#endif
	1439	PINSRD	$2, (AX), X0	// data: 32 bits inserted as dword 2, just above the 64-bit seed
	1440	AESENC	runtime·aeskeysched+0(SB), X0	// three AES rounds keyed from aeskeysched
	1441	AESENC	runtime·aeskeysched+16(SB), X0
	1442	AESENC	runtime·aeskeysched+32(SB), X0
	1443#ifdef GOEXPERIMENT_regabiargs
	1444	MOVQ	X0, AX	// return X0
	1445#else
	1446	MOVQ	X0, ret+16(FP)
	1447#endif
	1448	RET
	1449noaes:
	1450	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
	1451
	1452// func memhash64(p unsafe.Pointer, h uintptr) uintptr
	1453// ABIInternal for performance. Hashes the 8 bytes at p with seed h; jumps to memhash64Fallback when useAeshash is 0.
	1454TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
	1455#ifdef GOEXPERIMENT_regabiargs
	1456	// AX = ptr to data
	1457	// BX = seed
	1458#else
	1459#endif
	1460	CMPB	runtime·useAeshash(SB), $0
	1461	JEQ	noaes	// useAeshash == 0: take the fallback
	1462#ifdef GOEXPERIMENT_regabiargs
	1463	MOVQ	BX, X0	// X0 = seed
	1464#else
	1465	MOVQ	p+0(FP), AX	// ptr to data
	1466	MOVQ	h+8(FP), X0	// seed
	1467#endif
	1468	PINSRQ	$1, (AX), X0	// data: 64 bits inserted as the high qword, above the seed
	1469	AESENC	runtime·aeskeysched+0(SB), X0	// three AES rounds keyed from aeskeysched
	1470	AESENC	runtime·aeskeysched+16(SB), X0
	1471	AESENC	runtime·aeskeysched+32(SB), X0
	1472#ifdef GOEXPERIMENT_regabiargs
	1473	MOVQ	X0, AX	// return X0
	1474#else
	1475	MOVQ	X0, ret+16(FP)
	1476#endif
	1477	RET
	1478noaes:
	1479	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
	1480
	1481// masks<> holds sixteen 16-byte masks. Entry n (at offset 16*n) keeps the low n bytes of a register and clears the data in the high part.
	1482DATA masks<>+0x00(SB)/8, $0x0000000000000000
	1483DATA masks<>+0x08(SB)/8, $0x0000000000000000
	1484DATA masks<>+0x10(SB)/8, $0x00000000000000ff
	1485DATA masks<>+0x18(SB)/8, $0x0000000000000000
	1486DATA masks<>+0x20(SB)/8, $0x000000000000ffff
	1487DATA masks<>+0x28(SB)/8, $0x0000000000000000
	1488DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
	1489DATA masks<>+0x38(SB)/8, $0x0000000000000000
	1490DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
	1491DATA masks<>+0x48(SB)/8, $0x0000000000000000
	1492DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
	1493DATA masks<>+0x58(SB)/8, $0x0000000000000000
	1494DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
	1495DATA masks<>+0x68(SB)/8, $0x0000000000000000
	1496DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
	1497DATA masks<>+0x78(SB)/8, $0x0000000000000000
	1498DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
	1499DATA masks<>+0x88(SB)/8, $0x0000000000000000
	1500DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
	1501DATA masks<>+0x98(SB)/8, $0x00000000000000ff
	1502DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
	1503DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
	1504DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
	1505DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
	1506DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
	1507DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
	1508DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
	1509DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
	1510DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
	1511DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
	1512DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
	1513DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
	1514GLOBL masks<>(SB),RODATA,$256
	1515
	1516// func checkASM() bool
	1517TEXT ·checkASM(SB),NOSPLIT,$0-1
	1518	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	1519	MOVQ	$masks<>(SB), AX
	1520	MOVQ	$shifts<>(SB), BX
	1521	ORQ	BX, AX	// a low bit set in either address survives the OR
	1522	TESTQ	$15, AX
	1523	SETEQ	ret+0(FP)	// true iff the low 4 bits of both addresses are zero
	1524	RET
	1525
	1526// these are arguments to pshufb. They move data down from
	1527// the high bytes of the register to the low bytes of the register.
	1528// index is how many bytes to move.
	1529DATA shifts<>+0x00(SB)/8, $0x0000000000000000
	1530DATA shifts<>+0x08(SB)/8, $0x0000000000000000
	1531DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
	1532DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
	1533DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
	1534DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
	1535DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
	1536DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
	1537DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
	1538DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
	1539DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
	1540DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
	1541DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
	1542DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
	1543DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
	1544DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
	1545DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
	1546DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
	1547DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
	1548DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
	1549DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
	1550DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
	1551DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
	1552DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
	1553DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
	1554DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
	1555DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
	1556DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
	1557DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
	1558DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
	1559DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
	1560DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
	1561GLOBL shifts<>(SB),RODATA,$256
	1562
	1563TEXT runtime·return0(SB), NOSPLIT, $0	// return0 sets AX to 0 and returns
	1564	MOVL	$0, AX	// the 32-bit write also clears the upper half of the 64-bit register
	1565	RET
	1566
	1567
	1568// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
	1569// Must obey the gcc calling convention. The result is left in AX; only AX and CX are written.
	1570TEXT _cgo_topofstack(SB),NOSPLIT,$0
	1571	get_tls(CX)
	1572	MOVQ	g(CX), AX	// AX = g
	1573	MOVQ	g_m(AX), AX	// AX = g->m
	1574	MOVQ	m_curg(AX), AX	// AX = g->m->curg
	1575	MOVQ	(g_stack+stack_hi)(AX), AX	// AX = g->m->curg.stack.hi
	1576	RET
	1577
	1578// The top-most function running on a goroutine
	1579// returns to goexit+PCQuantum, i.e. just past the leading NOP below.
	1580TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
	1581	BYTE	$0x90	// NOP
	1582	CALL	runtime·goexit1(SB)	// does not return
	1583	// traceback from goexit1 must hit code range of goexit
	1584	BYTE	$0x90	// NOP
	1585
	1586// This is called from .init_array and follows the platform, not Go, ABI. It appends the moduledata passed in DI to the list ending at runtime·lastmoduledatap.
	1587TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	1588	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	1589	MOVQ	runtime·lastmoduledatap(SB), AX	// AX = current tail of the list
	1590	MOVQ	DI, moduledata_next(AX)	// tail.next = DI
	1591	MOVQ	DI, runtime·lastmoduledatap(SB)	// DI becomes the new tail
	1592	POPQ	R15
	1593	RET
	1594
	1595// Initialize special registers then jump to sigpanic.
	1596// This function is injected from the signal handler for panicking
	1597// signals. It is quite painful to set X15 in the signal context,
	1598// so we do it here. With GOEXPERIMENT_regabig, R14 is loaded with g from TLS and X15 is zeroed.
	1599TEXT ·sigpanic0<ABIInternal>(SB),NOSPLIT,$0-0
	1600#ifdef GOEXPERIMENT_regabig
	1601	get_tls(R14)
	1602	MOVQ	g(R14), R14	// R14 = g
	1603	XORPS	X15, X15	// X15 = 0
	1604#endif
	1605	JMP	·sigpanic<ABIInternal>(SB)
	1606
	1607// gcWriteBarrier performs a heap pointer write and informs the GC.
	1608//
	1609// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
	1610// - DI is the destination of the write
	1611// - AX is the value being written at DI
	1612// It clobbers FLAGS. It does not clobber any general-purpose registers,
	1613// but may clobber others (e.g., SSE registers).
	1614// Defined as ABIInternal since it does not use the stack-based Go ABI.
	1615TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$112
	1616	// Save the registers clobbered by the fast path. This is slightly
	1617	// faster than having the caller spill these.
	1618	MOVQ	R12, 96(SP)
	1619	MOVQ	R13, 104(SP)
	1620	// TODO: Consider passing g.m.p in as an argument so they can be shared
	1621	// across a sequence of write barriers.
	1622#ifdef GOEXPERIMENT_regabig
	1623	MOVQ	g_m(R14), R13
	1624#else
	1625	get_tls(R13)
	1626	MOVQ	g(R13), R13
	1627	MOVQ	g_m(R13), R13
	1628#endif
	1629	MOVQ	m_p(R13), R13	// R13 = g.m.p
	1630	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12	// R12 = p.wbBuf.next
	1631	// Increment wbBuf.next position.
	1632	LEAQ	16(R12), R12	// one record is 16 bytes: the value and the old *slot
	1633	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
	1634	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)	// result is consumed by JEQ flush below; the MOVQs in between leave FLAGS alone
	1635	// Record the write.
	1636	MOVQ	AX, -16(R12)	// Record value
	1637	// Note: This turns bad pointer writes into bad
	1638	// pointer reads, which could be confusing. We could avoid
	1639	// reading from obviously bad pointers, which would
	1640	// take care of the vast majority of these. We could
	1641	// patch this up in the signal handler, or use XCHG to
	1642	// combine the read and the write.
	1643	MOVQ	(DI), R13
	1644	MOVQ	R13, -8(R12)	// Record *slot
	1645	// Is the buffer full? (flags set in CMPQ above)
	1646	JEQ	flush
	1647ret:
	1648	MOVQ	96(SP), R12	// restore the two scratch registers saved on entry
	1649	MOVQ	104(SP), R13
	1650	// Do the write.
	1651	MOVQ	AX, (DI)
	1652	RET
	1653
	1654flush:
	1655	// Save all general purpose registers since these could be
	1656	// clobbered by wbBufFlush and were not saved by the caller.
	1657	// It is possible for wbBufFlush to clobber other registers
	1658	// (e.g., SSE registers), but the compiler takes care of saving
	1659	// those in the caller if necessary. This strikes a balance
	1660	// with registers that are likely to be used.
	1661	//
	1662	// We don't have type information for these, but all code under
	1663	// here is NOSPLIT, so nothing will observe these.
	1664	//
	1665	// TODO: We could strike a different balance; e.g., saving X0
	1666	// and not saving GP registers that are less likely to be used.
	1667	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
	1668	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
	1669	MOVQ	BX, 16(SP)
	1670	MOVQ	CX, 24(SP)
	1671	MOVQ	DX, 32(SP)
	1672	// DI already saved
	1673	MOVQ	SI, 40(SP)
	1674	MOVQ	BP, 48(SP)
	1675	MOVQ	R8, 56(SP)
	1676	MOVQ	R9, 64(SP)
	1677	MOVQ	R10, 72(SP)
	1678	MOVQ	R11, 80(SP)
	1679	// R12 already saved
	1680	// R13 already saved
	1681	// R14 is g
	1682	MOVQ	R15, 88(SP)
	1683
	1684	// This takes arguments DI and AX
	1685	CALL	runtime·wbBufFlush(SB)
	1686
	1687	MOVQ	0(SP), DI	// reload every register spilled above
	1688	MOVQ	8(SP), AX
	1689	MOVQ	16(SP), BX
	1690	MOVQ	24(SP), CX
	1691	MOVQ	32(SP), DX
	1692	MOVQ	40(SP), SI
	1693	MOVQ	48(SP), BP
	1694	MOVQ	56(SP), R8
	1695	MOVQ	64(SP), R9
	1696	MOVQ	72(SP), R10
	1697	MOVQ	80(SP), R11
	1698	MOVQ	88(SP), R15
	1699	JMP	ret	// rejoin the fast path: restore R12/R13, do the write, return
	1700
	1701// gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.
	1702// Defined as ABIInternal since it does not use the stable Go ABI.
	1703TEXT runtime·gcWriteBarrierCX<ABIInternal>(SB),NOSPLIT,$0
	1704	XCHGQ CX, AX	// move the value into AX, where gcWriteBarrier expects it; AX is parked in CX
	1705	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	1706	XCHGQ CX, AX	// swap back so the caller's AX and CX are unchanged
	1707	RET
	1708
	1709// gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX.
	1710// Defined as ABIInternal since it does not use the stable Go ABI.
	1711TEXT runtime·gcWriteBarrierDX<ABIInternal>(SB),NOSPLIT,$0
	1712	XCHGQ DX, AX	// move the value into AX, where gcWriteBarrier expects it; AX is parked in DX
	1713	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	1714	XCHGQ DX, AX	// swap back so the caller's AX and DX are unchanged
	1715	RET
	1716
	1717// gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX.
	1718// Defined as ABIInternal since it does not use the stable Go ABI.
	1719TEXT runtime·gcWriteBarrierBX<ABIInternal>(SB),NOSPLIT,$0
	1720	XCHGQ BX, AX	// move the value into AX, where gcWriteBarrier expects it; AX is parked in BX
	1721	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	1722	XCHGQ BX, AX	// swap back so the caller's AX and BX are unchanged
	1723	RET
	1724
	1725// gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP.
	1726// Defined as ABIInternal since it does not use the stable Go ABI.
	1727TEXT runtime·gcWriteBarrierBP<ABIInternal>(SB),NOSPLIT,$0
	1728	XCHGQ BP, AX	// move the value into AX, where gcWriteBarrier expects it; AX is parked in BP
	1729	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	1730	XCHGQ BP, AX	// swap back so the caller's AX and BP are unchanged
	1731	RET
	1732
	1733// gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI.
	1734// Defined as ABIInternal since it does not use the stable Go ABI.
	1735TEXT runtime·gcWriteBarrierSI<ABIInternal>(SB),NOSPLIT,$0
	1736	XCHGQ SI, AX	// move the value into AX, where gcWriteBarrier expects it; AX is parked in SI
	1737	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	1738	XCHGQ SI, AX	// swap back so the caller's AX and SI are unchanged
	1739	RET
	1740
	1741// gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8.
	1742// Defined as ABIInternal since it does not use the stable Go ABI.
	1743TEXT runtime·gcWriteBarrierR8<ABIInternal>(SB),NOSPLIT,$0
	1744	XCHGQ R8, AX	// move the value into AX, where gcWriteBarrier expects it; AX is parked in R8
	1745	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	1746	XCHGQ R8, AX	// swap back so the caller's AX and R8 are unchanged
	1747	RET
	1748
	1749// gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9.
	1750// Defined as ABIInternal since it does not use the stable Go ABI.
	1751TEXT runtime·gcWriteBarrierR9<ABIInternal>(SB),NOSPLIT,$0
	1752	XCHGQ R9, AX	// move the value into AX, where gcWriteBarrier expects it; AX is parked in R9
	1753	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	1754	XCHGQ R9, AX	// swap back so the caller's AX and R9 are unchanged
	1755	RET
	1756
	1757DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"	// reason string debugCallV2 reports when no debugCall frame is big enough
	1758GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
	1759
	1760// debugCallV2 is the entry point for debugger-injected function
	1761// calls on running goroutines. It informs the runtime that a
	1762// debug call has been injected and creates a call frame for the
	1763// debugger to fill in.
	1764//
	1765// To inject a function call, a debugger should:
	1766// 1. Check that the goroutine is in state _Grunning and that
	1767//		there are at least 256 bytes free on the stack.
	1768// 2. Push the current PC on the stack (updating SP).
	1769// 3. Write the desired argument frame size at SP-16 (using the SP
	1770//		after step 2).
	1771// 4. Save all machine registers (including flags and XMM registers)
	1772//		so they can be restored later by the debugger.
	1773// 5. Set the PC to debugCallV2 and resume execution.
	1774//
	1775// If the goroutine is in state _Grunnable, then it's not generally
	1776// safe to inject a call because it may return out via other runtime
	1777// operations. Instead, the debugger should unwind the stack to find
	1778// the return to non-runtime code, add a temporary breakpoint there,
	1779// and inject the call once that breakpoint is hit.
	1780//
	1781// If the goroutine is in any other state, it's not safe to inject a call.
	1782//
	1783// This function communicates back to the debugger by setting R12 and
	1784// invoking INT3 to raise a breakpoint signal. See the comments in the
	1785// implementation for the protocol the debugger is expected to
	1786// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
	1787//
	1788// The debugger must ensure that any pointers passed to the function
	1789// obey escape analysis requirements. Specifically, it must not pass
	1790// a stack pointer to an escaping argument. debugCallV2 cannot check
	1791// this invariant.
	1792//
	1793// This is ABIInternal because Go code injects its PC directly into new
	1794// goroutine stacks.
	1795TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
	1796	// Save all registers that may contain pointers so they can be
	1797	// conservatively scanned.
	1798	//
	1799	// We can't do anything that might clobber any of these
	1800	// registers before this.
	1801	MOVQ	R15, r15-(14*8+8)(SP)
	1802	MOVQ	R14, r14-(13*8+8)(SP)
	1803	MOVQ	R13, r13-(12*8+8)(SP)
	1804	MOVQ	R12, r12-(11*8+8)(SP)
	1805	MOVQ	R11, r11-(10*8+8)(SP)
	1806	MOVQ	R10, r10-(9*8+8)(SP)
	1807	MOVQ	R9, r9-(8*8+8)(SP)
	1808	MOVQ	R8, r8-(7*8+8)(SP)
	1809	MOVQ	DI, di-(6*8+8)(SP)
	1810	MOVQ	SI, si-(5*8+8)(SP)
	1811	MOVQ	BP, bp-(4*8+8)(SP)
	1812	MOVQ	BX, bx-(3*8+8)(SP)
	1813	MOVQ	DX, dx-(2*8+8)(SP)
	1814	// Save the frame size before we clobber it. Either of the last
	1815	// saves could clobber this depending on whether there's a saved BP.
	1816	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	1817	MOVQ	CX, cx-(1*8+8)(SP)
	1818	MOVQ	AX, ax-(0*8+8)(SP)
	1819
	1820	// Save the argument frame size.
	1821	MOVQ	DX, frameSize-128(SP)
	1822
	1823	// Perform a safe-point check.
	1824	MOVQ	retpc-8(FP), AX	// Caller's PC
	1825	MOVQ	AX, 0(SP)	// argument to debugCallCheck
	1826	CALL	runtime·debugCallCheck(SB)
	1827	MOVQ	8(SP), AX	// first result word of debugCallCheck (start of the reason string)
	1828	TESTQ	AX, AX
	1829	JZ	good	// zero: the check passed
	1830	// The safety check failed. Put the reason string at the top
	1831	// of the stack.
	1832	MOVQ	AX, 0(SP)
	1833	MOVQ	16(SP), AX	// second result word (the rest of the reason string)
	1834	MOVQ	AX, 8(SP)
	1835	// Set R12 to 8 and invoke INT3. The debugger should get the
	1836	// reason a call can't be injected from the top of the stack
	1837	// and resume execution.
	1838	MOVQ	$8, R12
	1839	BYTE	$0xcc	// INT3
	1840	JMP	restore
	1841
	1842good:
	1843	// Registers are saved and it's safe to make a call.
	1844	// Open up a call frame, moving the stack if necessary.
	1845	//
	1846	// Once the frame is allocated, this will set R12 to 0 and
	1847	// invoke INT3. The debugger should write the argument
	1848	// frame for the call at SP, set up argument registers, push
	1849	// the trapping PC on the stack, set the PC to the function to
	1850	// call, set RDX to point to the closure (if a closure call),
	1851	// and resume execution.
	1852	//
	1853	// If the function returns, this will set R12 to 1 and invoke
	1854	// INT3. The debugger can then inspect any return value saved
	1855	// on the stack at SP and in registers and resume execution again.
	1856	//
	1857	// If the function panics, this will set R12 to 2 and invoke INT3.
	1858	// The interface{} value of the panic will be at SP. The debugger
	1859	// can inspect the panic value and resume execution again.
	1860#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	1861	CMPQ	AX, $MAXSIZE;			\
	1862	JA	5(PC);				\
	1863	MOVQ	$NAME(SB), AX;			\
	1864	MOVQ	AX, 0(SP);			\
	1865	CALL	runtime·debugCallWrap(SB);	\
	1866	JMP	restore
	1867
	1868	MOVQ	frameSize-128(SP), AX	// AX = requested argument frame size, tested by each DEBUG_CALL_DISPATCH in turn
	1869	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	1870	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	1871	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	1872	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	1873	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	1874	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	1875	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	1876	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	1877	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	1878	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	1879	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	1880	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	1881	// The frame size is too large. Report the error.
	1882	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	1883	MOVQ	AX, 0(SP)
	1884	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
	1885	MOVQ	$8, R12
	1886	BYTE	$0xcc	// INT3
	1887	JMP	restore
	1888
	1889restore:
	1890	// Calls and failures resume here.
	1891	//
	1892	// Set R12 to 16 and invoke INT3. The debugger should restore
	1893	// all registers except RIP and RSP and resume execution.
	1894	MOVQ	$16, R12
	1895	BYTE	$0xcc	// INT3
	1896	// We must not modify flags after this point.
	1897
	1898	// Restore pointer-containing registers, which may have been
	1899	// modified from the debugger's copy by stack copying.
	1900	MOVQ	ax-(0*8+8)(SP), AX
	1901	MOVQ	cx-(1*8+8)(SP), CX
	1902	MOVQ	dx-(2*8+8)(SP), DX
	1903	MOVQ	bx-(3*8+8)(SP), BX
	1904	MOVQ	bp-(4*8+8)(SP), BP
	1905	MOVQ	si-(5*8+8)(SP), SI
	1906	MOVQ	di-(6*8+8)(SP), DI
	1907	MOVQ	r8-(7*8+8)(SP), R8
	1908	MOVQ	r9-(8*8+8)(SP), R9
	1909	MOVQ	r10-(9*8+8)(SP), R10
	1910	MOVQ	r11-(10*8+8)(SP), R11
	1911	MOVQ	r12-(11*8+8)(SP), R12
	1912	MOVQ	r13-(12*8+8)(SP), R13
	1913	MOVQ	r14-(13*8+8)(SP), R14
	1914	MOVQ	r15-(14*8+8)(SP), R15
	1915
	1916	RET
	1917
	1918// runtime.debugCallCheck assumes that functions defined with the
	1919// DEBUG_CALL_FN macro are safe points to inject calls. Each generated function has a MAXSIZE-byte frame; it sets R12 to 0 and traps (INT3), then sets R12 to 1 and traps again before returning.
	1920#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
	1921TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	1922	NO_LOCAL_POINTERS;			\
	1923	MOVQ	$0, R12;				\
	1924	BYTE	$0xcc;				\
	1925	MOVQ	$1, R12;				\
	1926	BYTE	$0xcc;				\
	1927	RET
	1928DEBUG_CALL_FN(debugCall32<>, 32)
	1929DEBUG_CALL_FN(debugCall64<>, 64)
	1930DEBUG_CALL_FN(debugCall128<>, 128)
	1931DEBUG_CALL_FN(debugCall256<>, 256)
	1932DEBUG_CALL_FN(debugCall512<>, 512)
	1933DEBUG_CALL_FN(debugCall1024<>, 1024)
	1934DEBUG_CALL_FN(debugCall2048<>, 2048)
	1935DEBUG_CALL_FN(debugCall4096<>, 4096)
	1936DEBUG_CALL_FN(debugCall8192<>, 8192)
	1937DEBUG_CALL_FN(debugCall16384<>, 16384)
	1938DEBUG_CALL_FN(debugCall32768<>, 32768)
	1939DEBUG_CALL_FN(debugCall65536<>, 65536)
	1940
	1941// func debugCallPanicked(val interface{})
	1942TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	1943	// Copy the panic value to the top of stack.
	1944	MOVQ	val_type+0(FP), AX
	1945	MOVQ	AX, 0(SP)	// first interface word (type) at 0(SP)
	1946	MOVQ	val_data+8(FP), AX
	1947	MOVQ	AX, 8(SP)	// second interface word (data) at 8(SP)
	1948	MOVQ	$2, R12	// R12 = 2 marks the trap below as a panic report
	1949	BYTE	$0xcc	// INT3
	1950	RET
	1951
	1952// Note: these functions use a special calling convention to save generated code space.
	1953// Arguments are passed in registers, but the space for those arguments are allocated
	1954// in the caller's stack frame. These stubs write the args into that stack space and
	1955// then tail call to the corresponding runtime handler.
	1956// The tail call makes these stubs disappear in backtraces.
	1957// Defined as ABIInternal since they do not use the stack-based Go ABI.
	1958TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
	1959#ifdef GOEXPERIMENT_regabiargs
	1960	MOVQ	CX, BX	// y arrives in CX and is passed on in BX; x is already in AX
	1961#else
	1962	MOVQ	AX, x+0(FP)	// x arrives in AX
	1963	MOVQ	CX, y+8(FP)	// y arrives in CX
	1964#endif
	1965	JMP	runtime·goPanicIndex<ABIInternal>(SB)
	1966TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
	1967#ifdef GOEXPERIMENT_regabiargs
	1968	MOVQ	CX, BX	// y arrives in CX and is passed on in BX; x is already in AX
	1969#else
	1970	MOVQ	AX, x+0(FP)	// x arrives in AX
	1971	MOVQ	CX, y+8(FP)	// y arrives in CX
	1972#endif
	1973	JMP	runtime·goPanicIndexU<ABIInternal>(SB)	// tail jump to the handler
	1974TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
	1975#ifdef GOEXPERIMENT_regabiargs
	1976	MOVQ	CX, AX	// x arrives in CX and is passed on in AX
	1977	MOVQ	DX, BX	// y arrives in DX and is passed on in BX
	1978#else
	1979	MOVQ	CX, x+0(FP)	// x arrives in CX
	1980	MOVQ	DX, y+8(FP)	// y arrives in DX
	1981#endif
	1982	JMP	runtime·goPanicSliceAlen<ABIInternal>(SB)	// tail jump to the handler
	1983TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
	1984#ifdef GOEXPERIMENT_regabiargs
	1985	MOVQ	CX, AX	// x arrives in CX and is passed on in AX
	1986	MOVQ	DX, BX	// y arrives in DX and is passed on in BX
	1987#else
	1988	MOVQ	CX, x+0(FP)	// x arrives in CX
	1989	MOVQ	DX, y+8(FP)	// y arrives in DX
	1990#endif
	1991	JMP	runtime·goPanicSliceAlenU<ABIInternal>(SB)	// tail jump to the handler
	1992TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
	1993#ifdef GOEXPERIMENT_regabiargs
	1994	MOVQ	CX, AX	// x arrives in CX and is passed on in AX
	1995	MOVQ	DX, BX	// y arrives in DX and is passed on in BX
	1996#else
	1997	MOVQ	CX, x+0(FP)	// x arrives in CX
	1998	MOVQ	DX, y+8(FP)	// y arrives in DX
	1999#endif
	2000	JMP	runtime·goPanicSliceAcap<ABIInternal>(SB)	// tail jump to the handler
	2001TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
	2002#ifdef GOEXPERIMENT_regabiargs
	2003	MOVQ	CX, AX	// x arrives in CX and is passed on in AX
	2004	MOVQ	DX, BX	// y arrives in DX and is passed on in BX
	2005#else
	2006	MOVQ	CX, x+0(FP)	// x arrives in CX
	2007	MOVQ	DX, y+8(FP)	// y arrives in DX
	2008#endif
	2009	JMP	runtime·goPanicSliceAcapU<ABIInternal>(SB)	// tail jump to the handler
	// panicSliceB forwards its two arguments (x in AX, y in CX) to goPanicSliceB.
	2010TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
	2011#ifdef GOEXPERIMENT_regabiargs
	// Register-args build: x is left in AX; y is copied from CX into BX.
	2012	MOVQ	CX, BX
	2013#else
	// Stack-args build: store x and y into the argument slots at 0(FP) and 8(FP).
	2014	MOVQ	AX, x+0(FP)
	2015	MOVQ	CX, y+8(FP)
	2016#endif
	// Tail call into the handler.
	2017	JMP	runtime·goPanicSliceB<ABIInternal>(SB)
	// panicSliceBU forwards its two arguments (x in AX, y in CX) to goPanicSliceBU.
	2018TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
	2019#ifdef GOEXPERIMENT_regabiargs
	// Register-args build: only y has to move (CX -> BX); AX is not touched.
	2020	MOVQ	CX, BX
	2021#else
	// Stack-args build: write x to 0(FP) and y to 8(FP).
	2022	MOVQ	AX, x+0(FP)
	2023	MOVQ	CX, y+8(FP)
	2024#endif
	// Tail call into the handler.
	2025	JMP	runtime·goPanicSliceBU<ABIInternal>(SB)
	// panicSlice3Alen forwards its two arguments (x in DX, y in BX) to goPanicSlice3Alen.
	2026TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
	2027#ifdef GOEXPERIMENT_regabiargs
	// Register-args build: x is copied from DX into AX; BX (y) is not touched.
	2028	MOVQ	DX, AX
	2029#else
	// Stack-args build: store x and y into the argument slots at 0(FP) and 8(FP).
	2030	MOVQ	DX, x+0(FP)
	2031	MOVQ	BX, y+8(FP)
	2032#endif
	// Tail call into the handler.
	2033	JMP	runtime·goPanicSlice3Alen<ABIInternal>(SB)
	// panicSlice3AlenU forwards its two arguments (x in DX, y in BX) to goPanicSlice3AlenU.
	2034TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
	2035#ifdef GOEXPERIMENT_regabiargs
	// Register-args build: only x has to move (DX -> AX); y is left in BX.
	2036	MOVQ	DX, AX
	2037#else
	// Stack-args build: write x to 0(FP) and y to 8(FP).
	2038	MOVQ	DX, x+0(FP)
	2039	MOVQ	BX, y+8(FP)
	2040#endif
	// Tail call into the handler.
	2041	JMP	runtime·goPanicSlice3AlenU<ABIInternal>(SB)
	// panicSlice3Acap forwards its two arguments (x in DX, y in BX) to goPanicSlice3Acap.
	2042TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
	2043#ifdef GOEXPERIMENT_regabiargs
	// Register-args build: x is copied from DX into AX; BX (y) is not touched.
	2044	MOVQ	DX, AX
	2045#else
	// Stack-args build: store x and y into the argument slots at 0(FP) and 8(FP).
	2046	MOVQ	DX, x+0(FP)
	2047	MOVQ	BX, y+8(FP)
	2048#endif
	// Tail call into the handler.
	2049	JMP	runtime·goPanicSlice3Acap<ABIInternal>(SB)
	// panicSlice3AcapU forwards its two arguments (x in DX, y in BX) to goPanicSlice3AcapU.
	2050TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
	2051#ifdef GOEXPERIMENT_regabiargs
	// Register-args build: only x has to move (DX -> AX); y is left in BX.
	2052	MOVQ	DX, AX
	2053#else
	// Stack-args build: write x to 0(FP) and y to 8(FP).
	2054	MOVQ	DX, x+0(FP)
	2055	MOVQ	BX, y+8(FP)
	2056#endif
	// Tail call into the handler.
	2057	JMP	runtime·goPanicSlice3AcapU<ABIInternal>(SB)
	// panicSlice3B forwards its two arguments (x in CX, y in DX) to goPanicSlice3B.
	2058TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
	2059#ifdef GOEXPERIMENT_regabiargs
	// Register-args build: move x from CX to AX and y from DX to BX.
	2060	MOVQ	CX, AX
	2061	MOVQ	DX, BX
	2062#else
	// Stack-args build: store x and y into the argument slots at 0(FP) and 8(FP).
	2063	MOVQ	CX, x+0(FP)
	2064	MOVQ	DX, y+8(FP)
	2065#endif
	// Tail call into the handler.
	2066	JMP	runtime·goPanicSlice3B<ABIInternal>(SB)
	// panicSlice3BU forwards its two arguments (x in CX, y in DX) to goPanicSlice3BU.
	2067TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
	2068#ifdef GOEXPERIMENT_regabiargs
	// Register-args build: CX -> AX (x), DX -> BX (y).
	2069	MOVQ	CX, AX
	2070	MOVQ	DX, BX
	2071#else
	// Stack-args build: write x to 0(FP) and y to 8(FP).
	2072	MOVQ	CX, x+0(FP)
	2073	MOVQ	DX, y+8(FP)
	2074#endif
	// Tail call into the handler.
	2075	JMP	runtime·goPanicSlice3BU<ABIInternal>(SB)
	// panicSlice3C forwards its two arguments (x in AX, y in CX) to goPanicSlice3C.
	2076TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
	2077#ifdef GOEXPERIMENT_regabiargs
	// Register-args build: x is left in AX; y is copied from CX into BX.
	2078	MOVQ	CX, BX
	2079#else
	// Stack-args build: store x and y into the argument slots at 0(FP) and 8(FP).
	2080	MOVQ	AX, x+0(FP)
	2081	MOVQ	CX, y+8(FP)
	2082#endif
	// Tail call into the handler.
	2083	JMP	runtime·goPanicSlice3C<ABIInternal>(SB)
	// panicSlice3CU forwards its two arguments (x in AX, y in CX) to goPanicSlice3CU.
	2084TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
	2085#ifdef GOEXPERIMENT_regabiargs
	// Register-args build: only y has to move (CX -> BX); AX is not touched.
	2086	MOVQ	CX, BX
	2087#else
	// Stack-args build: write x to 0(FP) and y to 8(FP).
	2088	MOVQ	AX, x+0(FP)
	2089	MOVQ	CX, y+8(FP)
	2090#endif
	// Tail call into the handler.
	2091	JMP	runtime·goPanicSlice3CU<ABIInternal>(SB)
	// panicSliceConvert forwards its two arguments (x in DX, y in BX) to goPanicSliceConvert.
	2092TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
	2093#ifdef GOEXPERIMENT_regabiargs
	// Register-args build: x is copied from DX into AX; BX (y) is not touched.
	2094	MOVQ	DX, AX
	2095#else
	// Stack-args build: store x and y into the argument slots at 0(FP) and 8(FP).
	2096	MOVQ	DX, x+0(FP)
	2097	MOVQ	BX, y+8(FP)
	2098#endif
	// Tail call into the handler.
	2099	JMP	runtime·goPanicSliceConvert<ABIInternal>(SB)
	2100
	2101#ifdef GOOS_android
	2102// Use the free TLS_SLOT_APP slot #2 on Android Q.
	2103// Earlier androids are set up in gcc_android.c.
	// Only assembled for android builds: runtime·tls_g is an 8-byte, NOPTR-flagged
	// global initialized to 16.
	// NOTE(review): 16 presumably is slot #2 times an 8-byte slot size — confirm.
	2104DATA runtime·tls_g+0(SB)/8, $16
	2105GLOBL runtime·tls_g+0(SB), NOPTR, $8
	2106#endif
	2107
	2108// The compiler and assembler's -spectre=ret mode rewrites
	2109// all indirect CALL AX / JMP AX instructions to be
	2110// CALL retpolineAX / JMP retpolineAX.
	2111// See https://support.google.com/faqs/answer/7625886.
	// RETPOLINE(reg) emits, as raw bytes, a 12-byte sequence for the register whose
	// hardware number is reg (0 = AX ... 15 = R15):
	//   - 0xE8 + rel32 of 4: CALL to "setup", skipping the 2-byte PAUSE and 2-byte JMP.
	//   - "nospec": PAUSE followed by a short JMP with rel8 of -4, i.e. back to the PAUSE.
	//   - "setup": MOVQ reg, 0(SP) stores the register over the return address that the
	//     CALL just pushed, and RET then transfers control to that stored value.
	// Encoding of the MOVQ: bit 3 of reg is shifted into the REX prefix (0x48 | 4 for
	// R8-R15), the low three bits of reg go into the ModRM reg field, and the trailing
	// 0x24 is the SIB byte that selects (SP) as the base.
	2112#define RETPOLINE(reg) \
	2113	/*	 CALL setup */		 BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0;	\
	2114	/* nospec: */									\
	2115	/*	 PAUSE */					 BYTE $0xF3; BYTE $0x90;					\
	2116	/*	 JMP nospec */			BYTE $0xEB; BYTE $-(2+2);				\
	2117	/* setup: */									\
	2118	/*	 MOVQ AX, 0(SP) */	BYTE $0x48|((reg&8)>>1); BYTE $0x89;			\
	2119													BYTE $0x04|((reg&7)<<3); BYTE $0x24;			\
	2120	/*	 RET */						 BYTE $0xC3
	2121
	// One retpoline stub per register. Each is a NOSPLIT, zero-frame function whose
	// whole body is the RETPOLINE expansion for that register's number:
	// AX=0, CX=1, DX=2, BX=3, BP=5, SI=6, DI=7, R8..R15=8..15. Number 4 (SP) has no stub.
	2122TEXT runtime·retpolineAX(SB),NOSPLIT,$0; RETPOLINE(0)
	2123TEXT runtime·retpolineCX(SB),NOSPLIT,$0; RETPOLINE(1)
	2124TEXT runtime·retpolineDX(SB),NOSPLIT,$0; RETPOLINE(2)
	2125TEXT runtime·retpolineBX(SB),NOSPLIT,$0; RETPOLINE(3)
	2126/* SP is 4, can't happen / magic encodings */
	2127TEXT runtime·retpolineBP(SB),NOSPLIT,$0; RETPOLINE(5)
	2128TEXT runtime·retpolineSI(SB),NOSPLIT,$0; RETPOLINE(6)
	2129TEXT runtime·retpolineDI(SB),NOSPLIT,$0; RETPOLINE(7)
	2130TEXT runtime·retpolineR8(SB),NOSPLIT,$0; RETPOLINE(8)
	2131TEXT runtime·retpolineR9(SB),NOSPLIT,$0; RETPOLINE(9)
	2132TEXT runtime·retpolineR10(SB),NOSPLIT,$0; RETPOLINE(10)
	2133TEXT runtime·retpolineR11(SB),NOSPLIT,$0; RETPOLINE(11)
	2134TEXT runtime·retpolineR12(SB),NOSPLIT,$0; RETPOLINE(12)
	2135TEXT runtime·retpolineR13(SB),NOSPLIT,$0; RETPOLINE(13)
	2136TEXT runtime·retpolineR14(SB),NOSPLIT,$0; RETPOLINE(14)
	2137TEXT runtime·retpolineR15(SB),NOSPLIT,$0; RETPOLINE(15)

View as plain text