Text file
src/runtime/asm_amd64.s
Documentation: runtime
1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "go_tls.h"
7#include "funcdata.h"
8#include "textflag.h"
9#include "cgo/abi_amd64.h"
10
11// _rt0_amd64 is common startup code for most amd64 systems when using
12// internal linking. This is the entry point for the program from the
13// kernel for an ordinary -buildmode=exe program. The stack holds the
14// number of arguments and the C-style argv.
15TEXT _rt0_amd64(SB),NOSPLIT,$-8
// Loads argc/argv from the entry stack into DI/SI, the registers
// runtime·rt0_go reads them from, then tail-jumps there (no return).
16 MOVQ 0(SP), DI // argc
17 LEAQ 8(SP), SI // argv
18 JMP runtime·rt0_go(SB)
19
20// main is common startup code for most amd64 systems when using
21// external linking. The C startup code will call the symbol "main"
22// passing argc and argv in the usual C ABI registers DI and SI.
23TEXT main(SB),NOSPLIT,$-8
// DI (argc) and SI (argv) are passed through untouched to rt0_go.
24 JMP runtime·rt0_go(SB)
25
26// _rt0_amd64_lib is common startup code for most amd64 systems when
27// using -buildmode=c-archive or -buildmode=c-shared. The linker will
28// arrange to invoke this function as a global constructor (for
29// c-archive) or when the shared library is loaded (for c-shared).
30// We expect argc and argv to be passed in the usual C ABI registers
31// DI and SI.
32TEXT _rt0_amd64_lib(SB),NOSPLIT,$0
// Flow: save host registers, stash argc/argv, run libpreinit
// synchronously, then start _rt0_amd64_lib_go on a new thread (through
// _cgo_sys_thread_create when it is non-nil, otherwise through
// runtime·newosproc0), restore host registers and return to the caller.
33 // Transition from C ABI to Go ABI.
34 PUSH_REGS_HOST_TO_ABI0()
35
// Stash argc/argv in file-local globals; _rt0_amd64_lib_go reloads
// them on the new thread.
36 MOVQ DI, _rt0_amd64_lib_argc<>(SB)
37 MOVQ SI, _rt0_amd64_lib_argv<>(SB)
38
39 // Synchronous initialization.
40 CALL runtime·libpreinit(SB)
41
42 // Create a new thread to finish Go runtime initialization.
43 MOVQ _cgo_sys_thread_create(SB), AX
44 TESTQ AX, AX
45 JZ nocgo // nil function pointer: take the newosproc0 path
46
47 // We're calling back to C.
48 // Align stack per ELF ABI requirements.
49 MOVQ SP, BX // Callee-save in C ABI
50 ANDQ $~15, SP
51 MOVQ $_rt0_amd64_lib_go(SB), DI // C arg 1: thread entry point
52 MOVQ $0, SI // C arg 2: 0
53 CALL AX
54 MOVQ BX, SP // undo the alignment adjustment
55 JMP restore
56
57nocgo:
// Build newosproc0's two stack arguments: 0(SP) = stacksize, 8(SP) = fn.
58 ADJSP $16
59 MOVQ $0x800000, 0(SP) // stacksize
60 MOVQ $_rt0_amd64_lib_go(SB), AX
61 MOVQ AX, 8(SP) // fn
62 CALL runtime·newosproc0(SB)
63 ADJSP $-16
64
65restore:
66 POP_REGS_HOST_TO_ABI0()
67 RET
68
69// _rt0_amd64_lib_go initializes the Go runtime.
70// This is started in a separate thread by _rt0_amd64_lib.
71TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
// Reload the argc/argv saved by _rt0_amd64_lib into DI/SI, the
// registers rt0_go expects, and tail-jump to it.
72 MOVQ _rt0_amd64_lib_argc<>(SB), DI
73 MOVQ _rt0_amd64_lib_argv<>(SB), SI
74 JMP runtime·rt0_go(SB)
75
// File-local (<>) 8-byte slots, zero-initialized and marked NOPTR.
// _rt0_amd64_lib writes them; _rt0_amd64_lib_go reads them.
76DATA _rt0_amd64_lib_argc<>(SB)/8, $0
77GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
78DATA _rt0_amd64_lib_argv<>(SB)/8, $0
79GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
80
81TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
// Runtime bootstrap. On entry DI = argc and SI = argv.
// Steps, in order: carve g0's stack bounds out of the current stack,
// probe the CPU with CPUID, set up TLS (through _cgo_init when present,
// otherwise runtime·settls), link g0 and m0 together, run
// check/args/osinit/schedinit, queue runtime·mainPC with newproc, and
// enter mstart. mstart is not expected to return; abort follows it.
82 // copy arguments forward on an even stack
83 MOVQ DI, AX // argc
84 MOVQ SI, BX // argv
85 SUBQ $(4*8+7), SP // 2args 2auto
86 ANDQ $~15, SP
87 MOVQ AX, 16(SP)
88 MOVQ BX, 24(SP)
89
90 // create istack out of the given (operating system) stack.
91 // _cgo_init may update stackguard.
92 MOVQ $runtime·g0(SB), DI
93 LEAQ (-64*1024+104)(SP), BX // BX = SP - 64KiB + 104: provisional low bound and guard
94 MOVQ BX, g_stackguard0(DI)
95 MOVQ BX, g_stackguard1(DI)
96 MOVQ BX, (g_stack+stack_lo)(DI)
97 MOVQ SP, (g_stack+stack_hi)(DI)
98
99 // find out information about the processor we're on
100 MOVL $0, AX
101 CPUID
102 MOVL AX, SI // SI = EAX returned by CPUID leaf 0
103 CMPL AX, $0
104 JE nocpuinfo
105
106 // Figure out how to serialize RDTSC.
107 // On Intel processors LFENCE is enough. AMD requires MFENCE.
108 // Don't know about the rest, so let's do MFENCE.
// BX, DX, CX still hold the leaf-0 results; all three must match
// for the isIntel and lfenceBeforeRdtsc flags to be set.
109 CMPL BX, $0x756E6547 // "Genu"
110 JNE notintel
111 CMPL DX, $0x49656E69 // "ineI"
112 JNE notintel
113 CMPL CX, $0x6C65746E // "ntel"
114 JNE notintel
115 MOVB $1, runtime·isIntel(SB)
116 MOVB $1, runtime·lfenceBeforeRdtsc(SB)
117notintel:
118
119 // Load EAX=1 cpuid flags
120 MOVL $1, AX
121 CPUID
122 MOVL AX, runtime·processorVersionInfo(SB)
123
124nocpuinfo:
125 // if there is an _cgo_init, call it.
126 MOVQ _cgo_init(SB), AX
127 TESTQ AX, AX
128 JZ needtls
129 // arg 1: g0, already in DI
130 MOVQ $setg_gcc<>(SB), SI // arg 2: setg_gcc
131#ifdef GOOS_android
132 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
133 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
134 // Compensate for tls_g (+16).
135 MOVQ -16(TLS), CX
136#else
137 MOVQ $0, DX // arg 3, 4: not used when using platform's TLS
138 MOVQ $0, CX
139#endif
140#ifdef GOOS_windows
141 // Adjust for the Win64 calling convention.
// The moves run from arg 4 down to arg 1 so that CX and DX are
// copied out before they are overwritten.
142 MOVQ CX, R9 // arg 4
143 MOVQ DX, R8 // arg 3
144 MOVQ SI, DX // arg 2
145 MOVQ DI, CX // arg 1
146#endif
147 CALL AX
148
149 // update stackguard after _cgo_init
// Recompute both guards as g0.stack.lo + _StackGuard, reading stack.lo
// back from g0 because _cgo_init may have changed it (see note above).
150 MOVQ $runtime·g0(SB), CX
151 MOVQ (g_stack+stack_lo)(CX), AX
152 ADDQ $const__StackGuard, AX
153 MOVQ AX, g_stackguard0(CX)
154 MOVQ AX, g_stackguard1(CX)
155
// Except on Windows, skip the settls path once _cgo_init has run.
156#ifndef GOOS_windows
157 JMP ok
158#endif
159needtls:
160#ifdef GOOS_plan9
161 // skip TLS setup on Plan 9
162 JMP ok
163#endif
164#ifdef GOOS_solaris
165 // skip TLS setup on Solaris
166 JMP ok
167#endif
168#ifdef GOOS_illumos
169 // skip TLS setup on illumos
170 JMP ok
171#endif
172#ifdef GOOS_darwin
173 // skip TLS setup on Darwin
174 JMP ok
175#endif
176#ifdef GOOS_openbsd
177 // skip TLS setup on OpenBSD
178 JMP ok
179#endif
180
181 LEAQ runtime·m0+m_tls(SB), DI // DI = &m0.tls, the argument to settls
182 CALL runtime·settls(SB)
183
184 // store through it, to make sure it works
// Write 0x123 through the TLS g slot, then read m0.tls directly;
// abort unless the same value comes back.
185 get_tls(BX)
186 MOVQ $0x123, g(BX)
187 MOVQ runtime·m0+m_tls(SB), AX
188 CMPQ AX, $0x123
189 JEQ 2(PC) // values match: skip the abort
190 CALL runtime·abort(SB)
191ok:
192 // set the per-goroutine and per-mach "registers"
193 get_tls(BX)
194 LEAQ runtime·g0(SB), CX
195 MOVQ CX, g(BX)
196 LEAQ runtime·m0(SB), AX
197
198 // save m->g0 = g0
199 MOVQ CX, m_g0(AX)
200 // save m0 to g0->m
201 MOVQ AX, g_m(CX)
202
203 CLD // convention is D is always left cleared
204 CALL runtime·check(SB)
205
// Move the argc/argv saved at 16(SP)/24(SP) into the outgoing
// argument slots 0(SP)/8(SP) for runtime·args. argc is copied as
// 32 bits (MOVL), argv as 64 bits (MOVQ).
206 MOVL 16(SP), AX // copy argc
207 MOVL AX, 0(SP)
208 MOVQ 24(SP), AX // copy argv
209 MOVQ AX, 8(SP)
210 CALL runtime·args(SB)
211 CALL runtime·osinit(SB)
212 CALL runtime·schedinit(SB)
213
214 // create a new goroutine to start program
// newproc's two stack arguments are pushed in reverse order: the
// entry function value first, then the argument size (0).
215 MOVQ $runtime·mainPC(SB), AX // entry
216 PUSHQ AX
217 PUSHQ $0 // arg size
218 CALL runtime·newproc(SB)
219 POPQ AX // discard the two pushed arguments
220 POPQ AX
221
222 // start this M
223 CALL runtime·mstart(SB)
224
225 CALL runtime·abort(SB) // mstart should never return
226 RET
227
228 // Prevent dead-code elimination of debugCallV2, which is
229 // intended to be called by debuggers.
// This sequence sits after the RET above and is not reached by
// fall-through; it exists only to reference the symbol.
230 MOVQ $runtime·debugCallV2<ABIInternal>(SB), AX
231 RET
232
233// mainPC is a function value for runtime.main, to be passed to newproc.
234// The reference to runtime.main is made via ABIInternal, since the
235// actual function (not the ABI0 wrapper) is needed by newproc.
// One 8-byte read-only word holding the address of runtime·main
// (ABIInternal entry); rt0_go passes &mainPC to newproc.
236DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
237GLOBL runtime·mainPC(SB),RODATA,$8
238
239TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
// Emits a raw 0xCC byte (the one-byte x86 INT3 breakpoint opcode)
// and returns once execution resumes.
240 BYTE $0xcc
241 RET
242
243TEXT runtime·asminit(SB),NOSPLIT,$0-0
// Intentionally empty on amd64: takes no arguments and returns at once.
244 // No per-thread init.
245 RET
246
247TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
// Thin assembly entry marked TOPFRAME; all work happens in mstart0.
248 CALL runtime·mstart0(SB)
249 RET // not reached
250
251/*
252 * go-routine
253 */
254
255// func gogo(buf *gobuf)
256// restore state from Gobuf; longjmp
257TEXT runtime·gogo(SB), NOSPLIT, $0-8
// Loads BX = buf and DX = buf.g, faults early if that g is nil, then
// jumps to gogo<> which performs the actual switch using BX and DX.
258 MOVQ buf+0(FP), BX // gobuf
259 MOVQ gobuf_g(BX), DX
260 MOVQ 0(DX), CX // make sure g != nil
261 JMP gogo<>(SB)
262
263TEXT gogo<>(SB), NOSPLIT, $0
// On entry (set up by runtime·gogo): BX = *gobuf, DX = gobuf.g.
// Installs that g in TLS and R14, restores SP/AX/DX/BP from the gobuf,
// zeroes the saved sp/ret/ctxt/bp fields, and jumps to gobuf.pc.
// Does not return to its caller.
264 get_tls(CX)
265 MOVQ DX, g(CX)
266 MOVQ DX, R14 // set the g register
267 MOVQ gobuf_sp(BX), SP // restore SP
268 MOVQ gobuf_ret(BX), AX
269 MOVQ gobuf_ctxt(BX), DX
270 MOVQ gobuf_bp(BX), BP
271 MOVQ $0, gobuf_sp(BX) // clear to help garbage collector
272 MOVQ $0, gobuf_ret(BX)
273 MOVQ $0, gobuf_ctxt(BX)
274 MOVQ $0, gobuf_bp(BX)
275 MOVQ gobuf_pc(BX), BX // BX is reused for the target PC; read last because it overwrites the gobuf pointer
276 JMP BX
277
278// func mcall(fn func(*g))
279// Switch to m->g0's stack, call fn(g).
280// Fn must never return. It should gogo(&g->sched)
281// to keep running g.
282#ifdef GOEXPERIMENT_regabiargs
283TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
// Register-ABI variant: fn arrives in AX and the current g is in R14.
// Saves the caller's PC/SP/BP into g.sched, switches g and SP to
// m.g0, and calls fn with the old g in AX. If fn returns, control goes
// to badmcall2.
284 MOVQ AX, DX // DX = fn
285
286 // save state in g->sched
287 MOVQ 0(SP), BX // caller's PC
288 MOVQ BX, (g_sched+gobuf_pc)(R14)
289 LEAQ fn+0(FP), BX // caller's SP
290 MOVQ BX, (g_sched+gobuf_sp)(R14)
291 MOVQ BP, (g_sched+gobuf_bp)(R14)
292
293 // switch to m->g0 & its stack, call fn
294 MOVQ g_m(R14), BX
295 MOVQ m_g0(BX), SI // SI = g.m.g0
296 CMPQ SI, R14 // if g == m->g0 call badmcall
297 JNE goodm
298 JMP runtime·badmcall(SB)
299goodm:
300 MOVQ R14, AX // AX (and arg 0) = g
301 MOVQ SI, R14 // g = g.m.g0
302 get_tls(CX) // Set G in TLS
303 MOVQ R14, g(CX)
304 MOVQ (g_sched+gobuf_sp)(R14), SP // sp = g0.sched.sp
305 PUSHQ AX // open up space for fn's arg spill slot
306 MOVQ 0(DX), R12 // R12 = code pointer stored in the first word of the func value
307 CALL R12 // fn(g)
308 POPQ AX
309 JMP runtime·badmcall2(SB)
310 RET
311#else
312TEXT runtime·mcall(SB), NOSPLIT, $0-8
// Stack-ABI variant: fn is read from the frame and g from TLS.
// Saves the caller's PC/SP/BP into g.sched, switches g (TLS and R14)
// and SP to m.g0, and calls fn with the old g pushed as its stack
// argument. If fn returns, control goes to badmcall2.
313 MOVQ fn+0(FP), DI
314
315 get_tls(CX)
316 MOVQ g(CX), AX // save state in g->sched
317 MOVQ 0(SP), BX // caller's PC
318 MOVQ BX, (g_sched+gobuf_pc)(AX)
319 LEAQ fn+0(FP), BX // caller's SP
320 MOVQ BX, (g_sched+gobuf_sp)(AX)
321 MOVQ BP, (g_sched+gobuf_bp)(AX)
322
323 // switch to m->g0 & its stack, call fn
324 MOVQ g(CX), BX
325 MOVQ g_m(BX), BX
326 MOVQ m_g0(BX), SI
327 CMPQ SI, AX // if g == m->g0 call badmcall
328 JNE 3(PC) // skips the two-instruction badmcall jump below
329 MOVQ $runtime·badmcall(SB), AX
330 JMP AX
331 MOVQ SI, g(CX) // g = m->g0
332 MOVQ SI, R14 // set the g register
333 MOVQ (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
334 PUSHQ AX // fn's argument: the g we switched away from
335 MOVQ DI, DX // DX = func value (closure context for fn)
336 MOVQ 0(DI), DI // DI = code pointer stored in the first word of the func value
337 CALL DI
338 POPQ AX
339 MOVQ $runtime·badmcall2(SB), AX
340 JMP AX
341 RET
342#endif
343
344// systemstack_switch is a dummy routine that systemstack leaves at the bottom
345// of the G stack. We need to distinguish the routine that
346// lives at the bottom of the G stack from the one that lives
347// at the top of the system stack because the one at the top of
348// the system stack terminates the stack walk (see topofstack()).
349TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
// Marker only: gosave_systemstack_switch<> records this function's
// address as the saved PC. The body is a bare return.
350 RET
351
352// func systemstack(fn func())
353TEXT runtime·systemstack(SB), NOSPLIT, $0-8
// Runs fn on the m's g0 stack.
// - If the current g is m.gsignal or m.g0, fn is tail-called in place.
// - If the current g is m.curg, state is saved in g.sched, g and SP
//   are switched to g0, fn is called, and g and SP are switched back.
// - Any other g is reported through badsystemstack.
354 MOVQ fn+0(FP), DI // DI = fn
355 get_tls(CX)
356 MOVQ g(CX), AX // AX = g
357 MOVQ g_m(AX), BX // BX = m
358
359 CMPQ AX, m_gsignal(BX)
360 JEQ noswitch
361
362 MOVQ m_g0(BX), DX // DX = g0
363 CMPQ AX, DX
364 JEQ noswitch
365
366 CMPQ AX, m_curg(BX)
367 JNE bad
368
369 // switch stacks
370 // save our state in g->sched. Pretend to
371 // be systemstack_switch if the G stack is scanned.
// The helper writes only R9 and R14, so CX, DX and DI stay live.
372 CALL gosave_systemstack_switch<>(SB)
373
374 // switch to g0
375 MOVQ DX, g(CX)
376 MOVQ DX, R14 // set the g register
377 MOVQ (g_sched+gobuf_sp)(DX), BX
378 MOVQ BX, SP
379
380 // call target function
381 MOVQ DI, DX // DX = func value (closure context for fn)
382 MOVQ 0(DI), DI // DI = code pointer stored in the first word of the func value
383 CALL DI
384
385 // switch back to g
// The goroutine to resume is re-read from m.curg rather than kept in
// a register across the call.
386 get_tls(CX)
387 MOVQ g(CX), AX
388 MOVQ g_m(AX), BX
389 MOVQ m_curg(BX), AX
390 MOVQ AX, g(CX)
391 MOVQ (g_sched+gobuf_sp)(AX), SP
392 MOVQ $0, (g_sched+gobuf_sp)(AX) // clear the saved SP now that it has been consumed
393 RET
394
395noswitch:
396 // already on m stack; tail call the function
397 // Using a tail call here cleans up tracebacks since we won't stop
398 // at an intermediate systemstack.
399 MOVQ DI, DX
400 MOVQ 0(DI), DI
401 JMP DI
402
403bad:
404 // Bad: g is not gsignal, not g0, not curg. What is it?
405 MOVQ $runtime·badsystemstack(SB), AX
406 CALL AX
407 INT $3 // trap if badsystemstack returns
408
409
410/*
411 * support for morestack
412 */
413
414// Called during function prolog when more stack is needed.
415//
416// The traceback routines see morestack on a g0 as being
417// the top of a stack (for example, morestack calling newstack
418// calling the scheduler calling newm calling gc), so we must
419// record an argument size. For that purpose, it has no arguments.
420TEXT runtime·morestack(SB),NOSPLIT,$0-0
// Stack layout assumed on entry (f is the function whose prologue
// called morestack): 0(SP) = return PC into f, 8(SP) = return PC into
// f's caller. DX carries f's closure context and is saved as
// g.sched.ctxt. After recording both contexts, g and SP are switched
// to m.g0 and runtime·newstack is called; it is not expected to return.
421 // Cannot grow scheduler stack (m->g0).
422 get_tls(CX)
423 MOVQ g(CX), BX
424 MOVQ g_m(BX), BX
425 MOVQ m_g0(BX), SI
426 CMPQ g(CX), SI
427 JNE 3(PC) // not g0: skip the two calls below
428 CALL runtime·badmorestackg0(SB)
429 CALL runtime·abort(SB)
430
431 // Cannot grow signal stack (m->gsignal).
432 MOVQ m_gsignal(BX), SI
433 CMPQ g(CX), SI
434 JNE 3(PC) // not gsignal: skip the two calls below
435 CALL runtime·badmorestackgsignal(SB)
436 CALL runtime·abort(SB)
437
438 // Called from f.
439 // Set m->morebuf to f's caller.
440 NOP SP // tell vet SP changed - stop checking offsets
441 MOVQ 8(SP), AX // f's caller's PC
442 MOVQ AX, (m_morebuf+gobuf_pc)(BX)
443 LEAQ 16(SP), AX // f's caller's SP
444 MOVQ AX, (m_morebuf+gobuf_sp)(BX)
445 get_tls(CX)
446 MOVQ g(CX), SI
447 MOVQ SI, (m_morebuf+gobuf_g)(BX)
448
449 // Set g->sched to context in f.
450 MOVQ 0(SP), AX // f's PC
451 MOVQ AX, (g_sched+gobuf_pc)(SI)
452 LEAQ 8(SP), AX // f's SP
453 MOVQ AX, (g_sched+gobuf_sp)(SI)
454 MOVQ BP, (g_sched+gobuf_bp)(SI)
455 MOVQ DX, (g_sched+gobuf_ctxt)(SI)
456
457 // Call newstack on m->g0's stack.
458 MOVQ m_g0(BX), BX
459 MOVQ BX, g(CX)
460 MOVQ (g_sched+gobuf_sp)(BX), SP
461 CALL runtime·newstack(SB)
462 CALL runtime·abort(SB) // crash if newstack returns
463 RET
464
465// morestack but not preserving ctxt.
466TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
// Zero DX so morestack records a nil g.sched.ctxt, then tail-jump.
// The 32-bit MOVL also clears the upper half of DX.
467 MOVL $0, DX
468 JMP runtime·morestack(SB)
469
470#ifdef GOEXPERIMENT_regabireflect
471// spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
472TEXT ·spillArgs<ABIInternal>(SB),NOSPLIT,$0-0
// In: R12 = destination buffer. Nothing besides memory is written.
// Layout, 8 bytes per slot:
//   offsets 0..64   : AX, BX, CX, DI, SI, R8, R9, R10, R11
//   offsets 72..184 : low 64 bits of X0 through X14
// unspillArgs must use the same offsets.
473 MOVQ AX, 0(R12)
474 MOVQ BX, 8(R12)
475 MOVQ CX, 16(R12)
476 MOVQ DI, 24(R12)
477 MOVQ SI, 32(R12)
478 MOVQ R8, 40(R12)
479 MOVQ R9, 48(R12)
480 MOVQ R10, 56(R12)
481 MOVQ R11, 64(R12)
482 MOVQ X0, 72(R12)
483 MOVQ X1, 80(R12)
484 MOVQ X2, 88(R12)
485 MOVQ X3, 96(R12)
486 MOVQ X4, 104(R12)
487 MOVQ X5, 112(R12)
488 MOVQ X6, 120(R12)
489 MOVQ X7, 128(R12)
490 MOVQ X8, 136(R12)
491 MOVQ X9, 144(R12)
492 MOVQ X10, 152(R12)
493 MOVQ X11, 160(R12)
494 MOVQ X12, 168(R12)
495 MOVQ X13, 176(R12)
496 MOVQ X14, 184(R12)
497 RET
498
499// unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
500TEXT ·unspillArgs<ABIInternal>(SB),NOSPLIT,$0-0
// In: R12 = source buffer; R12 itself is left unchanged.
// Exact inverse of spillArgs, 8 bytes per slot:
//   offsets 0..64   -> AX, BX, CX, DI, SI, R8, R9, R10, R11
//   offsets 72..184 -> X0 through X14 (64-bit loads)
501 MOVQ 0(R12), AX
502 MOVQ 8(R12), BX
503 MOVQ 16(R12), CX
504 MOVQ 24(R12), DI
505 MOVQ 32(R12), SI
506 MOVQ 40(R12), R8
507 MOVQ 48(R12), R9
508 MOVQ 56(R12), R10
509 MOVQ 64(R12), R11
510 MOVQ 72(R12), X0
511 MOVQ 80(R12), X1
512 MOVQ 88(R12), X2
513 MOVQ 96(R12), X3
514 MOVQ 104(R12), X4
515 MOVQ 112(R12), X5
516 MOVQ 120(R12), X6
517 MOVQ 128(R12), X7
518 MOVQ 136(R12), X8
519 MOVQ 144(R12), X9
520 MOVQ 152(R12), X10
521 MOVQ 160(R12), X11
522 MOVQ 168(R12), X12
523 MOVQ 176(R12), X13
524 MOVQ 184(R12), X14
525 RET
526#else
527// spillArgs stores return values from registers to a pointer in R12.
528TEXT ·spillArgs<ABIInternal>(SB),NOSPLIT,$0-0
// No-op variant built when GOEXPERIMENT_regabireflect is not defined;
// it exists so the CALL in CALLFN still resolves.
529 RET
530
531// unspillArgs loads args into registers from a pointer in R12.
532TEXT ·unspillArgs<ABIInternal>(SB),NOSPLIT,$0-0
// No-op variant built when GOEXPERIMENT_regabireflect is not defined;
// it exists so the CALL in CALLFN still resolves.
533 RET
534#endif
535
536// reflectcall: call a function with the given argument list
537// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
538// we don't have variable-sized frames, so we use a small number
539// of constant-sized-frame functions to encode a few bits of size in the pc.
540// Caution: ugly multiline assembly macros in your future!
541
// DISPATCH(NAME, MAXSIZE): if CX <= MAXSIZE (unsigned), jump to NAME
// through AX; otherwise fall through to the next DISPATCH.
// Expects the requested frame size in CX. Clobbers AX when taken.
// "JA 3(PC)" skips the MOVQ/JMP pair that follows it.
542#define DISPATCH(NAME,MAXSIZE) \
543 CMPQ CX, $MAXSIZE; \
544 JA 3(PC); \
545 MOVQ $NAME(SB), AX; \
546 JMP AX
547// Note: can't just "JMP NAME(SB)" - bad inlining results.
548
549TEXT ·reflectcall(SB), NOSPLIT, $0-48
// Loads the 32-bit frameSize argument (zero-extended) into CX and
// tail-jumps to the first callN whose N >= frameSize, trying sizes in
// ascending powers of two from 16 to 1<<30. A frameSize above 1<<30
// falls through every DISPATCH and reaches badreflectcall.
// The arguments in the caller's frame are left in place for callN.
550 MOVLQZX frameSize+32(FP), CX
551 DISPATCH(runtime·call16, 16)
552 DISPATCH(runtime·call32, 32)
553 DISPATCH(runtime·call64, 64)
554 DISPATCH(runtime·call128, 128)
555 DISPATCH(runtime·call256, 256)
556 DISPATCH(runtime·call512, 512)
557 DISPATCH(runtime·call1024, 1024)
558 DISPATCH(runtime·call2048, 2048)
559 DISPATCH(runtime·call4096, 4096)
560 DISPATCH(runtime·call8192, 8192)
561 DISPATCH(runtime·call16384, 16384)
562 DISPATCH(runtime·call32768, 32768)
563 DISPATCH(runtime·call65536, 65536)
564 DISPATCH(runtime·call131072, 131072)
565 DISPATCH(runtime·call262144, 262144)
566 DISPATCH(runtime·call524288, 524288)
567 DISPATCH(runtime·call1048576, 1048576)
568 DISPATCH(runtime·call2097152, 2097152)
569 DISPATCH(runtime·call4194304, 4194304)
570 DISPATCH(runtime·call8388608, 8388608)
571 DISPATCH(runtime·call16777216, 16777216)
572 DISPATCH(runtime·call33554432, 33554432)
573 DISPATCH(runtime·call67108864, 67108864)
574 DISPATCH(runtime·call134217728, 134217728)
575 DISPATCH(runtime·call268435456, 268435456)
576 DISPATCH(runtime·call536870912, 536870912)
577 DISPATCH(runtime·call1073741824, 1073741824)
578 MOVQ $runtime·badreflectcall(SB), AX
579 JMP AX
580
// CALLFN(NAME, MAXSIZE) defines one fixed-frame trampoline NAME with a
// MAXSIZE-byte local frame and the same 48-byte argument frame as
// reflectcall. Each instance:
//   1. copies stackArgsSize bytes from stackArgs to the bottom of its
//      own frame (REP MOVSB with SI = source, DI = SP, CX = count);
//   2. loads argument registers from regArgs via unspillArgs (R12 =
//      regArgs);
//   3. calls the function whose code pointer is the first word of f,
//      with DX = f;
//   4. stores result registers back to regArgs via spillArgs;
//   5. hands callRet<> the results region in registers: DX =
//      stackArgsType, DI = stackArgs+stackRetOffset, SI =
//      SP+stackRetOffset, CX = stackArgsSize-stackRetOffset, R12 =
//      regArgs.
// Only /* */ comments are used inside the body because every line ends
// in a continuation backslash.
581#define CALLFN(NAME,MAXSIZE) \
582TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
583 NO_LOCAL_POINTERS; \
584 /* copy arguments to stack */ \
585 MOVQ stackArgs+16(FP), SI; \
586 MOVLQZX stackArgsSize+24(FP), CX; \
587 MOVQ SP, DI; \
588 REP;MOVSB; \
589 /* set up argument registers */ \
590 MOVQ regArgs+40(FP), R12; \
591 CALL ·unspillArgs<ABIInternal>(SB); \
592 /* call function */ \
593 MOVQ f+8(FP), DX; \
594 PCDATA $PCDATA_StackMapIndex, $0; \
595 MOVQ (DX), R12; \
596 CALL R12; \
597 /* copy register return values back */ \
598 MOVQ regArgs+40(FP), R12; \
599 CALL ·spillArgs<ABIInternal>(SB); \
600 MOVLQZX stackArgsSize+24(FP), CX; \
601 MOVLQZX stackRetOffset+28(FP), BX; \
602 MOVQ stackArgs+16(FP), DI; \
603 MOVQ stackArgsType+0(FP), DX; \
604 MOVQ SP, SI; \
605 ADDQ BX, DI; \
606 ADDQ BX, SI; \
607 SUBQ BX, CX; \
608 CALL callRet<>(SB); \
609 RET
610
611// callRet copies return values back at the end of call*. This is a
612// separate function so it can allocate stack space for the arguments
613// to reflectcallmove. It does not follow the Go ABI; it expects its
614// arguments in registers.
615TEXT callRet<>(SB), NOSPLIT, $40-0
// Register inputs, as set up by CALLFN: DX = stackArgsType,
// DI = destination of results, SI = source of results, CX = byte
// count, R12 = regArgs. They are stored in that order as
// reflectcallmove's five stack arguments.
616 NO_LOCAL_POINTERS
617 MOVQ DX, 0(SP)
618 MOVQ DI, 8(SP)
619 MOVQ SI, 16(SP)
620 MOVQ CX, 24(SP)
621 MOVQ R12, 32(SP)
622 CALL runtime·reflectcallmove(SB)
623 RET
624
// Instantiate one trampoline per frame size. The sizes are the same
// powers of two, 16 through 1<<30, that reflectcall dispatches on;
// the two lists must stay in step.
625CALLFN(·call16, 16)
626CALLFN(·call32, 32)
627CALLFN(·call64, 64)
628CALLFN(·call128, 128)
629CALLFN(·call256, 256)
630CALLFN(·call512, 512)
631CALLFN(·call1024, 1024)
632CALLFN(·call2048, 2048)
633CALLFN(·call4096, 4096)
634CALLFN(·call8192, 8192)
635CALLFN(·call16384, 16384)
636CALLFN(·call32768, 32768)
637CALLFN(·call65536, 65536)
638CALLFN(·call131072, 131072)
639CALLFN(·call262144, 262144)
640CALLFN(·call524288, 524288)
641CALLFN(·call1048576, 1048576)
642CALLFN(·call2097152, 2097152)
643CALLFN(·call4194304, 4194304)
644CALLFN(·call8388608, 8388608)
645CALLFN(·call16777216, 16777216)
646CALLFN(·call33554432, 33554432)
647CALLFN(·call67108864, 67108864)
648CALLFN(·call134217728, 134217728)
649CALLFN(·call268435456, 268435456)
650CALLFN(·call536870912, 536870912)
651CALLFN(·call1073741824, 1073741824)
652
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	// Busy-wait: execute PAUSE once per iteration, counting the 32-bit
	// cycles argument down to zero. The counter is decremented before
	// it is tested, so a count of 0 is not special-cased (it wraps).
	MOVL	cycles+0(FP), AX	// AX = iterations remaining
spin:
	PAUSE
	SUBL	$1, AX
	JNZ	spin			// keep spinning until the counter hits zero
	RET
660
661
662TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
663 // Stores are already ordered on x86, so this is just a
664 // compile barrier.
// No fence instruction is emitted; the body is a bare RET.
665 RET
666
667// func jmpdefer(fv *funcval, argp uintptr)
668// argp is a caller SP.
669// called from deferreturn.
670// 1. pop the caller
671// 2. subtract 5 bytes from the caller's return address
672// 3. jmp to the argument
673TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
// Rewinds SP to where it was just after the caller was CALLed, backs
// that return address up by 5 bytes so the CALL executes again, and
// jumps to the deferred function with DX = fv. Does not return here.
674 MOVQ fv+0(FP), DX // fn
675 MOVQ argp+8(FP), BX // caller sp
676 LEAQ -8(BX), SP // caller sp after CALL
677 MOVQ -8(SP), BP // restore BP as if deferreturn returned (harmless if framepointers not in use)
678 SUBQ $5, (SP) // return to CALL again
679 MOVQ 0(DX), BX // BX = code pointer stored in the first word of the func value
680 JMP BX // but first run the deferred function
681
682// Save state of caller into g->sched,
683// but using fake PC from systemstack_switch.
684// Must only be called from functions with no locals ($0)
685// or else unwinding from systemstack_switch is incorrect.
686// Smashes R9.
687TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
// Writes into the current g's sched: pc = address of
// systemstack_switch, sp = the caller's SP (8(SP) skips this call's
// return address), ret = 0, bp = BP. Aborts if sched.ctxt is non-zero.
// Registers written: R9 always; R14 as well when
// GOEXPERIMENT_regabig is not defined (it is loaded with g from TLS).
688#ifndef GOEXPERIMENT_regabig
689 get_tls(R14)
690 MOVQ g(R14), R14
691#endif
692 MOVQ $runtime·systemstack_switch(SB), R9
693 MOVQ R9, (g_sched+gobuf_pc)(R14)
694 LEAQ 8(SP), R9
695 MOVQ R9, (g_sched+gobuf_sp)(R14)
696 MOVQ $0, (g_sched+gobuf_ret)(R14)
697 MOVQ BP, (g_sched+gobuf_bp)(R14)
698 // Assert ctxt is zero. See func save.
699 MOVQ (g_sched+gobuf_ctxt)(R14), R9
700 TESTQ R9, R9
701 JZ 2(PC) // ctxt is zero: skip the abort
702 CALL runtime·abort(SB)
703 RET
704
705// func asmcgocall_no_g(fn, arg unsafe.Pointer)
706// Call fn(arg) aligned appropriately for the gcc ABI.
707// Called on a system stack, and there may be no g yet (during needm).
708TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
// Calls fn(arg) on the current stack without touching g or TLS.
// The original SP is parked at 8(SP) of the realigned area and
// reloaded after the call. arg is placed in both DI and CX so the
// same code serves the System V and Win64 conventions.
709 MOVQ fn+0(FP), AX
710 MOVQ arg+8(FP), BX
711 MOVQ SP, DX // DX = SP before realignment
712 SUBQ $32, SP
713 ANDQ $~15, SP // alignment
714 MOVQ DX, 8(SP)
715 MOVQ BX, DI // DI = first argument in AMD64 ABI
716 MOVQ BX, CX // CX = first argument in Win64
717 CALL AX
718 MOVQ 8(SP), DX
719 MOVQ DX, SP // back to the caller's stack pointer
720 RET
721
722// func asmcgocall(fn, arg unsafe.Pointer) int32
723// Call fn(arg) on the scheduler stack,
724// aligned appropriately for the gcc ABI.
725// See cgocall.go for more details.
726TEXT ·asmcgocall(SB),NOSPLIT,$0-20
// Two paths:
// - switching path: the current g is neither m.g0 nor m.gsignal, so
//   state is saved in g.sched, g and SP move to m.g0, fn(arg) runs on
//   an aligned frame, and g and SP are restored afterwards;
// - nosave path: there is no g, or we are already on g0/gsignal, so
//   fn(arg) runs on the current stack after realignment.
// In both cases the low 32 bits of fn's AX result are stored in ret.
727 MOVQ fn+0(FP), AX
728 MOVQ arg+8(FP), BX
729
730 MOVQ SP, DX // DX = SP on entry, used by both paths below
731
732 // Figure out if we need to switch to m->g0 stack.
733 // We get called to create new OS threads too, and those
734 // come in on the m->g0 stack already.
735 get_tls(CX)
736 MOVQ g(CX), R8
737 CMPQ R8, $0
738 JEQ nosave
739 MOVQ g_m(R8), R8
740 MOVQ m_g0(R8), SI
741 MOVQ g(CX), DI // DI = current g
742 CMPQ SI, DI
743 JEQ nosave
744 MOVQ m_gsignal(R8), SI
745 CMPQ SI, DI
746 JEQ nosave
747
748 // Switch to system stack.
749 MOVQ m_g0(R8), SI
750 CALL gosave_systemstack_switch<>(SB)
751 MOVQ SI, g(CX)
752 MOVQ (g_sched+gobuf_sp)(SI), SP
753
754 // Now on a scheduling stack (a pthread-created stack).
755 // Make sure we have enough room for 4 stack-backed fast-call
756 // registers as per windows amd64 calling convention.
757 SUBQ $64, SP
758 ANDQ $~15, SP // alignment for gcc ABI
759 MOVQ DI, 48(SP) // save g
760 MOVQ (g_stack+stack_hi)(DI), DI
761 SUBQ DX, DI // DI = g.stack.hi - entry SP
762 MOVQ DI, 40(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
763 MOVQ BX, DI // DI = first argument in AMD64 ABI
764 MOVQ BX, CX // CX = first argument in Win64
765 CALL AX
766
767 // Restore registers, g, stack pointer.
// SP is recomputed as g.stack.hi minus the saved depth, using the
// value of stack.hi at this point.
768 get_tls(CX)
769 MOVQ 48(SP), DI
770 MOVQ (g_stack+stack_hi)(DI), SI
771 SUBQ 40(SP), SI
772 MOVQ DI, g(CX)
773 MOVQ SI, SP
774
775 MOVL AX, ret+16(FP)
776 RET
777
778nosave:
779 // Running on a system stack, perhaps even without a g.
780 // Having no g can happen during thread creation or thread teardown
781 // (see needm/dropm on Solaris, for example).
782 // This code is like the above sequence but without saving/restoring g
783 // and without worrying about the stack moving out from under us
784 // (because we're on a system stack, not a goroutine stack).
785 // The above code could be used directly if already on a system stack,
786 // but then the only path through this code would be a rare case on Solaris.
787 // Using this code for all "already on system stack" calls exercises it more,
788 // which should help keep it correct.
789 SUBQ $64, SP
790 ANDQ $~15, SP
791 MOVQ $0, 48(SP) // where above code stores g, in case someone looks during debugging
792 MOVQ DX, 40(SP) // save original stack pointer
793 MOVQ BX, DI // DI = first argument in AMD64 ABI
794 MOVQ BX, CX // CX = first argument in Win64
795 CALL AX
796 MOVQ 40(SP), SI // restore original stack pointer
797 MOVQ SI, SP
798 MOVL AX, ret+16(FP)
799 RET
800
801#ifdef GOOS_windows
802// Dummy TLS that's used on Windows so that we don't crash trying
803// to restore the G register in needm. needm and its callees are
804// very careful never to actually use the G, the TLS just can't be
805// unset since we're in Go code.
// File-local, read-only block of const_tlsSize bytes with no DATA
// initializer; cgocallback passes its address to settls on Windows.
806GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
807#endif
808
809// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
810// See cgocall.go for more details.
811TEXT ·cgocallback(SB),NOSPLIT,$24-24
// Frame use: 0(SP) holds the saved g0.sched.sp; savedm-8(SP) holds the
// m that was current on entry, or 0 when needm had to supply one.
// Phases: (1) obtain an m if the thread has none, (2) record SP in
// g0.sched.sp, (3) switch to m.curg's stack and call cgocallbackg with
// (fn, frame, ctxt), (4) restore curg.sched and switch back to g0,
// (5) call dropm if the m was borrowed.
812 NO_LOCAL_POINTERS
813
814 // If g is nil, Go did not create the current thread.
815 // Call needm to obtain one m for temporary use.
816 // In this case, we're running on the thread stack, so there's
817 // lots of space, but the linker doesn't know. Hide the call from
818 // the linker analysis by using an indirect call through AX.
819 get_tls(CX)
820#ifdef GOOS_windows
// On Windows the TLS base in CX may itself be 0: preset BX = 0 and
// skip the load of g(CX) in that case.
821 MOVL $0, BX
822 CMPQ CX, $0
823 JEQ 2(PC)
824#endif
825 MOVQ g(CX), BX
826 CMPQ BX, $0
827 JEQ needm
828 MOVQ g_m(BX), BX
829 MOVQ BX, savedm-8(SP) // saved copy of oldm
830 JMP havem
831needm:
832#ifdef GOOS_windows
833 // Set up a dummy TLS value. needm is careful not to use it,
834 // but it needs to be there to prevent autogenerated code from
835 // crashing when it loads from it.
836 // We don't need to clear it or anything later because needm
837 // will set up TLS properly.
838 MOVQ $zeroTLS<>(SB), DI
839 CALL runtime·settls(SB)
840#endif
841 // On some platforms (Windows) we cannot call needm through
842 // an ABI wrapper because there's no TLS set up, and the ABI
843 // wrapper will try to restore the G register (R14) from TLS.
844 // Clear X15 because Go expects it and we're not calling
845 // through a wrapper, but otherwise avoid setting the G
846 // register in the wrapper and call needm directly. It
847 // takes no arguments and doesn't return any values so
848 // there's no need to handle that. Clear R14 so that there's
849 // a bad value in there, in case needm tries to use it.
850 XORPS X15, X15
851 XORQ R14, R14
852 MOVQ $runtime·needm<ABIInternal>(SB), AX
853 CALL AX
854 MOVQ $0, savedm-8(SP) // dropm on return
855 get_tls(CX)
856 MOVQ g(CX), BX
857 MOVQ g_m(BX), BX
858
859 // Set m->sched.sp = SP, so that if a panic happens
860 // during the function we are about to execute, it will
861 // have a valid SP to run on the g0 stack.
862 // The next few lines (after the havem label)
863 // will save this SP onto the stack and then write
864 // the same SP back to m->sched.sp. That seems redundant,
865 // but if an unrecovered panic happens, unwindm will
866 // restore the g->sched.sp from the stack location
867 // and then systemstack will try to use it. If we don't set it here,
868 // that restored SP will be uninitialized (typically 0) and
869 // will not be usable.
870 MOVQ m_g0(BX), SI
871 MOVQ SP, (g_sched+gobuf_sp)(SI)
872
873havem:
874 // Now there's a valid m, and we're running on its m->g0.
875 // Save current m->g0->sched.sp on stack and then set it to SP.
876 // Save current sp in m->g0->sched.sp in preparation for
877 // switch back to m->curg stack.
878 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
879 MOVQ m_g0(BX), SI
880 MOVQ (g_sched+gobuf_sp)(SI), AX
881 MOVQ AX, 0(SP)
882 MOVQ SP, (g_sched+gobuf_sp)(SI)
883
884 // Switch to m->curg stack and call runtime.cgocallbackg.
885 // Because we are taking over the execution of m->curg
886 // but *not* resuming what had been running, we need to
887 // save that information (m->curg->sched) so we can restore it.
888 // We can restore m->curg->sched.sp easily, because calling
889 // runtime.cgocallbackg leaves SP unchanged upon return.
890 // To save m->curg->sched.pc, we push it onto the curg stack and
891 // open a frame the same size as cgocallback's g0 frame.
892 // Once we switch to the curg stack, the pushed PC will appear
893 // to be the return PC of cgocallback, so that the traceback
894 // will seamlessly trace back into the earlier calls.
895 MOVQ m_curg(BX), SI
896 MOVQ SI, g(CX)
897 MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
898 MOVQ (g_sched+gobuf_pc)(SI), BX
899 MOVQ BX, -8(DI) // "push" return PC on the g stack
900 // Gather our arguments into registers.
// They are read through FP now, while SP still points at the g0 frame.
901 MOVQ fn+0(FP), BX
902 MOVQ frame+8(FP), CX
903 MOVQ ctxt+16(FP), DX
904 // Compute the size of the frame, including return PC and, if
905 // GOEXPERIMENT=framepointer, the saved base pointer
906 LEAQ fn+0(FP), AX
907 SUBQ SP, AX // AX is our actual frame size
908 SUBQ AX, DI // Allocate the same frame size on the g stack
909 MOVQ DI, SP
910
// Outgoing stack arguments for cgocallbackg: fn, frame, ctxt.
911 MOVQ BX, 0(SP)
912 MOVQ CX, 8(SP)
913 MOVQ DX, 16(SP)
914 MOVQ $runtime·cgocallbackg(SB), AX
915 CALL AX // indirect call to bypass nosplit check. We're on a different stack now.
916
917 // Compute the size of the frame again. FP and SP have
918 // completely different values here than they did above,
919 // but only their difference matters.
920 LEAQ fn+0(FP), AX
921 SUBQ SP, AX
922
923 // Restore g->sched (== m->curg->sched) from saved values.
924 get_tls(CX)
925 MOVQ g(CX), SI
926 MOVQ SP, DI
927 ADDQ AX, DI // DI = SP + frame size = curg's sched.sp from before the switch
928 MOVQ -8(DI), BX // the PC "pushed" below that SP earlier
929 MOVQ BX, (g_sched+gobuf_pc)(SI)
930 MOVQ DI, (g_sched+gobuf_sp)(SI)
931
932 // Switch back to m->g0's stack and restore m->g0->sched.sp.
933 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
934 // so we do not have to restore it.)
935 MOVQ g(CX), BX
936 MOVQ g_m(BX), BX
937 MOVQ m_g0(BX), SI
938 MOVQ SI, g(CX)
939 MOVQ (g_sched+gobuf_sp)(SI), SP
940 MOVQ 0(SP), AX // the previous g0.sched.sp parked at 0(SP) after havem
941 MOVQ AX, (g_sched+gobuf_sp)(SI)
942
943 // If the m on entry was nil, we called needm above to borrow an m
944 // for the duration of the call. Since the call is over, return it with dropm.
945 MOVQ savedm-8(SP), BX
946 CMPQ BX, $0
947 JNE done
948 MOVQ $runtime·dropm(SB), AX
949 CALL AX
950#ifdef GOOS_windows
951 // We need to clear the TLS pointer in case the next
952 // thread that comes into Go tries to reuse that space
953 // but uses the same M.
954 XORQ DI, DI
955 CALL runtime·settls(SB)
956#endif
957done:
958
959 // Done!
960 RET
961
962// func setg(gg *g)
963// set g. for use by needm.
964TEXT runtime·setg(SB), NOSPLIT, $0-8
// Stores gg into the TLS g slot. Unlike setg_gcc<>, this routine
// does not write R14.
965 MOVQ gg+0(FP), BX
966 get_tls(CX)
967 MOVQ BX, g(CX)
968 RET
969
970// void setg_gcc(G*); set g called from gcc.
971TEXT setg_gcc<>(SB),NOSPLIT,$0
// The g pointer arrives in DI (first C integer argument register).
// It is stored in the TLS g slot and in R14. Clobbers AX.
972 get_tls(AX)
973 MOVQ DI, g(AX)
974 MOVQ DI, R14 // set the g register
975 RET
976
TEXT runtime·abort(SB),NOSPLIT,$0-0
	// Raise a breakpoint trap. If execution continues past the trap,
	// spin forever so that abort never returns to its caller.
	INT	$3
hang:
	JMP	hang
981
// stackcheck aborts unless g.stack.lo < SP < g.stack.hi.
// Both comparisons are unsigned and strict, so SP equal to either
// bound also aborts. Clobbers AX, CX and the flags.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX			// AX = current g
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	below_hi			// stack.hi > SP: upper bound holds
	CALL	runtime·abort(SB)
below_hi:
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	above_lo			// SP > stack.lo: lower bound holds
	CALL	runtime·abort(SB)
above_lo:
	RET
993
994// func cputicks() int64
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	// Fence before reading the counter: LFENCE when
	// lfenceBeforeRdtsc == 1 (set by rt0_go for GenuineIntel CPUs),
	// MFENCE otherwise.
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	use_mfence
	LFENCE
	JMP	read_tsc
use_mfence:
	MFENCE
read_tsc:
	RDTSC				// result split across DX (high) and AX (low)
	SHLQ	$32, DX			// move the high half into bits 32..63
	ADDQ	DX, AX			// AX = 64-bit tick count
	MOVQ	AX, ret+0(FP)
	RET
1008
1009// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
1010// hash function using AES hardware instructions
1011TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
// Dispatcher: tail-jumps to aeshashbody<> when runtime·useAeshash is
// non-zero, otherwise to memhashFallback. Without
// GOEXPERIMENT_regabiargs the arguments are first loaded from the
// frame into the registers aeshashbody<> expects (AX = data,
// CX = size, DX = address of the result slot).
1012#ifdef GOEXPERIMENT_regabiargs
1013 // AX = ptr to data
1014 // BX = seed
1015 // CX = size
1016#endif
1017 CMPB runtime·useAeshash(SB), $0
1018 JEQ noaes
1019#ifndef GOEXPERIMENT_regabiargs
1020 MOVQ p+0(FP), AX // ptr to data
1021 MOVQ s+16(FP), CX // size
1022 LEAQ ret+24(FP), DX
1023#endif
1024 JMP aeshashbody<>(SB)
1025noaes:
1026 JMP runtime·memhashFallback<ABIInternal>(SB)
1027
1028// func strhash(p unsafe.Pointer, h uintptr) uintptr
1029TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
// Dispatcher: when runtime·useAeshash is non-zero, unpacks the string
// header that p points to (data pointer at offset 0, length at
// offset 8) into AX/CX and tail-jumps to aeshashbody<>; otherwise
// tail-jumps to strhashFallback with the arguments untouched.
1030#ifdef GOEXPERIMENT_regabiargs
1031 // AX = ptr to string struct
1032 // BX = seed
1033#endif
1034 CMPB runtime·useAeshash(SB), $0
1035 JEQ noaes
1036#ifndef GOEXPERIMENT_regabiargs
1037 MOVQ p+0(FP), AX // ptr to string struct
1038#endif
// The length is loaded first because the next instruction overwrites
// AX, the pointer to the header.
1039 MOVQ 8(AX), CX // length of string
1040 MOVQ (AX), AX // string data
1041#ifndef GOEXPERIMENT_regabiargs
1042 LEAQ ret+16(FP), DX
1043#endif
1044 JMP aeshashbody<>(SB)
1045noaes:
1046 JMP runtime·strhashFallback<ABIInternal>(SB)
1047
// aeshashbody is the shared AES-based hashing routine reached by tail-jump
// from memhash and strhash. It dispatches on the length in CX to one of
// several code paths (0, 1-15, 16, 17-32, 33-64, 65-128, 129+ bytes) and
// returns the low 64 bits of the final SSE state.
// Register/stack contract:
1048// AX: data
1049#ifdef GOEXPERIMENT_regabiargs
1050// BX: hash seed
1051#else
1052// h+8(FP): hash seed
1053#endif
1054// CX: length
1055#ifdef GOEXPERIMENT_regabiargs
1056// At return: AX = return value
1057#else
1058// DX: address to put return value
1059#endif
1060TEXT aeshashbody<>(SB),NOSPLIT,$0-0
1061 // Fill an SSE register with our seeds.
1062#ifdef GOEXPERIMENT_regabiargs
1063 MOVQ BX, X0 // 64 bits of per-table hash seed
1064#else
1065 MOVQ h+8(FP), X0 // 64 bits of per-table hash seed
1066#endif
1067 PINSRW $4, CX, X0 // 16 bits of length
1068 PSHUFHW $0, X0, X0 // repeat length 4 times total
1069 MOVO X0, X1 // save unscrambled seed
1070 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
1071 AESENC X0, X0 // scramble seed
1072
 // Dispatch on length. The flags from the first CMPQ serve both
 // the JB (< 16) and the JE (== 16) branches.
1073 CMPQ CX, $16
1074 JB aes0to15
1075 JE aes16
1076 CMPQ CX, $32
1077 JBE aes17to32
1078 CMPQ CX, $64
1079 JBE aes33to64
1080 CMPQ CX, $128
1081 JBE aes65to128
1082 JMP aes129plus
1083
1084aes0to15:
1085 TESTQ CX, CX
1086 JE aes0
1087
 // (ptr+16) & 0xff0 == 0 exactly when ptr & 0xff0 == 0xff0, i.e. the
 // low 12 bits of ptr are in 0xff0..0xfff.
1088 ADDQ $16, AX
1089 TESTW $0xff0, AX
1090 JE endofpage
1091
1092 // 16 bytes loaded at this address won't cross
1093 // a page boundary, so we can load it directly.
1094 MOVOU -16(AX), X1 // AX was advanced by 16 above, so this loads from the original pointer
1095 ADDQ CX, CX // CX = 2*len so that CX*8 indexes 16-byte table entries
1096 MOVQ $masks<>(SB), AX
1097 PAND (AX)(CX*8), X1 // keep only the low len bytes
1098final1:
1099 PXOR X0, X1 // xor data with seed
1100 AESENC X1, X1 // scramble combo 3 times
1101 AESENC X1, X1
1102 AESENC X1, X1
1103#ifdef GOEXPERIMENT_regabiargs
1104 MOVQ X1, AX // return X1
1105#else
1106 MOVQ X1, (DX)
1107#endif
1108 RET
1109
1110endofpage:
1111 // address ends in 1111xxxx. Might be up against
1112 // a page boundary, so load ending at last byte.
1113 // Then shift bytes down using pshufb.
1114 MOVOU -32(AX)(CX*1), X1 // AX is ptr+16 here, so this loads the 16 bytes ending at ptr+len
1115 ADDQ CX, CX // CX = 2*len so that CX*8 indexes 16-byte table entries
1116 MOVQ $shifts<>(SB), AX
1117 PSHUFB (AX)(CX*8), X1
1118 JMP final1
1119
1120aes0:
1121 // Return scrambled input seed
1122 AESENC X0, X0
1123#ifdef GOEXPERIMENT_regabiargs
1124 MOVQ X0, AX // return X0
1125#else
1126 MOVQ X0, (DX)
1127#endif
1128 RET
1129
1130aes16:
1131 MOVOU (AX), X1
1132 JMP final1
1133
1134aes17to32:
1135 // make second starting seed
1136 PXOR runtime·aeskeysched+16(SB), X1
1137 AESENC X1, X1
1138
1139 // load data to be hashed
 // (first 16 bytes and last 16 bytes; they overlap when len < 32)
1140 MOVOU (AX), X2
1141 MOVOU -16(AX)(CX*1), X3
1142
1143 // xor with seed
1144 PXOR X0, X2
1145 PXOR X1, X3
1146
1147 // scramble 3 times
1148 AESENC X2, X2
1149 AESENC X3, X3
1150 AESENC X2, X2
1151 AESENC X3, X3
1152 AESENC X2, X2
1153 AESENC X3, X3
1154
1155 // combine results
1156 PXOR X3, X2
1157#ifdef GOEXPERIMENT_regabiargs
1158 MOVQ X2, AX // return X2
1159#else
1160 MOVQ X2, (DX)
1161#endif
1162 RET
1163
1164aes33to64:
1165 // make 3 more starting seeds
1166 MOVO X1, X2
1167 MOVO X1, X3
1168 PXOR runtime·aeskeysched+16(SB), X1
1169 PXOR runtime·aeskeysched+32(SB), X2
1170 PXOR runtime·aeskeysched+48(SB), X3
1171 AESENC X1, X1
1172 AESENC X2, X2
1173 AESENC X3, X3
1174
 // load first 32 bytes and last 32 bytes (may overlap)
1175 MOVOU (AX), X4
1176 MOVOU 16(AX), X5
1177 MOVOU -32(AX)(CX*1), X6
1178 MOVOU -16(AX)(CX*1), X7
1179
1180 PXOR X0, X4
1181 PXOR X1, X5
1182 PXOR X2, X6
1183 PXOR X3, X7
1184
1185 AESENC X4, X4
1186 AESENC X5, X5
1187 AESENC X6, X6
1188 AESENC X7, X7
1189
1190 AESENC X4, X4
1191 AESENC X5, X5
1192 AESENC X6, X6
1193 AESENC X7, X7
1194
1195 AESENC X4, X4
1196 AESENC X5, X5
1197 AESENC X6, X6
1198 AESENC X7, X7
1199
 // fold the four lanes into X4
1200 PXOR X6, X4
1201 PXOR X7, X5
1202 PXOR X5, X4
1203#ifdef GOEXPERIMENT_regabiargs
1204 MOVQ X4, AX // return X4
1205#else
1206 MOVQ X4, (DX)
1207#endif
1208 RET
1209
1210aes65to128:
1211 // make 7 more starting seeds
1212 MOVO X1, X2
1213 MOVO X1, X3
1214 MOVO X1, X4
1215 MOVO X1, X5
1216 MOVO X1, X6
1217 MOVO X1, X7
1218 PXOR runtime·aeskeysched+16(SB), X1
1219 PXOR runtime·aeskeysched+32(SB), X2
1220 PXOR runtime·aeskeysched+48(SB), X3
1221 PXOR runtime·aeskeysched+64(SB), X4
1222 PXOR runtime·aeskeysched+80(SB), X5
1223 PXOR runtime·aeskeysched+96(SB), X6
1224 PXOR runtime·aeskeysched+112(SB), X7
1225 AESENC X1, X1
1226 AESENC X2, X2
1227 AESENC X3, X3
1228 AESENC X4, X4
1229 AESENC X5, X5
1230 AESENC X6, X6
1231 AESENC X7, X7
1232
1233 // load data
 // (first 64 bytes and last 64 bytes; they may overlap)
1234 MOVOU (AX), X8
1235 MOVOU 16(AX), X9
1236 MOVOU 32(AX), X10
1237 MOVOU 48(AX), X11
1238 MOVOU -64(AX)(CX*1), X12
1239 MOVOU -48(AX)(CX*1), X13
1240 MOVOU -32(AX)(CX*1), X14
1241 MOVOU -16(AX)(CX*1), X15
1242
1243 // xor with seed
1244 PXOR X0, X8
1245 PXOR X1, X9
1246 PXOR X2, X10
1247 PXOR X3, X11
1248 PXOR X4, X12
1249 PXOR X5, X13
1250 PXOR X6, X14
1251 PXOR X7, X15
1252
1253 // scramble 3 times
1254 AESENC X8, X8
1255 AESENC X9, X9
1256 AESENC X10, X10
1257 AESENC X11, X11
1258 AESENC X12, X12
1259 AESENC X13, X13
1260 AESENC X14, X14
1261 AESENC X15, X15
1262
1263 AESENC X8, X8
1264 AESENC X9, X9
1265 AESENC X10, X10
1266 AESENC X11, X11
1267 AESENC X12, X12
1268 AESENC X13, X13
1269 AESENC X14, X14
1270 AESENC X15, X15
1271
1272 AESENC X8, X8
1273 AESENC X9, X9
1274 AESENC X10, X10
1275 AESENC X11, X11
1276 AESENC X12, X12
1277 AESENC X13, X13
1278 AESENC X14, X14
1279 AESENC X15, X15
1280
1281 // combine results
1282 PXOR X12, X8
1283 PXOR X13, X9
1284 PXOR X14, X10
1285 PXOR X15, X11
1286 PXOR X10, X8
1287 PXOR X11, X9
1288 PXOR X9, X8
1289#ifdef GOEXPERIMENT_regabig
1290 // X15 must be zero on return
 // (it was used as a data lane above)
1291 PXOR X15, X15
1292#endif
1293#ifdef GOEXPERIMENT_regabiargs
1294 MOVQ X8, AX // return X8
1295#else
1296 MOVQ X8, (DX)
1297#endif
1298 RET
1299
1300aes129plus:
1301 // make 7 more starting seeds
1302 MOVO X1, X2
1303 MOVO X1, X3
1304 MOVO X1, X4
1305 MOVO X1, X5
1306 MOVO X1, X6
1307 MOVO X1, X7
1308 PXOR runtime·aeskeysched+16(SB), X1
1309 PXOR runtime·aeskeysched+32(SB), X2
1310 PXOR runtime·aeskeysched+48(SB), X3
1311 PXOR runtime·aeskeysched+64(SB), X4
1312 PXOR runtime·aeskeysched+80(SB), X5
1313 PXOR runtime·aeskeysched+96(SB), X6
1314 PXOR runtime·aeskeysched+112(SB), X7
1315 AESENC X1, X1
1316 AESENC X2, X2
1317 AESENC X3, X3
1318 AESENC X4, X4
1319 AESENC X5, X5
1320 AESENC X6, X6
1321 AESENC X7, X7
1322
1323 // start with last (possibly overlapping) block
1324 MOVOU -128(AX)(CX*1), X8
1325 MOVOU -112(AX)(CX*1), X9
1326 MOVOU -96(AX)(CX*1), X10
1327 MOVOU -80(AX)(CX*1), X11
1328 MOVOU -64(AX)(CX*1), X12
1329 MOVOU -48(AX)(CX*1), X13
1330 MOVOU -32(AX)(CX*1), X14
1331 MOVOU -16(AX)(CX*1), X15
1332
1333 // xor in seed
1334 PXOR X0, X8
1335 PXOR X1, X9
1336 PXOR X2, X10
1337 PXOR X3, X11
1338 PXOR X4, X12
1339 PXOR X5, X13
1340 PXOR X6, X14
1341 PXOR X7, X15
1342
1343 // compute number of remaining 128-byte blocks
 // CX = (len-1)/128, which is >= 1 here because len >= 129
1344 DECQ CX
1345 SHRQ $7, CX
1346
1347aesloop:
1348 // scramble state
1349 AESENC X8, X8
1350 AESENC X9, X9
1351 AESENC X10, X10
1352 AESENC X11, X11
1353 AESENC X12, X12
1354 AESENC X13, X13
1355 AESENC X14, X14
1356 AESENC X15, X15
1357
1358 // scramble state, xor in a block
 // X0-X7 are reused as data registers from here on; the seeds they
 // held were already folded into X8-X15 above.
1359 MOVOU (AX), X0
1360 MOVOU 16(AX), X1
1361 MOVOU 32(AX), X2
1362 MOVOU 48(AX), X3
1363 AESENC X0, X8
1364 AESENC X1, X9
1365 AESENC X2, X10
1366 AESENC X3, X11
1367 MOVOU 64(AX), X4
1368 MOVOU 80(AX), X5
1369 MOVOU 96(AX), X6
1370 MOVOU 112(AX), X7
1371 AESENC X4, X12
1372 AESENC X5, X13
1373 AESENC X6, X14
1374 AESENC X7, X15
1375
1376 ADDQ $128, AX // advance to the next 128-byte block
1377 DECQ CX
1378 JNE aesloop
1379
1380 // 3 more scrambles to finish
1381 AESENC X8, X8
1382 AESENC X9, X9
1383 AESENC X10, X10
1384 AESENC X11, X11
1385 AESENC X12, X12
1386 AESENC X13, X13
1387 AESENC X14, X14
1388 AESENC X15, X15
1389 AESENC X8, X8
1390 AESENC X9, X9
1391 AESENC X10, X10
1392 AESENC X11, X11
1393 AESENC X12, X12
1394 AESENC X13, X13
1395 AESENC X14, X14
1396 AESENC X15, X15
1397 AESENC X8, X8
1398 AESENC X9, X9
1399 AESENC X10, X10
1400 AESENC X11, X11
1401 AESENC X12, X12
1402 AESENC X13, X13
1403 AESENC X14, X14
1404 AESENC X15, X15
1405
 // fold the eight lanes into X8
1406 PXOR X12, X8
1407 PXOR X13, X9
1408 PXOR X14, X10
1409 PXOR X15, X11
1410 PXOR X10, X8
1411 PXOR X11, X9
1412 PXOR X9, X8
1413#ifdef GOEXPERIMENT_regabig
1414 // X15 must be zero on return
 // (it was used as a data lane above)
1415 PXOR X15, X15
1416#endif
1417#ifdef GOEXPERIMENT_regabiargs
1418 MOVQ X8, AX // return X8
1419#else
1420 MOVQ X8, (DX)
1421#endif
1422 RET
1423
1424// func memhash32(p unsafe.Pointer, h uintptr) uintptr
1425// ABIInternal for performance.
// Hashes the 4 bytes at p. If runtime·useAeshash is zero this tail-jumps
// to memhash32Fallback. Otherwise the seed goes in the low 64 bits of X0,
// the data in 32-bit lane 2, and X0 is run through three AESENC rounds
// keyed by runtime·aeskeysched+0, +16 and +32; the low 64 bits are returned.
1426TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
1427#ifdef GOEXPERIMENT_regabiargs
1428 // AX = ptr to data
1429 // BX = seed
1430#endif
1431 CMPB runtime·useAeshash(SB), $0
1432 JEQ noaes
1433#ifdef GOEXPERIMENT_regabiargs
1434 MOVQ BX, X0 // X0 = seed
1435#else
1436 MOVQ p+0(FP), AX // ptr to data
1437 MOVQ h+8(FP), X0 // seed
1438#endif
1439 PINSRD $2, (AX), X0 // data
1440 AESENC runtime·aeskeysched+0(SB), X0
1441 AESENC runtime·aeskeysched+16(SB), X0
1442 AESENC runtime·aeskeysched+32(SB), X0
1443#ifdef GOEXPERIMENT_regabiargs
1444 MOVQ X0, AX // return X0
1445#else
1446 MOVQ X0, ret+16(FP)
1447#endif
1448 RET
1449noaes:
1450 JMP runtime·memhash32Fallback<ABIInternal>(SB)
1451
1452// func memhash64(p unsafe.Pointer, h uintptr) uintptr
1453// ABIInternal for performance.
// Hashes the 8 bytes at p. If runtime·useAeshash is zero this tail-jumps
// to memhash64Fallback. Otherwise the seed goes in the low 64 bits of X0,
// the data in the high 64 bits, and X0 is run through three AESENC rounds
// keyed by runtime·aeskeysched+0, +16 and +32; the low 64 bits are returned.
1454TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
1455#ifdef GOEXPERIMENT_regabiargs
1456 // AX = ptr to data
1457 // BX = seed
1458#else
1459#endif
1460 CMPB runtime·useAeshash(SB), $0
1461 JEQ noaes
1462#ifdef GOEXPERIMENT_regabiargs
1463 MOVQ BX, X0 // X0 = seed
1464#else
1465 MOVQ p+0(FP), AX // ptr to data
1466 MOVQ h+8(FP), X0 // seed
1467#endif
1468 PINSRQ $1, (AX), X0 // data
1469 AESENC runtime·aeskeysched+0(SB), X0
1470 AESENC runtime·aeskeysched+16(SB), X0
1471 AESENC runtime·aeskeysched+32(SB), X0
1472#ifdef GOEXPERIMENT_regabiargs
1473 MOVQ X0, AX // return X0
1474#else
1475 MOVQ X0, ret+16(FP)
1476#endif
1477 RET
1478noaes:
1479 JMP runtime·memhash64Fallback<ABIInternal>(SB)
1480
1481// simple mask to get rid of data in the high part of the register.
// The table holds 16 entries of 16 bytes each (low quadword first).
// Entry i, at byte offset 16*i, has its low i bytes set to 0xff and the
// rest zero, so PAND with entry i keeps only the low i bytes.
// aeshashbody indexes it as (base)(2*len*8).
1482DATA masks<>+0x00(SB)/8, $0x0000000000000000
1483DATA masks<>+0x08(SB)/8, $0x0000000000000000
1484DATA masks<>+0x10(SB)/8, $0x00000000000000ff
1485DATA masks<>+0x18(SB)/8, $0x0000000000000000
1486DATA masks<>+0x20(SB)/8, $0x000000000000ffff
1487DATA masks<>+0x28(SB)/8, $0x0000000000000000
1488DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
1489DATA masks<>+0x38(SB)/8, $0x0000000000000000
1490DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
1491DATA masks<>+0x48(SB)/8, $0x0000000000000000
1492DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
1493DATA masks<>+0x58(SB)/8, $0x0000000000000000
1494DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
1495DATA masks<>+0x68(SB)/8, $0x0000000000000000
1496DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
1497DATA masks<>+0x78(SB)/8, $0x0000000000000000
1498DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
1499DATA masks<>+0x88(SB)/8, $0x0000000000000000
1500DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
1501DATA masks<>+0x98(SB)/8, $0x00000000000000ff
1502DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
1503DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
1504DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
1505DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
1506DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
1507DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
1508DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
1509DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
1510DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
1511DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
1512DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
1513DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
1514GLOBL masks<>(SB),RODATA,$256
1515
1516// func checkASM() bool
// Reports whether the addresses of both lookup tables have their low
// four bits clear. The tables are used as memory operands of PAND and
// PSHUFB in aeshashbody.
1517TEXT ·checkASM(SB),NOSPLIT,$0-1
1518 // check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
1519 MOVQ $masks<>(SB), AX
1520 MOVQ $shifts<>(SB), BX
1521 ORQ BX, AX // a low bit set in either address survives the OR
1522 TESTQ $15, AX
1523 SETEQ ret+0(FP) // true iff (masks|shifts)&15 == 0
1524 RET
1525
1526// these are arguments to pshufb. They move data down from
1527// the high bytes of the register to the low bytes of the register.
1528// index is how many bytes to move.
// The table holds 16 entries of 16 bytes each (low quadword first).
// In entry i the low i control bytes select source bytes 16-i..15; the
// remaining control bytes are 0xff, for which PSHUFB writes zero.
1529DATA shifts<>+0x00(SB)/8, $0x0000000000000000
1530DATA shifts<>+0x08(SB)/8, $0x0000000000000000
1531DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
1532DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
1533DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
1534DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
1535DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
1536DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
1537DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
1538DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
1539DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
1540DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
1541DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
1542DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
1543DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
1544DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
1545DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
1546DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
1547DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
1548DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
1549DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
1550DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
1551DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
1552DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
1553DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
1554DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
1555DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
1556DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
1557DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
1558DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
1559DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
1560DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
1561GLOBL shifts<>(SB),RODATA,$256
1562
// return0 sets AX to zero and returns. It uses no stack frame.
1563TEXT runtime·return0(SB), NOSPLIT, $0
1564 MOVL $0, AX // 32-bit write; also clears the upper half of the 64-bit register
1565 RET
1566
1567
1568// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1569// Must obey the gcc calling convention.
// Only CX and AX are written; the result is left in AX.
1570TEXT _cgo_topofstack(SB),NOSPLIT,$0
1571 get_tls(CX)
1572 MOVQ g(CX), AX // AX = g
1573 MOVQ g_m(AX), AX // AX = g.m
1574 MOVQ m_curg(AX), AX // AX = g.m.curg
1575 MOVQ (g_stack+stack_hi)(AX), AX // AX = g.m.curg.stack.hi
1576 RET
1577
1578// The top-most function running on a goroutine
1579// returns to goexit+PCQuantum.
// The leading one-byte NOP means that return address lands on the CALL
// below rather than on the first byte of the function.
// There is no RET: goexit1 is not expected to return.
1580TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
1581 BYTE $0x90 // NOP
1582 CALL runtime·goexit1(SB) // does not return
1583 // traceback from goexit1 must hit code range of goexit
1584 BYTE $0x90 // NOP
1585
1586// This is called from .init_array and follows the platform, not Go, ABI.
// The pointer in DI is appended to a linked list: it is stored into the
// moduledata_next field of the current runtime·lastmoduledatap, and then
// becomes the new runtime·lastmoduledatap.
1587TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1588 PUSHQ R15 // The access to global variables below implicitly uses R15, which is callee-save
1589 MOVQ runtime·lastmoduledatap(SB), AX
1590 MOVQ DI, moduledata_next(AX) // old tail's next = DI
1591 MOVQ DI, runtime·lastmoduledatap(SB) // DI becomes the new tail
1592 POPQ R15
1593 RET
1594
1595// Initialize special registers then jump to sigpanic.
1596// This function is injected from the signal handler for panicking
1597// signals. It is quite painful to set X15 in the signal context,
1598// so we do it here.
// Under GOEXPERIMENT_regabig, R14 is loaded with g from TLS and X15 is
// zeroed; without it the function is a bare tail-jump to sigpanic.
1599TEXT ·sigpanic0<ABIInternal>(SB),NOSPLIT,$0-0
1600#ifdef GOEXPERIMENT_regabig
1601 get_tls(R14)
1602 MOVQ g(R14), R14 // R14 = g
1603 XORPS X15, X15 // X15 = 0
1604#endif
1605 JMP ·sigpanic<ABIInternal>(SB)
1606
1607// gcWriteBarrier performs a heap pointer write and informs the GC.
1608//
1609// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
1610// - DI is the destination of the write
1611// - AX is the value being written at DI
1612// It clobbers FLAGS. It does not clobber any general-purpose registers,
1613// but may clobber others (e.g., SSE registers).
1614// Defined as ABIInternal since it does not use the stack-based Go ABI.
//
// Operation: a 16-byte record {new value (AX), old value (*DI)} is
// appended to the current P's write barrier buffer by advancing
// wbBuf.next. If next then equals wbBuf.end, runtime·wbBufFlush is called.
// Finally AX is stored to (DI).
//
// Frame layout (112 bytes): 0(SP)..88(SP) hold the general-purpose
// registers spilled on the flush path (0(SP) and 8(SP) double as the
// wbBufFlush arguments); 96(SP) and 104(SP) hold R12 and R13.
1615TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$112
1616 // Save the registers clobbered by the fast path. This is slightly
1617 // faster than having the caller spill these.
1618 MOVQ R12, 96(SP)
1619 MOVQ R13, 104(SP)
1620 // TODO: Consider passing g.m.p in as an argument so they can be shared
1621 // across a sequence of write barriers.
1622#ifdef GOEXPERIMENT_regabig
1623 MOVQ g_m(R14), R13
1624#else
1625 get_tls(R13)
1626 MOVQ g(R13), R13
1627 MOVQ g_m(R13), R13
1628#endif
1629 MOVQ m_p(R13), R13 // R13 = g.m.p
1630 MOVQ (p_wbBuf+wbBuf_next)(R13), R12
1631 // Increment wbBuf.next position.
1632 LEAQ 16(R12), R12 // one record is 16 bytes: value and *slot
1633 MOVQ R12, (p_wbBuf+wbBuf_next)(R13)
1634 CMPQ R12, (p_wbBuf+wbBuf_end)(R13) // flags are consumed by JEQ below; the MOVQs in between leave them intact
1635 // Record the write.
1636 MOVQ AX, -16(R12) // Record value
1637 // Note: This turns bad pointer writes into bad
1638 // pointer reads, which could be confusing. We could avoid
1639 // reading from obviously bad pointers, which would
1640 // take care of the vast majority of these. We could
1641 // patch this up in the signal handler, or use XCHG to
1642 // combine the read and the write.
1643 MOVQ (DI), R13
1644 MOVQ R13, -8(R12) // Record *slot
1645 // Is the buffer full? (flags set in CMPQ above)
1646 JEQ flush
1647ret:
 // Restore the two scratch registers saved on entry.
1648 MOVQ 96(SP), R12
1649 MOVQ 104(SP), R13
1650 // Do the write.
1651 MOVQ AX, (DI)
1652 RET
1653
1654flush:
1655 // Save all general purpose registers since these could be
1656 // clobbered by wbBufFlush and were not saved by the caller.
1657 // It is possible for wbBufFlush to clobber other registers
1658 // (e.g., SSE registers), but the compiler takes care of saving
1659 // those in the caller if necessary. This strikes a balance
1660 // with registers that are likely to be used.
1661 //
1662 // We don't have type information for these, but all code under
1663 // here is NOSPLIT, so nothing will observe these.
1664 //
1665 // TODO: We could strike a different balance; e.g., saving X0
1666 // and not saving GP registers that are less likely to be used.
1667 MOVQ DI, 0(SP) // Also first argument to wbBufFlush
1668 MOVQ AX, 8(SP) // Also second argument to wbBufFlush
1669 MOVQ BX, 16(SP)
1670 MOVQ CX, 24(SP)
1671 MOVQ DX, 32(SP)
1672 // DI already saved
1673 MOVQ SI, 40(SP)
1674 MOVQ BP, 48(SP)
1675 MOVQ R8, 56(SP)
1676 MOVQ R9, 64(SP)
1677 MOVQ R10, 72(SP)
1678 MOVQ R11, 80(SP)
1679 // R12 already saved
1680 // R13 already saved
1681 // R14 is g
1682 MOVQ R15, 88(SP)
1683
1684 // This takes arguments DI and AX
1685 CALL runtime·wbBufFlush(SB)
1686
 // Reload every register spilled above, in the same slot order.
1687 MOVQ 0(SP), DI
1688 MOVQ 8(SP), AX
1689 MOVQ 16(SP), BX
1690 MOVQ 24(SP), CX
1691 MOVQ 32(SP), DX
1692 MOVQ 40(SP), SI
1693 MOVQ 48(SP), BP
1694 MOVQ 56(SP), R8
1695 MOVQ 64(SP), R9
1696 MOVQ 72(SP), R10
1697 MOVQ 80(SP), R11
1698 MOVQ 88(SP), R15
1699 JMP ret
1700
1701// gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.
1702// Defined as ABIInternal since it does not use the stable Go ABI.
// AX and CX are swapped around the call, so both hold their entry
// values again on return.
1703TEXT runtime·gcWriteBarrierCX<ABIInternal>(SB),NOSPLIT,$0
1704 XCHGQ CX, AX // AX = value to write; CX parks the caller's AX
1705 CALL runtime·gcWriteBarrier<ABIInternal>(SB)
1706 XCHGQ CX, AX // undo the swap
1707 RET
1708
1709// gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX.
1710// Defined as ABIInternal since it does not use the stable Go ABI.
// AX and DX are swapped around the call, so both hold their entry
// values again on return.
1711TEXT runtime·gcWriteBarrierDX<ABIInternal>(SB),NOSPLIT,$0
1712 XCHGQ DX, AX // AX = value to write; DX parks the caller's AX
1713 CALL runtime·gcWriteBarrier<ABIInternal>(SB)
1714 XCHGQ DX, AX // undo the swap
1715 RET
1716
1717// gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX.
1718// Defined as ABIInternal since it does not use the stable Go ABI.
// AX and BX are swapped around the call, so both hold their entry
// values again on return.
1719TEXT runtime·gcWriteBarrierBX<ABIInternal>(SB),NOSPLIT,$0
1720 XCHGQ BX, AX // AX = value to write; BX parks the caller's AX
1721 CALL runtime·gcWriteBarrier<ABIInternal>(SB)
1722 XCHGQ BX, AX // undo the swap
1723 RET
1724
1725// gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP.
1726// Defined as ABIInternal since it does not use the stable Go ABI.
// AX and BP are swapped around the call, so both hold their entry
// values again on return.
1727TEXT runtime·gcWriteBarrierBP<ABIInternal>(SB),NOSPLIT,$0
1728 XCHGQ BP, AX // AX = value to write; BP parks the caller's AX
1729 CALL runtime·gcWriteBarrier<ABIInternal>(SB)
1730 XCHGQ BP, AX // undo the swap
1731 RET
1732
1733// gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI.
1734// Defined as ABIInternal since it does not use the stable Go ABI.
// AX and SI are swapped around the call, so both hold their entry
// values again on return.
1735TEXT runtime·gcWriteBarrierSI<ABIInternal>(SB),NOSPLIT,$0
1736 XCHGQ SI, AX // AX = value to write; SI parks the caller's AX
1737 CALL runtime·gcWriteBarrier<ABIInternal>(SB)
1738 XCHGQ SI, AX // undo the swap
1739 RET
1740
1741// gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8.
1742// Defined as ABIInternal since it does not use the stable Go ABI.
// AX and R8 are swapped around the call, so both hold their entry
// values again on return.
1743TEXT runtime·gcWriteBarrierR8<ABIInternal>(SB),NOSPLIT,$0
1744 XCHGQ R8, AX // AX = value to write; R8 parks the caller's AX
1745 CALL runtime·gcWriteBarrier<ABIInternal>(SB)
1746 XCHGQ R8, AX // undo the swap
1747 RET
1748
1749// gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9.
1750// Defined as ABIInternal since it does not use the stable Go ABI.
// AX and R9 are swapped around the call, so both hold their entry
// values again on return.
1751TEXT runtime·gcWriteBarrierR9<ABIInternal>(SB),NOSPLIT,$0
1752 XCHGQ R9, AX // AX = value to write; R9 parks the caller's AX
1753 CALL runtime·gcWriteBarrier<ABIInternal>(SB)
1754 XCHGQ R9, AX // undo the swap
1755 RET
1756
// Read-only 20-byte error string. debugCallV2 reports it, with the length
// hard-coded as 20, when the requested frame exceeds every debugCall<N> size.
1757DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
1758GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1759
1760// debugCallV2 is the entry point for debugger-injected function
1761// calls on running goroutines. It informs the runtime that a
1762// debug call has been injected and creates a call frame for the
1763// debugger to fill in.
1764//
1765// To inject a function call, a debugger should:
1766// 1. Check that the goroutine is in state _Grunning and that
1767// there are at least 256 bytes free on the stack.
1768// 2. Push the current PC on the stack (updating SP).
1769// 3. Write the desired argument frame size at SP-16 (using the SP
1770// after step 2).
1771// 4. Save all machine registers (including flags and XMM registers)
1772// so they can be restored later by the debugger.
1773// 5. Set the PC to debugCallV2 and resume execution.
1774//
1775// If the goroutine is in state _Grunnable, then it's not generally
1776// safe to inject a call because it may return out via other runtime
1777// operations. Instead, the debugger should unwind the stack to find
1778// the return to non-runtime code, add a temporary breakpoint there,
1779// and inject the call once that breakpoint is hit.
1780//
1781// If the goroutine is in any other state, it's not safe to inject a call.
1782//
1783// This function communicates back to the debugger by setting R12 and
1784// invoking INT3 to raise a breakpoint signal. See the comments in the
1785// implementation for the protocol the debugger is expected to
1786// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
1787//
1788// The debugger must ensure that any pointers passed to the function
1789// obey escape analysis requirements. Specifically, it must not pass
1790// a stack pointer to an escaping argument. debugCallV2 cannot check
1791// this invariant.
1792//
1793// This is ABIInternal because Go code injects its PC directly into new
1794// goroutine stacks.
// Outline of the body:
//  1. Spill the 15 general-purpose registers into named frame slots.
//  2. Read the requested argument frame size and save it at frameSize-128(SP).
//  3. Call debugCallCheck with the caller's PC; a non-zero word at 8(SP)
//     afterwards is a failure reason string, reported with R12 = 8.
//  4. Otherwise dispatch to the smallest debugCall<N> whose N is at least
//     the frame size, via debugCallWrap; if none fits, report
//     debugCallFrameTooLarge with R12 = 8.
//  5. At restore: trap with R12 = 16, reload the spilled registers, return.
1795TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
1796 // Save all registers that may contain pointers so they can be
1797 // conservatively scanned.
1798 //
1799 // We can't do anything that might clobber any of these
1800 // registers before this.
1801 MOVQ R15, r15-(14*8+8)(SP)
1802 MOVQ R14, r14-(13*8+8)(SP)
1803 MOVQ R13, r13-(12*8+8)(SP)
1804 MOVQ R12, r12-(11*8+8)(SP)
1805 MOVQ R11, r11-(10*8+8)(SP)
1806 MOVQ R10, r10-(9*8+8)(SP)
1807 MOVQ R9, r9-(8*8+8)(SP)
1808 MOVQ R8, r8-(7*8+8)(SP)
1809 MOVQ DI, di-(6*8+8)(SP)
1810 MOVQ SI, si-(5*8+8)(SP)
1811 MOVQ BP, bp-(4*8+8)(SP)
1812 MOVQ BX, bx-(3*8+8)(SP)
1813 MOVQ DX, dx-(2*8+8)(SP)
1814 // Save the frame size before we clobber it. Either of the last
1815 // saves could clobber this depending on whether there's a saved BP.
1816 MOVQ frameSize-24(FP), DX // aka -16(RSP) before prologue
1817 MOVQ CX, cx-(1*8+8)(SP)
1818 MOVQ AX, ax-(0*8+8)(SP)
1819
1820 // Save the argument frame size.
1821 MOVQ DX, frameSize-128(SP)
1822
1823 // Perform a safe-point check.
1824 MOVQ retpc-8(FP), AX // Caller's PC
1825 MOVQ AX, 0(SP)
1826 CALL runtime·debugCallCheck(SB)
1827 MOVQ 8(SP), AX // first result word; zero means the check passed
1828 TESTQ AX, AX
1829 JZ good
1830 // The safety check failed. Put the reason string at the top
1831 // of the stack.
1832 MOVQ AX, 0(SP)
1833 MOVQ 16(SP), AX // second result word
1834 MOVQ AX, 8(SP)
1835 // Set R12 to 8 and invoke INT3. The debugger should get the
1836 // reason a call can't be injected from the top of the stack
1837 // and resume execution.
1838 MOVQ $8, R12
1839 BYTE $0xcc
1840 JMP restore
1841
1842good:
1843 // Registers are saved and it's safe to make a call.
1844 // Open up a call frame, moving the stack if necessary.
1845 //
1846 // Once the frame is allocated, this will set R12 to 0 and
1847 // invoke INT3. The debugger should write the argument
1848 // frame for the call at SP, set up argument registers, push
1849 // the trapping PC on the stack, set the PC to the function to
1850 // call, set RDX to point to the closure (if a closure call),
1851 // and resume execution.
1852 //
1853 // If the function returns, this will set R12 to 1 and invoke
1854 // INT3. The debugger can then inspect any return value saved
1855 // on the stack at SP and in registers and resume execution again.
1856 //
1857 // If the function panics, this will set R12 to 2 and invoke INT3.
1858 // The interface{} value of the panic will be at SP. The debugger
1859 // can inspect the panic value and resume execution again.
 //
 // DEBUG_CALL_DISPATCH(NAME, MAXSIZE): if AX (the frame size) is at most
 // MAXSIZE, pass NAME's address to debugCallWrap at 0(SP), call it, and
 // jump to restore. Otherwise JA 5(PC) skips the four instructions that
 // follow it, landing on the next dispatch. The comparison is unsigned.
1860#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
1861 CMPQ AX, $MAXSIZE; \
1862 JA 5(PC); \
1863 MOVQ $NAME(SB), AX; \
1864 MOVQ AX, 0(SP); \
1865 CALL runtime·debugCallWrap(SB); \
1866 JMP restore
1867
1868 MOVQ frameSize-128(SP), AX
1869 DEBUG_CALL_DISPATCH(debugCall32<>, 32)
1870 DEBUG_CALL_DISPATCH(debugCall64<>, 64)
1871 DEBUG_CALL_DISPATCH(debugCall128<>, 128)
1872 DEBUG_CALL_DISPATCH(debugCall256<>, 256)
1873 DEBUG_CALL_DISPATCH(debugCall512<>, 512)
1874 DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
1875 DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
1876 DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
1877 DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
1878 DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
1879 DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
1880 DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
1881 // The frame size is too large. Report the error.
1882 MOVQ $debugCallFrameTooLarge<>(SB), AX
1883 MOVQ AX, 0(SP)
1884 MOVQ $20, 8(SP) // length of debugCallFrameTooLarge string
1885 MOVQ $8, R12
1886 BYTE $0xcc
1887 JMP restore
1888
1889restore:
1890 // Calls and failures resume here.
1891 //
1892 // Set R12 to 16 and invoke INT3. The debugger should restore
1893 // all registers except RIP and RSP and resume execution.
1894 MOVQ $16, R12
1895 BYTE $0xcc
1896 // We must not modify flags after this point.
 // (the MOVQ loads and RET below do not write flags)
1897
1898 // Restore pointer-containing registers, which may have been
1899 // modified from the debugger's copy by stack copying.
1900 MOVQ ax-(0*8+8)(SP), AX
1901 MOVQ cx-(1*8+8)(SP), CX
1902 MOVQ dx-(2*8+8)(SP), DX
1903 MOVQ bx-(3*8+8)(SP), BX
1904 MOVQ bp-(4*8+8)(SP), BP
1905 MOVQ si-(5*8+8)(SP), SI
1906 MOVQ di-(6*8+8)(SP), DI
1907 MOVQ r8-(7*8+8)(SP), R8
1908 MOVQ r9-(8*8+8)(SP), R9
1909 MOVQ r10-(9*8+8)(SP), R10
1910 MOVQ r11-(10*8+8)(SP), R11
1911 MOVQ r12-(11*8+8)(SP), R12
1912 MOVQ r13-(12*8+8)(SP), R13
1913 MOVQ r14-(13*8+8)(SP), R14
1914 MOVQ r15-(14*8+8)(SP), R15
1915
1916 RET
1917
1918// runtime.debugCallCheck assumes that functions defined with the
1919// DEBUG_CALL_FN macro are safe points to inject calls.
// DEBUG_CALL_FN(NAME, MAXSIZE) defines a WRAPPER function with a
// MAXSIZE-byte frame. Its body sets R12 to 0 and executes INT3 (0xcc),
// then sets R12 to 1 and executes INT3 again, then returns.
// One function is instantiated per power-of-two size from 32 to 65536.
1920#define DEBUG_CALL_FN(NAME,MAXSIZE) \
1921TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
1922 NO_LOCAL_POINTERS; \
1923 MOVQ $0, R12; \
1924 BYTE $0xcc; \
1925 MOVQ $1, R12; \
1926 BYTE $0xcc; \
1927 RET
1928DEBUG_CALL_FN(debugCall32<>, 32)
1929DEBUG_CALL_FN(debugCall64<>, 64)
1930DEBUG_CALL_FN(debugCall128<>, 128)
1931DEBUG_CALL_FN(debugCall256<>, 256)
1932DEBUG_CALL_FN(debugCall512<>, 512)
1933DEBUG_CALL_FN(debugCall1024<>, 1024)
1934DEBUG_CALL_FN(debugCall2048<>, 2048)
1935DEBUG_CALL_FN(debugCall4096<>, 4096)
1936DEBUG_CALL_FN(debugCall8192<>, 8192)
1937DEBUG_CALL_FN(debugCall16384<>, 16384)
1938DEBUG_CALL_FN(debugCall32768<>, 32768)
1939DEBUG_CALL_FN(debugCall65536<>, 65536)
1940
1941// func debugCallPanicked(val interface{})
// Copies the two words of val to 0(SP) and 8(SP), sets R12 to 2 and
// executes INT3 (0xcc), then returns.
1942TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
1943 // Copy the panic value to the top of stack.
1944 MOVQ val_type+0(FP), AX
1945 MOVQ AX, 0(SP)
1946 MOVQ val_data+8(FP), AX
1947 MOVQ AX, 8(SP)
1948 MOVQ $2, R12
1949 BYTE $0xcc
1950 RET
1951
1952// Note: these functions use a special calling convention to save generated code space.
1953// Arguments are passed in registers, but the space for those arguments are allocated
1954// in the caller's stack frame. These stubs write the args into that stack space and
1955// then tail call to the corresponding runtime handler.
1956// The tail call makes these stubs disappear in backtraces.
1957// Defined as ABIInternal since they do not use the stack-based Go ABI.
// panicIndex receives x in AX and y in CX and tail-jumps to goPanicIndex.
// With regabiargs, y is moved into BX and AX is left untouched; otherwise
// both values are stored into the x and y argument slots.
1958TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
1959#ifdef GOEXPERIMENT_regabiargs
1960 MOVQ CX, BX
1961#else
1962 MOVQ AX, x+0(FP)
1963 MOVQ CX, y+8(FP)
1964#endif
1965 JMP runtime·goPanicIndex<ABIInternal>(SB)
// panicIndexU receives x in AX and y in CX and tail-jumps to goPanicIndexU.
// With regabiargs, y is moved into BX and AX is left untouched; otherwise
// both values are stored into the x and y argument slots.
1966TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
1967#ifdef GOEXPERIMENT_regabiargs
1968 MOVQ CX, BX
1969#else
1970 MOVQ AX, x+0(FP)
1971 MOVQ CX, y+8(FP)
1972#endif
1973 JMP runtime·goPanicIndexU<ABIInternal>(SB)
// panicSliceAlen receives x in CX and y in DX and tail-jumps to
// goPanicSliceAlen. With regabiargs they are moved into AX and BX;
// otherwise they are stored into the x and y argument slots.
1974TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
1975#ifdef GOEXPERIMENT_regabiargs
1976 MOVQ CX, AX
1977 MOVQ DX, BX
1978#else
1979 MOVQ CX, x+0(FP)
1980 MOVQ DX, y+8(FP)
1981#endif
1982 JMP runtime·goPanicSliceAlen<ABIInternal>(SB)
// panicSliceAlenU receives x in CX and y in DX and tail-jumps to
// goPanicSliceAlenU. With regabiargs they are moved into AX and BX;
// otherwise they are stored into the x and y argument slots.
1983TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
1984#ifdef GOEXPERIMENT_regabiargs
1985 MOVQ CX, AX
1986 MOVQ DX, BX
1987#else
1988 MOVQ CX, x+0(FP)
1989 MOVQ DX, y+8(FP)
1990#endif
1991 JMP runtime·goPanicSliceAlenU<ABIInternal>(SB)
// panicSliceAcap receives x in CX and y in DX and tail-jumps to
// goPanicSliceAcap. With regabiargs they are moved into AX and BX;
// otherwise they are stored into the x and y argument slots.
1992TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
1993#ifdef GOEXPERIMENT_regabiargs
1994 MOVQ CX, AX
1995 MOVQ DX, BX
1996#else
1997 MOVQ CX, x+0(FP)
1998 MOVQ DX, y+8(FP)
1999#endif
2000 JMP runtime·goPanicSliceAcap<ABIInternal>(SB)
// panicSliceAcapU receives x in CX and y in DX and tail-jumps to
// goPanicSliceAcapU. With regabiargs they are moved into AX and BX;
// otherwise they are stored into the x and y argument slots.
2001TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
2002#ifdef GOEXPERIMENT_regabiargs
2003 MOVQ CX, AX
2004 MOVQ DX, BX
2005#else
2006 MOVQ CX, x+0(FP)
2007 MOVQ DX, y+8(FP)
2008#endif
2009 JMP runtime·goPanicSliceAcapU<ABIInternal>(SB)
// panicSliceB receives x in AX and y in CX and tail-jumps to goPanicSliceB.
// With regabiargs, y is moved into BX and AX is left untouched; otherwise
// both values are stored into the x and y argument slots.
2010TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
2011#ifdef GOEXPERIMENT_regabiargs
2012 MOVQ CX, BX
2013#else
2014 MOVQ AX, x+0(FP)
2015 MOVQ CX, y+8(FP)
2016#endif
2017 JMP runtime·goPanicSliceB<ABIInternal>(SB)
// panicSliceBU receives x in AX and y in CX and tail-jumps to goPanicSliceBU.
// With regabiargs, y is moved into BX and AX is left untouched; otherwise
// both values are stored into the x and y argument slots.
2018TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
2019#ifdef GOEXPERIMENT_regabiargs
2020 MOVQ CX, BX
2021#else
2022 MOVQ AX, x+0(FP)
2023 MOVQ CX, y+8(FP)
2024#endif
2025 JMP runtime·goPanicSliceBU<ABIInternal>(SB)
// panicSlice3Alen receives x in DX and y in BX and tail-jumps to
// goPanicSlice3Alen. With regabiargs, x is moved into AX and BX is left
// untouched; otherwise both are stored into the x and y argument slots.
2026TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
2027#ifdef GOEXPERIMENT_regabiargs
2028 MOVQ DX, AX
2029#else
2030 MOVQ DX, x+0(FP)
2031 MOVQ BX, y+8(FP)
2032#endif
2033 JMP runtime·goPanicSlice3Alen<ABIInternal>(SB)
// panicSlice3AlenU receives x in DX and y in BX and tail-jumps to
// goPanicSlice3AlenU. With regabiargs, x is moved into AX and BX is left
// untouched; otherwise both are stored into the x and y argument slots.
2034TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
2035#ifdef GOEXPERIMENT_regabiargs
2036 MOVQ DX, AX
2037#else
2038 MOVQ DX, x+0(FP)
2039 MOVQ BX, y+8(FP)
2040#endif
2041 JMP runtime·goPanicSlice3AlenU<ABIInternal>(SB)
// panicSlice3Acap receives x in DX and y in BX and tail-jumps to
// goPanicSlice3Acap. With regabiargs, x is moved into AX and BX is left
// untouched; otherwise both are stored into the x and y argument slots.
2042TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
2043#ifdef GOEXPERIMENT_regabiargs
2044 MOVQ DX, AX
2045#else
2046 MOVQ DX, x+0(FP)
2047 MOVQ BX, y+8(FP)
2048#endif
2049 JMP runtime·goPanicSlice3Acap<ABIInternal>(SB)
// panicSlice3AcapU receives x in DX and y in BX and tail-jumps to
// goPanicSlice3AcapU. With regabiargs, x is moved into AX and BX is left
// untouched; otherwise both are stored into the x and y argument slots.
2050TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
2051#ifdef GOEXPERIMENT_regabiargs
2052 MOVQ DX, AX
2053#else
2054 MOVQ DX, x+0(FP)
2055 MOVQ BX, y+8(FP)
2056#endif
2057 JMP runtime·goPanicSlice3AcapU<ABIInternal>(SB)
// panicSlice3B receives x in CX and y in DX and tail-jumps to
// goPanicSlice3B. With regabiargs they are moved into AX and BX;
// otherwise they are stored into the x and y argument slots.
2058TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
2059#ifdef GOEXPERIMENT_regabiargs
2060 MOVQ CX, AX
2061 MOVQ DX, BX
2062#else
2063 MOVQ CX, x+0(FP)
2064 MOVQ DX, y+8(FP)
2065#endif
2066 JMP runtime·goPanicSlice3B<ABIInternal>(SB)
// panicSlice3BU receives x in CX and y in DX and tail-jumps to
// goPanicSlice3BU. With regabiargs they are moved into AX and BX;
// otherwise they are stored into the x and y argument slots.
2067TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
2068#ifdef GOEXPERIMENT_regabiargs
2069 MOVQ CX, AX
2070 MOVQ DX, BX
2071#else
2072 MOVQ CX, x+0(FP)
2073 MOVQ DX, y+8(FP)
2074#endif
2075 JMP runtime·goPanicSlice3BU<ABIInternal>(SB)
// panicSlice3C receives x in AX and y in CX and tail-jumps to goPanicSlice3C.
// With regabiargs, y is moved into BX and AX is left untouched; otherwise
// both values are stored into the x and y argument slots.
2076TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
2077#ifdef GOEXPERIMENT_regabiargs
2078 MOVQ CX, BX
2079#else
2080 MOVQ AX, x+0(FP)
2081 MOVQ CX, y+8(FP)
2082#endif
2083 JMP runtime·goPanicSlice3C<ABIInternal>(SB)
// panicSlice3CU receives x in AX and y in CX and tail-jumps to goPanicSlice3CU.
// With regabiargs, y is moved into BX and AX is left untouched; otherwise
// both values are stored into the x and y argument slots.
2084TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
2085#ifdef GOEXPERIMENT_regabiargs
2086 MOVQ CX, BX
2087#else
2088 MOVQ AX, x+0(FP)
2089 MOVQ CX, y+8(FP)
2090#endif
2091 JMP runtime·goPanicSlice3CU<ABIInternal>(SB)
// panicSliceConvert receives x in DX and y in BX and tail-jumps to
// goPanicSliceConvert. With regabiargs, x is moved into AX and BX is left
// untouched; otherwise both are stored into the x and y argument slots.
2092TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
2093#ifdef GOEXPERIMENT_regabiargs
2094 MOVQ DX, AX
2095#else
2096 MOVQ DX, x+0(FP)
2097 MOVQ BX, y+8(FP)
2098#endif
2099 JMP runtime·goPanicSliceConvert<ABIInternal>(SB)
2100
2101#ifdef GOOS_android
2102// Use the free TLS_SLOT_APP slot #2 on Android Q.
2103// Earlier androids are set up in gcc_android.c.
// runtime·tls_g is an 8-byte NOPTR global initialised to 16.
// NOTE(review): 16 presumably is the byte offset of slot #2 with 8-byte
// slots (2*8); confirm against the Android TLS layout.
2104DATA runtime·tls_g+0(SB)/8, $16
2105GLOBL runtime·tls_g+0(SB), NOPTR, $8
2106#endif
2107
2108// The compiler and assembler's -spectre=ret mode rewrites
2109// all indirect CALL AX / JMP AX instructions to be
2110// CALL retpolineAX / JMP retpolineAX.
2111// See https://support.google.com/faqs/answer/7625886.
//
// RETPOLINE(reg) emits the sequence as raw bytes, where reg is the
// hardware register number (0 = AX ... 15 = R15):
//   E8 04 00 00 00   CALL setup (relative +4: skips the 4-byte nospec loop)
//   F3 90            nospec: PAUSE
//   EB FC            JMP nospec (relative -4)
//   REX 89 modrm 24  setup: MOVQ reg, 0(SP), overwriting the return
//                    address just pushed by the CALL
//   C3               RET, which therefore transfers to the address in reg
// The MOVQ encoding is built from reg: bit 3 becomes the REX.R bit
// (0x48 | (reg&8)>>1) and the low three bits go in the ModRM reg field
// (0x04 | (reg&7)<<3). The "MOVQ AX" annotation inside the macro is
// literal only for reg 0.
2112#define RETPOLINE(reg) \
2113 /* CALL setup */ BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
2114 /* nospec: */ \
2115 /* PAUSE */ BYTE $0xF3; BYTE $0x90; \
2116 /* JMP nospec */ BYTE $0xEB; BYTE $-(2+2); \
2117 /* setup: */ \
2118 /* MOVQ AX, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
2119 BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
2120 /* RET */ BYTE $0xC3
2121
// One thunk per register. Register number 4 (SP) has no thunk.
2122TEXT runtime·retpolineAX(SB),NOSPLIT,$0; RETPOLINE(0)
2123TEXT runtime·retpolineCX(SB),NOSPLIT,$0; RETPOLINE(1)
2124TEXT runtime·retpolineDX(SB),NOSPLIT,$0; RETPOLINE(2)
2125TEXT runtime·retpolineBX(SB),NOSPLIT,$0; RETPOLINE(3)
2126/* SP is 4, can't happen / magic encodings */
2127TEXT runtime·retpolineBP(SB),NOSPLIT,$0; RETPOLINE(5)
2128TEXT runtime·retpolineSI(SB),NOSPLIT,$0; RETPOLINE(6)
2129TEXT runtime·retpolineDI(SB),NOSPLIT,$0; RETPOLINE(7)
2130TEXT runtime·retpolineR8(SB),NOSPLIT,$0; RETPOLINE(8)
2131TEXT runtime·retpolineR9(SB),NOSPLIT,$0; RETPOLINE(9)
2132TEXT runtime·retpolineR10(SB),NOSPLIT,$0; RETPOLINE(10)
2133TEXT runtime·retpolineR11(SB),NOSPLIT,$0; RETPOLINE(11)
2134TEXT runtime·retpolineR12(SB),NOSPLIT,$0; RETPOLINE(12)
2135TEXT runtime·retpolineR13(SB),NOSPLIT,$0; RETPOLINE(13)
2136TEXT runtime·retpolineR14(SB),NOSPLIT,$0; RETPOLINE(14)
2137TEXT runtime·retpolineR15(SB),NOSPLIT,$0; RETPOLINE(15)
View as plain text