1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "textflag.h"
6
// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// Encrypts one 16-byte block from src into dst with AES-NI, consuming
// 16-byte round keys sequentially from xk. nr picks the entry point into
// the unrolled round sequence: nr below 12 (unsigned) runs 10 rounds,
// nr equal to 12 runs 12 rounds, and any other value runs 14 rounds.
// The final round always uses AESENCLAST.
//
// Register use: CX = nr, AX = round-key cursor, BX = src, DX = dst,
// X0 = block state, X1 = round key being applied.
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
	MOVQ src+24(FP), BX
	MOVQ dst+16(FP), DX
	MOVQ xk+8(FP), AX
	MOVQ nr+0(FP), CX
	// Whitening step: XOR the input block with round key 0.
	MOVUPS (AX), X1
	MOVUPS (BX), X0
	PXOR X1, X0
	ADDQ $16, AX
	// Only the flags are needed for the dispatch; CX is not read again.
	CMPQ CX, $12
	JE Lenc192
	JB Lenc128
Lenc256:
	// Two extra rounds for the 14-round schedule, then fall through.
	MOVUPS (AX), X1
	AESENC X1, X0
	MOVUPS 16(AX), X1
	AESENC X1, X0
	ADDQ $32, AX
Lenc192:
	// Two extra rounds for the 12-round schedule, then fall through.
	MOVUPS (AX), X1
	AESENC X1, X0
	MOVUPS 16(AX), X1
	AESENC X1, X0
	ADDQ $32, AX
Lenc128:
	// Common tail: nine full rounds followed by the final round.
	MOVUPS (AX), X1
	AESENC X1, X0
	MOVUPS 16(AX), X1
	AESENC X1, X0
	MOVUPS 32(AX), X1
	AESENC X1, X0
	MOVUPS 48(AX), X1
	AESENC X1, X0
	MOVUPS 64(AX), X1
	AESENC X1, X0
	MOVUPS 80(AX), X1
	AESENC X1, X0
	MOVUPS 96(AX), X1
	AESENC X1, X0
	MOVUPS 112(AX), X1
	AESENC X1, X0
	MOVUPS 128(AX), X1
	AESENC X1, X0
	MOVUPS 144(AX), X1
	AESENCLAST X1, X0
	// Write the ciphertext block.
	MOVUPS X0, (DX)
	RET
55
// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// Decrypts one 16-byte block from src into dst with AES-NI, consuming
// 16-byte round keys sequentially from xk (the keys must already be in
// the order and form AESDEC expects; see the dec schedule written by
// expandKeyAsm). nr picks the entry point into the unrolled round
// sequence: nr below 12 (unsigned) runs 10 rounds, nr equal to 12 runs
// 12 rounds, and any other value runs 14 rounds. The final round always
// uses AESDECLAST.
//
// Register use: CX = nr, AX = round-key cursor, BX = src, DX = dst,
// X0 = block state, X1 = round key being applied.
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
	MOVQ src+24(FP), BX
	MOVQ dst+16(FP), DX
	MOVQ xk+8(FP), AX
	MOVQ nr+0(FP), CX
	// Whitening step: XOR the input block with the first scheduled key.
	MOVUPS (AX), X1
	MOVUPS (BX), X0
	PXOR X1, X0
	ADDQ $16, AX
	// Only the flags are needed for the dispatch; CX is not read again.
	CMPQ CX, $12
	JE Ldec192
	JB Ldec128
Ldec256:
	// Two extra rounds for the 14-round schedule, then fall through.
	MOVUPS (AX), X1
	AESDEC X1, X0
	MOVUPS 16(AX), X1
	AESDEC X1, X0
	ADDQ $32, AX
Ldec192:
	// Two extra rounds for the 12-round schedule, then fall through.
	MOVUPS (AX), X1
	AESDEC X1, X0
	MOVUPS 16(AX), X1
	AESDEC X1, X0
	ADDQ $32, AX
Ldec128:
	// Common tail: nine full rounds followed by the final round.
	MOVUPS (AX), X1
	AESDEC X1, X0
	MOVUPS 16(AX), X1
	AESDEC X1, X0
	MOVUPS 32(AX), X1
	AESDEC X1, X0
	MOVUPS 48(AX), X1
	AESDEC X1, X0
	MOVUPS 64(AX), X1
	AESDEC X1, X0
	MOVUPS 80(AX), X1
	AESDEC X1, X0
	MOVUPS 96(AX), X1
	AESDEC X1, X0
	MOVUPS 112(AX), X1
	AESDEC X1, X0
	MOVUPS 128(AX), X1
	AESDEC X1, X0
	MOVUPS 144(AX), X1
	AESDECLAST X1, X0
	// Write the plaintext block.
	MOVUPS X0, (DX)
	RET
104
// func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
//
// Expands the raw key at key into nr+1 encryption round keys at enc and
// the matching decryption round keys at dec. Note that round keys are
// stored in uint128 format (16 bytes each), not as individual uint32s.
//
// nr selects the schedule: below 12 (unsigned) uses the 128-bit path and
// reads 16 key bytes, exactly 12 uses the 192-bit path and reads 24 key
// bytes, any other value uses the 256-bit path and reads 32 key bytes.
// The dec loop assumes nr >= 2; callers are expected to pass 10, 12 or 14.
//
// Register use: CX = nr (later the dec loop counter), AX = key,
// BX = enc write cursor (advanced by the _expand_key_* helpers),
// DX = dec write cursor, X0/X2 = most recent key material,
// X1 = AESKEYGENASSIST output, X4 = scratch owned by the helpers.
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
	MOVQ nr+0(FP), CX
	MOVQ key+8(FP), AX
	MOVQ enc+16(FP), BX
	MOVQ dec+24(FP), DX
	MOVUPS (AX), X0
	// enc: round key 0 is the first 16 bytes of the raw key.
	MOVUPS X0, (BX)
	ADDQ $16, BX
	PXOR X4, X4 // _expand_key_* expect X4 to be zero
	// nr is a 64-bit int (loaded with MOVQ, counted down with DECQ), so
	// compare it at full width.
	CMPQ CX, $12
	JE Lexp_enc192
	JB Lexp_enc128
Lexp_enc256:
	// Round key 1 is the second 16 bytes of the raw key. After that the
	// two helpers alternate: 256a updates X0 from an assist on X2, 256b
	// updates X2 from an assist on X0. The immediates are the AES round
	// constants 0x01..0x40.
	MOVUPS 16(AX), X2
	MOVUPS X2, (BX)
	ADDQ $16, BX
	AESKEYGENASSIST $0x01, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x01, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x02, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x02, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x04, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x04, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x08, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x08, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x10, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x10, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x20, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x20, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x40, X2, X1
	CALL _expand_key_256a<>(SB)
	JMP Lexp_dec
Lexp_enc192:
	// The remaining 8 key bytes go into the low half of X2 (the upper
	// half is zeroed by the load). 192a stores 32 bytes per call and
	// 192b stores 16, so each a/b pair emits three round keys.
	MOVQ 16(AX), X2
	AESKEYGENASSIST $0x01, X2, X1
	CALL _expand_key_192a<>(SB)
	AESKEYGENASSIST $0x02, X2, X1
	CALL _expand_key_192b<>(SB)
	AESKEYGENASSIST $0x04, X2, X1
	CALL _expand_key_192a<>(SB)
	AESKEYGENASSIST $0x08, X2, X1
	CALL _expand_key_192b<>(SB)
	AESKEYGENASSIST $0x10, X2, X1
	CALL _expand_key_192a<>(SB)
	AESKEYGENASSIST $0x20, X2, X1
	CALL _expand_key_192b<>(SB)
	AESKEYGENASSIST $0x40, X2, X1
	CALL _expand_key_192a<>(SB)
	AESKEYGENASSIST $0x80, X2, X1
	CALL _expand_key_192b<>(SB)
	JMP Lexp_dec
Lexp_enc128:
	// Ten helper calls, one round key each, with round constants
	// 0x01..0x80, 0x1b, 0x36.
	AESKEYGENASSIST $0x01, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x02, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x04, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x08, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x10, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x20, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x40, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x80, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x1b, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x36, X0, X1
	CALL _expand_key_128<>(SB)
Lexp_dec:
	// dec: walk the enc schedule backwards. BX currently points one past
	// the last enc round key. The last enc key becomes dec[0] unchanged.
	SUBQ $16, BX
	MOVUPS (BX), X1
	MOVUPS X1, (DX)
	DECQ CX // nr-1 middle keys remain
Lexp_dec_loop:
	// Each middle key is passed through AESIMC before being stored.
	MOVUPS -16(BX), X1
	AESIMC X1, X0
	MOVUPS X0, 16(DX)
	SUBQ $16, BX
	ADDQ $16, DX
	DECQ CX
	JNZ Lexp_dec_loop
	// The first enc key becomes the last dec key, again unchanged.
	MOVUPS -16(BX), X0
	MOVUPS X0, 16(DX)
	RET
208
// _expand_key_128 derives the next 16-byte round key from the previous
// one and appends it to the schedule.
//
// In:  X0 = previous round key
//      X1 = AESKEYGENASSIST output (only dword 3 is consumed)
//      X4 = low dword zero (expandKeyAsm clears the register once)
//      BX = address at which to store the new key
// Out: X0 = new round key, also written to (BX); BX advanced by 16.
// X1 and X4 are overwritten; the low dword of X4 is still zero on return.
TEXT _expand_key_128<>(SB),NOSPLIT,$0
	// Turn X0 into a running XOR of its own dwords:
	// X0[i] = old X0[0] ^ ... ^ old X0[i].
	SHUFPS $0x10, X0, X4
	PXOR X4, X0
	SHUFPS $0x8c, X0, X4
	PXOR X4, X0
	// Broadcast dword 3 of the assist value and fold it into every dword.
	PSHUFD $0xff, X1, X1
	PXOR X1, X0
	MOVUPS X0, (BX)
	ADDQ $16, BX
	RET
219
// _expand_key_192a advances the 192-bit key state by six dwords and
// stores 32 bytes (two round keys) to the schedule.
//
// In:  X0 = four oldest dwords of the current key state
//      X2 = two newest dwords in its low half
//      X1 = AESKEYGENASSIST output for X2 (only dword 1 is consumed)
//      X4 = low dword zero
//      BX = address at which to store
// Out: X0 and the low half of X2 hold the next six dwords; BX advanced
//      by 32. X1, X3, X4, X5 and X6 are overwritten.
TEXT _expand_key_192a<>(SB),NOSPLIT,$0
	// Running XOR across X0, then fold in the broadcast assist dword.
	SHUFPS $0x10, X0, X4
	PXOR X4, X0
	SHUFPS $0x8c, X0, X4
	PXOR X4, X0
	PSHUFD $0x55, X1, X1
	PXOR X1, X0

	// X6 keeps the incoming X2 for the first store below. The low two
	// dwords of X2 are then extended from the new X0[3]:
	// X2[0] ^= X0[3]; X2[1] ^= X0[3] ^ old X2[0].
	MOVAPS X2, X6
	MOVAPS X2, X5
	PSLLDQ $0x4, X5
	PSHUFD $0xff, X0, X3
	PXOR X3, X2
	PXOR X5, X2

	// First store: old X2[0..1] followed by new X0[0..1].
	// Second store: new X0[2..3] followed by new X2[0..1].
	MOVAPS X0, X1
	SHUFPS $0x44, X0, X6
	MOVUPS X6, (BX)
	SHUFPS $0x4e, X2, X1
	MOVUPS X1, 16(BX)
	ADDQ $32, BX
	RET
242
// _expand_key_192b advances the 192-bit key state by six dwords and
// stores 16 bytes (one round key) to the schedule.
//
// In:  X0 = four oldest dwords of the current key state
//      X2 = two newest dwords in its low half
//      X1 = AESKEYGENASSIST output for X2 (only dword 1 is consumed)
//      X4 = low dword zero
//      BX = address at which to store
// Out: X0 and the low half of X2 hold the next six dwords; the new X0
//      is written to (BX) and BX is advanced by 16. X1, X3, X4 and X5
//      are overwritten.
TEXT _expand_key_192b<>(SB),NOSPLIT,$0
	// Running XOR across X0, then fold in the broadcast assist dword.
	SHUFPS $0x10, X0, X4
	PXOR X4, X0
	SHUFPS $0x8c, X0, X4
	PXOR X4, X0
	PSHUFD $0x55, X1, X1
	PXOR X1, X0

	// Extend the low two dwords of X2 from the new X0[3]:
	// X2[0] ^= X0[3]; X2[1] ^= X0[3] ^ old X2[0].
	MOVAPS X2, X5
	PSLLDQ $0x4, X5
	PSHUFD $0xff, X0, X3
	PXOR X3, X2
	PXOR X5, X2

	MOVUPS X0, (BX)
	ADDQ $16, BX
	RET
260
// _expand_key_256a is the even step of the 256-bit schedule. It is the
// same computation as the 128-bit step (update X0 from dword 3 of X1,
// store at BX), so it tail-jumps there; that routine's RET returns
// straight to this routine's caller.
TEXT _expand_key_256a<>(SB),NOSPLIT,$0
	JMP _expand_key_128<>(SB)
263
// _expand_key_256b is the odd step of the 256-bit schedule: it updates
// X2 rather than X0 and appends the result to the schedule.
//
// In:  X2 = round key from two steps back
//      X1 = AESKEYGENASSIST output for X0 (only dword 2 is consumed)
//      X4 = low dword zero
//      BX = address at which to store the new key
// Out: X2 = new round key, also written to (BX); BX advanced by 16.
// X1 and X4 are overwritten; the low dword of X4 is still zero on return.
TEXT _expand_key_256b<>(SB),NOSPLIT,$0
	// Running XOR across X2: X2[i] = old X2[0] ^ ... ^ old X2[i].
	SHUFPS $0x10, X2, X4
	PXOR X4, X2
	SHUFPS $0x8c, X2, X4
	PXOR X4, X2
	// Broadcast dword 2 of the assist value and fold it into every dword.
	PSHUFD $0xaa, X1, X1
	PXOR X1, X2

	MOVUPS X2, (BX)
	ADDQ $16, BX
	RET
View as plain text