Browse code

Merge pull request #40429 from thaJeztah/bump_golang_1.13.7

Update Golang 1.13.7, golang.org/x/crypto (CVE-2020-0601, CVE-2020-7919)

Akihiro Suda authored on 2020/02/04 02:09:20
Showing 34 changed files
... ...
@@ -1,7 +1,7 @@
1 1
 # syntax=docker/dockerfile:1.1.3-experimental
2 2
 
3 3
 ARG CROSS="false"
4
-ARG GO_VERSION=1.13.6
4
+ARG GO_VERSION=1.13.7
5 5
 ARG DEBIAN_FRONTEND=noninteractive
6 6
 ARG VPNKIT_DIGEST=e508a17cfacc8fd39261d5b4e397df2b953690da577e2c987a47630cd0c42f8e
7 7
 ARG DOCKER_BUILDTAGS="apparmor seccomp selinux"
... ...
@@ -1,4 +1,4 @@
1
-ARG GO_VERSION=1.13.6
1
+ARG GO_VERSION=1.13.7
2 2
 
3 3
 FROM golang:${GO_VERSION}-alpine AS base
4 4
 ENV GO111MODULE=off
... ...
@@ -5,7 +5,7 @@
5 5
 
6 6
 # This represents the bare minimum required to build and test Docker.
7 7
 
8
-ARG GO_VERSION=1.13.6
8
+ARG GO_VERSION=1.13.7
9 9
 
10 10
 FROM golang:${GO_VERSION}-stretch
11 11
 ENV GO111MODULE=off
... ...
@@ -165,7 +165,7 @@ FROM microsoft/windowsservercore
165 165
 # Use PowerShell as the default shell
166 166
 SHELL ["powershell", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"]
167 167
 
168
-ARG GO_VERSION=1.13.6
168
+ARG GO_VERSION=1.13.7
169 169
 ARG GOTESTSUM_COMMIT=v0.3.5
170 170
 
171 171
 # Environment variable notes:
... ...
@@ -134,7 +134,7 @@ github.com/golang/protobuf                          aa810b61a9c79d51363740d207bb
134 134
 github.com/cloudflare/cfssl                         5d63dbd981b5c408effbb58c442d54761ff94fbd # 1.3.2
135 135
 github.com/fernet/fernet-go                         9eac43b88a5efb8651d24de9b68e87567e029736
136 136
 github.com/google/certificate-transparency-go       37a384cd035e722ea46e55029093e26687138edf # v1.0.20
137
-golang.org/x/crypto                                 88737f569e3a9c7ab309cdc09a07fe7fc87233c3
137
+golang.org/x/crypto                                 69ecbb4d6d5dab05e49161c6e77ea40a030884e1
138 138
 golang.org/x/time                                   fbb02b2291d28baffd63558aa44b4b56f178d650
139 139
 github.com/hashicorp/go-memdb                       cb9a474f84cc5e41b273b20c6927680b2a8776ad
140 140
 github.com/hashicorp/go-immutable-radix             826af9ccf0feeee615d546d69b11f8e98da8c8f1 git://github.com/tonistiigi/go-immutable-radix.git
... ...
@@ -470,7 +470,8 @@ func (s *String) ReadASN1GeneralizedTime(out *time.Time) bool {
470 470
 // It reports whether the read was successful.
471 471
 func (s *String) ReadASN1BitString(out *encoding_asn1.BitString) bool {
472 472
 	var bytes String
473
-	if !s.ReadASN1(&bytes, asn1.BIT_STRING) || len(bytes) == 0 {
473
+	if !s.ReadASN1(&bytes, asn1.BIT_STRING) || len(bytes) == 0 ||
474
+		len(bytes)*8/8 != len(bytes) {
474 475
 		return false
475 476
 	}
476 477
 
... ...
@@ -740,7 +741,7 @@ func (s *String) readASN1(out *String, outTag *asn1.Tag, skipHeader bool) bool {
740 740
 		length = headerLen + len32
741 741
 	}
742 742
 
743
-	if uint32(int(length)) != length || !s.ReadBytes((*[]byte)(out), int(length)) {
743
+	if int(length) < 0 || !s.ReadBytes((*[]byte)(out), int(length)) {
744 744
 		return false
745 745
 	}
746 746
 	if skipHeader && !out.Skip(int(headerLen)) {
... ...
@@ -24,7 +24,7 @@ type String []byte
24 24
 // read advances a String by n bytes and returns them. If less than n bytes
25 25
 // remain, it returns nil.
26 26
 func (s *String) read(n int) []byte {
27
-	if len(*s) < n {
27
+	if len(*s) < n || n < 0 {
28 28
 		return nil
29 29
 	}
30 30
 	v := (*s)[:n]
... ...
@@ -105,11 +105,6 @@ func (s *String) readLengthPrefixed(lenLen int, outChild *String) bool {
105 105
 		length = length << 8
106 106
 		length = length | uint32(b)
107 107
 	}
108
-	if int(length) < 0 {
109
-		// This currently cannot overflow because we read uint24 at most, but check
110
-		// anyway in case that changes in the future.
111
-		return false
112
-	}
113 108
 	v := s.read(int(length))
114 109
 	if v == nil {
115 110
 		return false
116 111
deleted file mode 100644
... ...
@@ -1,8 +0,0 @@
1
-// Copyright 2012 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// This code was translated into a form compatible with 6a from the public
6
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
7
-
8
-#define REDMASK51     0x0007FFFFFFFFFFFF
9 1
deleted file mode 100644
... ...
@@ -1,20 +0,0 @@
1
-// Copyright 2012 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// This code was translated into a form compatible with 6a from the public
6
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
7
-
8
-// +build amd64,!gccgo,!appengine
9
-
10
-// These constants cannot be encoded in non-MOVQ immediates.
11
-// We access them directly from memory instead.
12
-
13
-DATA ·_121666_213(SB)/8, $996687872
14
-GLOBL ·_121666_213(SB), 8, $8
15
-
16
-DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA
17
-GLOBL ·_2P0(SB), 8, $8
18
-
19
-DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE
20
-GLOBL ·_2P1234(SB), 8, $8
21 1
deleted file mode 100644
... ...
@@ -1,65 +0,0 @@
1
-// Copyright 2012 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// +build amd64,!gccgo,!appengine
6
-
7
-// func cswap(inout *[4][5]uint64, v uint64)
8
-TEXT ·cswap(SB),7,$0
9
-	MOVQ inout+0(FP),DI
10
-	MOVQ v+8(FP),SI
11
-
12
-	SUBQ $1, SI
13
-	NOTQ SI
14
-	MOVQ SI, X15
15
-	PSHUFD $0x44, X15, X15
16
-
17
-	MOVOU 0(DI), X0
18
-	MOVOU 16(DI), X2
19
-	MOVOU 32(DI), X4
20
-	MOVOU 48(DI), X6
21
-	MOVOU 64(DI), X8
22
-	MOVOU 80(DI), X1
23
-	MOVOU 96(DI), X3
24
-	MOVOU 112(DI), X5
25
-	MOVOU 128(DI), X7
26
-	MOVOU 144(DI), X9
27
-
28
-	MOVO X1, X10
29
-	MOVO X3, X11
30
-	MOVO X5, X12
31
-	MOVO X7, X13
32
-	MOVO X9, X14
33
-
34
-	PXOR X0, X10
35
-	PXOR X2, X11
36
-	PXOR X4, X12
37
-	PXOR X6, X13
38
-	PXOR X8, X14
39
-	PAND X15, X10
40
-	PAND X15, X11
41
-	PAND X15, X12
42
-	PAND X15, X13
43
-	PAND X15, X14
44
-	PXOR X10, X0
45
-	PXOR X10, X1
46
-	PXOR X11, X2
47
-	PXOR X11, X3
48
-	PXOR X12, X4
49
-	PXOR X12, X5
50
-	PXOR X13, X6
51
-	PXOR X13, X7
52
-	PXOR X14, X8
53
-	PXOR X14, X9
54
-
55
-	MOVOU X0, 0(DI)
56
-	MOVOU X2, 16(DI)
57
-	MOVOU X4, 32(DI)
58
-	MOVOU X6, 48(DI)
59
-	MOVOU X8, 64(DI)
60
-	MOVOU X1, 80(DI)
61
-	MOVOU X3, 96(DI)
62
-	MOVOU X5, 112(DI)
63
-	MOVOU X7, 128(DI)
64
-	MOVOU X9, 144(DI)
65
-	RET
66 1
deleted file mode 100644
... ...
@@ -1,834 +0,0 @@
1
-// Copyright 2013 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// We have an implementation in amd64 assembly so this code is only run on
6
-// non-amd64 platforms. The amd64 assembly does not support gccgo.
7
-// +build !amd64 gccgo appengine
8
-
9
-package curve25519
10
-
11
-import (
12
-	"encoding/binary"
13
-)
14
-
15
-// This code is a port of the public domain, "ref10" implementation of
16
-// curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
17
-
18
-// fieldElement represents an element of the field GF(2^255 - 19). An element
19
-// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
20
-// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
21
-// context.
22
-type fieldElement [10]int32
23
-
24
-func feZero(fe *fieldElement) {
25
-	for i := range fe {
26
-		fe[i] = 0
27
-	}
28
-}
29
-
30
-func feOne(fe *fieldElement) {
31
-	feZero(fe)
32
-	fe[0] = 1
33
-}
34
-
35
-func feAdd(dst, a, b *fieldElement) {
36
-	for i := range dst {
37
-		dst[i] = a[i] + b[i]
38
-	}
39
-}
40
-
41
-func feSub(dst, a, b *fieldElement) {
42
-	for i := range dst {
43
-		dst[i] = a[i] - b[i]
44
-	}
45
-}
46
-
47
-func feCopy(dst, src *fieldElement) {
48
-	for i := range dst {
49
-		dst[i] = src[i]
50
-	}
51
-}
52
-
53
-// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
54
-//
55
-// Preconditions: b in {0,1}.
56
-func feCSwap(f, g *fieldElement, b int32) {
57
-	b = -b
58
-	for i := range f {
59
-		t := b & (f[i] ^ g[i])
60
-		f[i] ^= t
61
-		g[i] ^= t
62
-	}
63
-}
64
-
65
-// load3 reads a 24-bit, little-endian value from in.
66
-func load3(in []byte) int64 {
67
-	var r int64
68
-	r = int64(in[0])
69
-	r |= int64(in[1]) << 8
70
-	r |= int64(in[2]) << 16
71
-	return r
72
-}
73
-
74
-// load4 reads a 32-bit, little-endian value from in.
75
-func load4(in []byte) int64 {
76
-	return int64(binary.LittleEndian.Uint32(in))
77
-}
78
-
79
-func feFromBytes(dst *fieldElement, src *[32]byte) {
80
-	h0 := load4(src[:])
81
-	h1 := load3(src[4:]) << 6
82
-	h2 := load3(src[7:]) << 5
83
-	h3 := load3(src[10:]) << 3
84
-	h4 := load3(src[13:]) << 2
85
-	h5 := load4(src[16:])
86
-	h6 := load3(src[20:]) << 7
87
-	h7 := load3(src[23:]) << 5
88
-	h8 := load3(src[26:]) << 4
89
-	h9 := (load3(src[29:]) & 0x7fffff) << 2
90
-
91
-	var carry [10]int64
92
-	carry[9] = (h9 + 1<<24) >> 25
93
-	h0 += carry[9] * 19
94
-	h9 -= carry[9] << 25
95
-	carry[1] = (h1 + 1<<24) >> 25
96
-	h2 += carry[1]
97
-	h1 -= carry[1] << 25
98
-	carry[3] = (h3 + 1<<24) >> 25
99
-	h4 += carry[3]
100
-	h3 -= carry[3] << 25
101
-	carry[5] = (h5 + 1<<24) >> 25
102
-	h6 += carry[5]
103
-	h5 -= carry[5] << 25
104
-	carry[7] = (h7 + 1<<24) >> 25
105
-	h8 += carry[7]
106
-	h7 -= carry[7] << 25
107
-
108
-	carry[0] = (h0 + 1<<25) >> 26
109
-	h1 += carry[0]
110
-	h0 -= carry[0] << 26
111
-	carry[2] = (h2 + 1<<25) >> 26
112
-	h3 += carry[2]
113
-	h2 -= carry[2] << 26
114
-	carry[4] = (h4 + 1<<25) >> 26
115
-	h5 += carry[4]
116
-	h4 -= carry[4] << 26
117
-	carry[6] = (h6 + 1<<25) >> 26
118
-	h7 += carry[6]
119
-	h6 -= carry[6] << 26
120
-	carry[8] = (h8 + 1<<25) >> 26
121
-	h9 += carry[8]
122
-	h8 -= carry[8] << 26
123
-
124
-	dst[0] = int32(h0)
125
-	dst[1] = int32(h1)
126
-	dst[2] = int32(h2)
127
-	dst[3] = int32(h3)
128
-	dst[4] = int32(h4)
129
-	dst[5] = int32(h5)
130
-	dst[6] = int32(h6)
131
-	dst[7] = int32(h7)
132
-	dst[8] = int32(h8)
133
-	dst[9] = int32(h9)
134
-}
135
-
136
-// feToBytes marshals h to s.
137
-// Preconditions:
138
-//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
139
-//
140
-// Write p=2^255-19; q=floor(h/p).
141
-// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
142
-//
143
-// Proof:
144
-//   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
145
-//   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
146
-//
147
-//   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
148
-//   Then 0<y<1.
149
-//
150
-//   Write r=h-pq.
151
-//   Have 0<=r<=p-1=2^255-20.
152
-//   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
153
-//
154
-//   Write x=r+19(2^-255)r+y.
155
-//   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
156
-//
157
-//   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
158
-//   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
159
-func feToBytes(s *[32]byte, h *fieldElement) {
160
-	var carry [10]int32
161
-
162
-	q := (19*h[9] + (1 << 24)) >> 25
163
-	q = (h[0] + q) >> 26
164
-	q = (h[1] + q) >> 25
165
-	q = (h[2] + q) >> 26
166
-	q = (h[3] + q) >> 25
167
-	q = (h[4] + q) >> 26
168
-	q = (h[5] + q) >> 25
169
-	q = (h[6] + q) >> 26
170
-	q = (h[7] + q) >> 25
171
-	q = (h[8] + q) >> 26
172
-	q = (h[9] + q) >> 25
173
-
174
-	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
175
-	h[0] += 19 * q
176
-	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
177
-
178
-	carry[0] = h[0] >> 26
179
-	h[1] += carry[0]
180
-	h[0] -= carry[0] << 26
181
-	carry[1] = h[1] >> 25
182
-	h[2] += carry[1]
183
-	h[1] -= carry[1] << 25
184
-	carry[2] = h[2] >> 26
185
-	h[3] += carry[2]
186
-	h[2] -= carry[2] << 26
187
-	carry[3] = h[3] >> 25
188
-	h[4] += carry[3]
189
-	h[3] -= carry[3] << 25
190
-	carry[4] = h[4] >> 26
191
-	h[5] += carry[4]
192
-	h[4] -= carry[4] << 26
193
-	carry[5] = h[5] >> 25
194
-	h[6] += carry[5]
195
-	h[5] -= carry[5] << 25
196
-	carry[6] = h[6] >> 26
197
-	h[7] += carry[6]
198
-	h[6] -= carry[6] << 26
199
-	carry[7] = h[7] >> 25
200
-	h[8] += carry[7]
201
-	h[7] -= carry[7] << 25
202
-	carry[8] = h[8] >> 26
203
-	h[9] += carry[8]
204
-	h[8] -= carry[8] << 26
205
-	carry[9] = h[9] >> 25
206
-	h[9] -= carry[9] << 25
207
-	// h10 = carry9
208
-
209
-	// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
210
-	// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
211
-	// evidently 2^255 h10-2^255 q = 0.
212
-	// Goal: Output h[0]+...+2^230 h[9].
213
-
214
-	s[0] = byte(h[0] >> 0)
215
-	s[1] = byte(h[0] >> 8)
216
-	s[2] = byte(h[0] >> 16)
217
-	s[3] = byte((h[0] >> 24) | (h[1] << 2))
218
-	s[4] = byte(h[1] >> 6)
219
-	s[5] = byte(h[1] >> 14)
220
-	s[6] = byte((h[1] >> 22) | (h[2] << 3))
221
-	s[7] = byte(h[2] >> 5)
222
-	s[8] = byte(h[2] >> 13)
223
-	s[9] = byte((h[2] >> 21) | (h[3] << 5))
224
-	s[10] = byte(h[3] >> 3)
225
-	s[11] = byte(h[3] >> 11)
226
-	s[12] = byte((h[3] >> 19) | (h[4] << 6))
227
-	s[13] = byte(h[4] >> 2)
228
-	s[14] = byte(h[4] >> 10)
229
-	s[15] = byte(h[4] >> 18)
230
-	s[16] = byte(h[5] >> 0)
231
-	s[17] = byte(h[5] >> 8)
232
-	s[18] = byte(h[5] >> 16)
233
-	s[19] = byte((h[5] >> 24) | (h[6] << 1))
234
-	s[20] = byte(h[6] >> 7)
235
-	s[21] = byte(h[6] >> 15)
236
-	s[22] = byte((h[6] >> 23) | (h[7] << 3))
237
-	s[23] = byte(h[7] >> 5)
238
-	s[24] = byte(h[7] >> 13)
239
-	s[25] = byte((h[7] >> 21) | (h[8] << 4))
240
-	s[26] = byte(h[8] >> 4)
241
-	s[27] = byte(h[8] >> 12)
242
-	s[28] = byte((h[8] >> 20) | (h[9] << 6))
243
-	s[29] = byte(h[9] >> 2)
244
-	s[30] = byte(h[9] >> 10)
245
-	s[31] = byte(h[9] >> 18)
246
-}
247
-
248
-// feMul calculates h = f * g
249
-// Can overlap h with f or g.
250
-//
251
-// Preconditions:
252
-//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
253
-//    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
254
-//
255
-// Postconditions:
256
-//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
257
-//
258
-// Notes on implementation strategy:
259
-//
260
-// Using schoolbook multiplication.
261
-// Karatsuba would save a little in some cost models.
262
-//
263
-// Most multiplications by 2 and 19 are 32-bit precomputations;
264
-// cheaper than 64-bit postcomputations.
265
-//
266
-// There is one remaining multiplication by 19 in the carry chain;
267
-// one *19 precomputation can be merged into this,
268
-// but the resulting data flow is considerably less clean.
269
-//
270
-// There are 12 carries below.
271
-// 10 of them are 2-way parallelizable and vectorizable.
272
-// Can get away with 11 carries, but then data flow is much deeper.
273
-//
274
-// With tighter constraints on inputs can squeeze carries into int32.
275
-func feMul(h, f, g *fieldElement) {
276
-	f0 := f[0]
277
-	f1 := f[1]
278
-	f2 := f[2]
279
-	f3 := f[3]
280
-	f4 := f[4]
281
-	f5 := f[5]
282
-	f6 := f[6]
283
-	f7 := f[7]
284
-	f8 := f[8]
285
-	f9 := f[9]
286
-	g0 := g[0]
287
-	g1 := g[1]
288
-	g2 := g[2]
289
-	g3 := g[3]
290
-	g4 := g[4]
291
-	g5 := g[5]
292
-	g6 := g[6]
293
-	g7 := g[7]
294
-	g8 := g[8]
295
-	g9 := g[9]
296
-	g1_19 := 19 * g1 // 1.4*2^29
297
-	g2_19 := 19 * g2 // 1.4*2^30; still ok
298
-	g3_19 := 19 * g3
299
-	g4_19 := 19 * g4
300
-	g5_19 := 19 * g5
301
-	g6_19 := 19 * g6
302
-	g7_19 := 19 * g7
303
-	g8_19 := 19 * g8
304
-	g9_19 := 19 * g9
305
-	f1_2 := 2 * f1
306
-	f3_2 := 2 * f3
307
-	f5_2 := 2 * f5
308
-	f7_2 := 2 * f7
309
-	f9_2 := 2 * f9
310
-	f0g0 := int64(f0) * int64(g0)
311
-	f0g1 := int64(f0) * int64(g1)
312
-	f0g2 := int64(f0) * int64(g2)
313
-	f0g3 := int64(f0) * int64(g3)
314
-	f0g4 := int64(f0) * int64(g4)
315
-	f0g5 := int64(f0) * int64(g5)
316
-	f0g6 := int64(f0) * int64(g6)
317
-	f0g7 := int64(f0) * int64(g7)
318
-	f0g8 := int64(f0) * int64(g8)
319
-	f0g9 := int64(f0) * int64(g9)
320
-	f1g0 := int64(f1) * int64(g0)
321
-	f1g1_2 := int64(f1_2) * int64(g1)
322
-	f1g2 := int64(f1) * int64(g2)
323
-	f1g3_2 := int64(f1_2) * int64(g3)
324
-	f1g4 := int64(f1) * int64(g4)
325
-	f1g5_2 := int64(f1_2) * int64(g5)
326
-	f1g6 := int64(f1) * int64(g6)
327
-	f1g7_2 := int64(f1_2) * int64(g7)
328
-	f1g8 := int64(f1) * int64(g8)
329
-	f1g9_38 := int64(f1_2) * int64(g9_19)
330
-	f2g0 := int64(f2) * int64(g0)
331
-	f2g1 := int64(f2) * int64(g1)
332
-	f2g2 := int64(f2) * int64(g2)
333
-	f2g3 := int64(f2) * int64(g3)
334
-	f2g4 := int64(f2) * int64(g4)
335
-	f2g5 := int64(f2) * int64(g5)
336
-	f2g6 := int64(f2) * int64(g6)
337
-	f2g7 := int64(f2) * int64(g7)
338
-	f2g8_19 := int64(f2) * int64(g8_19)
339
-	f2g9_19 := int64(f2) * int64(g9_19)
340
-	f3g0 := int64(f3) * int64(g0)
341
-	f3g1_2 := int64(f3_2) * int64(g1)
342
-	f3g2 := int64(f3) * int64(g2)
343
-	f3g3_2 := int64(f3_2) * int64(g3)
344
-	f3g4 := int64(f3) * int64(g4)
345
-	f3g5_2 := int64(f3_2) * int64(g5)
346
-	f3g6 := int64(f3) * int64(g6)
347
-	f3g7_38 := int64(f3_2) * int64(g7_19)
348
-	f3g8_19 := int64(f3) * int64(g8_19)
349
-	f3g9_38 := int64(f3_2) * int64(g9_19)
350
-	f4g0 := int64(f4) * int64(g0)
351
-	f4g1 := int64(f4) * int64(g1)
352
-	f4g2 := int64(f4) * int64(g2)
353
-	f4g3 := int64(f4) * int64(g3)
354
-	f4g4 := int64(f4) * int64(g4)
355
-	f4g5 := int64(f4) * int64(g5)
356
-	f4g6_19 := int64(f4) * int64(g6_19)
357
-	f4g7_19 := int64(f4) * int64(g7_19)
358
-	f4g8_19 := int64(f4) * int64(g8_19)
359
-	f4g9_19 := int64(f4) * int64(g9_19)
360
-	f5g0 := int64(f5) * int64(g0)
361
-	f5g1_2 := int64(f5_2) * int64(g1)
362
-	f5g2 := int64(f5) * int64(g2)
363
-	f5g3_2 := int64(f5_2) * int64(g3)
364
-	f5g4 := int64(f5) * int64(g4)
365
-	f5g5_38 := int64(f5_2) * int64(g5_19)
366
-	f5g6_19 := int64(f5) * int64(g6_19)
367
-	f5g7_38 := int64(f5_2) * int64(g7_19)
368
-	f5g8_19 := int64(f5) * int64(g8_19)
369
-	f5g9_38 := int64(f5_2) * int64(g9_19)
370
-	f6g0 := int64(f6) * int64(g0)
371
-	f6g1 := int64(f6) * int64(g1)
372
-	f6g2 := int64(f6) * int64(g2)
373
-	f6g3 := int64(f6) * int64(g3)
374
-	f6g4_19 := int64(f6) * int64(g4_19)
375
-	f6g5_19 := int64(f6) * int64(g5_19)
376
-	f6g6_19 := int64(f6) * int64(g6_19)
377
-	f6g7_19 := int64(f6) * int64(g7_19)
378
-	f6g8_19 := int64(f6) * int64(g8_19)
379
-	f6g9_19 := int64(f6) * int64(g9_19)
380
-	f7g0 := int64(f7) * int64(g0)
381
-	f7g1_2 := int64(f7_2) * int64(g1)
382
-	f7g2 := int64(f7) * int64(g2)
383
-	f7g3_38 := int64(f7_2) * int64(g3_19)
384
-	f7g4_19 := int64(f7) * int64(g4_19)
385
-	f7g5_38 := int64(f7_2) * int64(g5_19)
386
-	f7g6_19 := int64(f7) * int64(g6_19)
387
-	f7g7_38 := int64(f7_2) * int64(g7_19)
388
-	f7g8_19 := int64(f7) * int64(g8_19)
389
-	f7g9_38 := int64(f7_2) * int64(g9_19)
390
-	f8g0 := int64(f8) * int64(g0)
391
-	f8g1 := int64(f8) * int64(g1)
392
-	f8g2_19 := int64(f8) * int64(g2_19)
393
-	f8g3_19 := int64(f8) * int64(g3_19)
394
-	f8g4_19 := int64(f8) * int64(g4_19)
395
-	f8g5_19 := int64(f8) * int64(g5_19)
396
-	f8g6_19 := int64(f8) * int64(g6_19)
397
-	f8g7_19 := int64(f8) * int64(g7_19)
398
-	f8g8_19 := int64(f8) * int64(g8_19)
399
-	f8g9_19 := int64(f8) * int64(g9_19)
400
-	f9g0 := int64(f9) * int64(g0)
401
-	f9g1_38 := int64(f9_2) * int64(g1_19)
402
-	f9g2_19 := int64(f9) * int64(g2_19)
403
-	f9g3_38 := int64(f9_2) * int64(g3_19)
404
-	f9g4_19 := int64(f9) * int64(g4_19)
405
-	f9g5_38 := int64(f9_2) * int64(g5_19)
406
-	f9g6_19 := int64(f9) * int64(g6_19)
407
-	f9g7_38 := int64(f9_2) * int64(g7_19)
408
-	f9g8_19 := int64(f9) * int64(g8_19)
409
-	f9g9_38 := int64(f9_2) * int64(g9_19)
410
-	h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
411
-	h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
412
-	h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
413
-	h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
414
-	h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
415
-	h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
416
-	h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
417
-	h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
418
-	h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
419
-	h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
420
-	var carry [10]int64
421
-
422
-	// |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
423
-	//   i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
424
-	// |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
425
-	//   i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
426
-
427
-	carry[0] = (h0 + (1 << 25)) >> 26
428
-	h1 += carry[0]
429
-	h0 -= carry[0] << 26
430
-	carry[4] = (h4 + (1 << 25)) >> 26
431
-	h5 += carry[4]
432
-	h4 -= carry[4] << 26
433
-	// |h0| <= 2^25
434
-	// |h4| <= 2^25
435
-	// |h1| <= 1.51*2^58
436
-	// |h5| <= 1.51*2^58
437
-
438
-	carry[1] = (h1 + (1 << 24)) >> 25
439
-	h2 += carry[1]
440
-	h1 -= carry[1] << 25
441
-	carry[5] = (h5 + (1 << 24)) >> 25
442
-	h6 += carry[5]
443
-	h5 -= carry[5] << 25
444
-	// |h1| <= 2^24; from now on fits into int32
445
-	// |h5| <= 2^24; from now on fits into int32
446
-	// |h2| <= 1.21*2^59
447
-	// |h6| <= 1.21*2^59
448
-
449
-	carry[2] = (h2 + (1 << 25)) >> 26
450
-	h3 += carry[2]
451
-	h2 -= carry[2] << 26
452
-	carry[6] = (h6 + (1 << 25)) >> 26
453
-	h7 += carry[6]
454
-	h6 -= carry[6] << 26
455
-	// |h2| <= 2^25; from now on fits into int32 unchanged
456
-	// |h6| <= 2^25; from now on fits into int32 unchanged
457
-	// |h3| <= 1.51*2^58
458
-	// |h7| <= 1.51*2^58
459
-
460
-	carry[3] = (h3 + (1 << 24)) >> 25
461
-	h4 += carry[3]
462
-	h3 -= carry[3] << 25
463
-	carry[7] = (h7 + (1 << 24)) >> 25
464
-	h8 += carry[7]
465
-	h7 -= carry[7] << 25
466
-	// |h3| <= 2^24; from now on fits into int32 unchanged
467
-	// |h7| <= 2^24; from now on fits into int32 unchanged
468
-	// |h4| <= 1.52*2^33
469
-	// |h8| <= 1.52*2^33
470
-
471
-	carry[4] = (h4 + (1 << 25)) >> 26
472
-	h5 += carry[4]
473
-	h4 -= carry[4] << 26
474
-	carry[8] = (h8 + (1 << 25)) >> 26
475
-	h9 += carry[8]
476
-	h8 -= carry[8] << 26
477
-	// |h4| <= 2^25; from now on fits into int32 unchanged
478
-	// |h8| <= 2^25; from now on fits into int32 unchanged
479
-	// |h5| <= 1.01*2^24
480
-	// |h9| <= 1.51*2^58
481
-
482
-	carry[9] = (h9 + (1 << 24)) >> 25
483
-	h0 += carry[9] * 19
484
-	h9 -= carry[9] << 25
485
-	// |h9| <= 2^24; from now on fits into int32 unchanged
486
-	// |h0| <= 1.8*2^37
487
-
488
-	carry[0] = (h0 + (1 << 25)) >> 26
489
-	h1 += carry[0]
490
-	h0 -= carry[0] << 26
491
-	// |h0| <= 2^25; from now on fits into int32 unchanged
492
-	// |h1| <= 1.01*2^24
493
-
494
-	h[0] = int32(h0)
495
-	h[1] = int32(h1)
496
-	h[2] = int32(h2)
497
-	h[3] = int32(h3)
498
-	h[4] = int32(h4)
499
-	h[5] = int32(h5)
500
-	h[6] = int32(h6)
501
-	h[7] = int32(h7)
502
-	h[8] = int32(h8)
503
-	h[9] = int32(h9)
504
-}
505
-
506
-// feSquare calculates h = f*f. Can overlap h with f.
507
-//
508
-// Preconditions:
509
-//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
510
-//
511
-// Postconditions:
512
-//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
513
-func feSquare(h, f *fieldElement) {
514
-	f0 := f[0]
515
-	f1 := f[1]
516
-	f2 := f[2]
517
-	f3 := f[3]
518
-	f4 := f[4]
519
-	f5 := f[5]
520
-	f6 := f[6]
521
-	f7 := f[7]
522
-	f8 := f[8]
523
-	f9 := f[9]
524
-	f0_2 := 2 * f0
525
-	f1_2 := 2 * f1
526
-	f2_2 := 2 * f2
527
-	f3_2 := 2 * f3
528
-	f4_2 := 2 * f4
529
-	f5_2 := 2 * f5
530
-	f6_2 := 2 * f6
531
-	f7_2 := 2 * f7
532
-	f5_38 := 38 * f5 // 1.31*2^30
533
-	f6_19 := 19 * f6 // 1.31*2^30
534
-	f7_38 := 38 * f7 // 1.31*2^30
535
-	f8_19 := 19 * f8 // 1.31*2^30
536
-	f9_38 := 38 * f9 // 1.31*2^30
537
-	f0f0 := int64(f0) * int64(f0)
538
-	f0f1_2 := int64(f0_2) * int64(f1)
539
-	f0f2_2 := int64(f0_2) * int64(f2)
540
-	f0f3_2 := int64(f0_2) * int64(f3)
541
-	f0f4_2 := int64(f0_2) * int64(f4)
542
-	f0f5_2 := int64(f0_2) * int64(f5)
543
-	f0f6_2 := int64(f0_2) * int64(f6)
544
-	f0f7_2 := int64(f0_2) * int64(f7)
545
-	f0f8_2 := int64(f0_2) * int64(f8)
546
-	f0f9_2 := int64(f0_2) * int64(f9)
547
-	f1f1_2 := int64(f1_2) * int64(f1)
548
-	f1f2_2 := int64(f1_2) * int64(f2)
549
-	f1f3_4 := int64(f1_2) * int64(f3_2)
550
-	f1f4_2 := int64(f1_2) * int64(f4)
551
-	f1f5_4 := int64(f1_2) * int64(f5_2)
552
-	f1f6_2 := int64(f1_2) * int64(f6)
553
-	f1f7_4 := int64(f1_2) * int64(f7_2)
554
-	f1f8_2 := int64(f1_2) * int64(f8)
555
-	f1f9_76 := int64(f1_2) * int64(f9_38)
556
-	f2f2 := int64(f2) * int64(f2)
557
-	f2f3_2 := int64(f2_2) * int64(f3)
558
-	f2f4_2 := int64(f2_2) * int64(f4)
559
-	f2f5_2 := int64(f2_2) * int64(f5)
560
-	f2f6_2 := int64(f2_2) * int64(f6)
561
-	f2f7_2 := int64(f2_2) * int64(f7)
562
-	f2f8_38 := int64(f2_2) * int64(f8_19)
563
-	f2f9_38 := int64(f2) * int64(f9_38)
564
-	f3f3_2 := int64(f3_2) * int64(f3)
565
-	f3f4_2 := int64(f3_2) * int64(f4)
566
-	f3f5_4 := int64(f3_2) * int64(f5_2)
567
-	f3f6_2 := int64(f3_2) * int64(f6)
568
-	f3f7_76 := int64(f3_2) * int64(f7_38)
569
-	f3f8_38 := int64(f3_2) * int64(f8_19)
570
-	f3f9_76 := int64(f3_2) * int64(f9_38)
571
-	f4f4 := int64(f4) * int64(f4)
572
-	f4f5_2 := int64(f4_2) * int64(f5)
573
-	f4f6_38 := int64(f4_2) * int64(f6_19)
574
-	f4f7_38 := int64(f4) * int64(f7_38)
575
-	f4f8_38 := int64(f4_2) * int64(f8_19)
576
-	f4f9_38 := int64(f4) * int64(f9_38)
577
-	f5f5_38 := int64(f5) * int64(f5_38)
578
-	f5f6_38 := int64(f5_2) * int64(f6_19)
579
-	f5f7_76 := int64(f5_2) * int64(f7_38)
580
-	f5f8_38 := int64(f5_2) * int64(f8_19)
581
-	f5f9_76 := int64(f5_2) * int64(f9_38)
582
-	f6f6_19 := int64(f6) * int64(f6_19)
583
-	f6f7_38 := int64(f6) * int64(f7_38)
584
-	f6f8_38 := int64(f6_2) * int64(f8_19)
585
-	f6f9_38 := int64(f6) * int64(f9_38)
586
-	f7f7_38 := int64(f7) * int64(f7_38)
587
-	f7f8_38 := int64(f7_2) * int64(f8_19)
588
-	f7f9_76 := int64(f7_2) * int64(f9_38)
589
-	f8f8_19 := int64(f8) * int64(f8_19)
590
-	f8f9_38 := int64(f8) * int64(f9_38)
591
-	f9f9_38 := int64(f9) * int64(f9_38)
592
-	h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
593
-	h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
594
-	h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
595
-	h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
596
-	h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
597
-	h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
598
-	h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
599
-	h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
600
-	h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
601
-	h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
602
-	var carry [10]int64
603
-
604
-	carry[0] = (h0 + (1 << 25)) >> 26
605
-	h1 += carry[0]
606
-	h0 -= carry[0] << 26
607
-	carry[4] = (h4 + (1 << 25)) >> 26
608
-	h5 += carry[4]
609
-	h4 -= carry[4] << 26
610
-
611
-	carry[1] = (h1 + (1 << 24)) >> 25
612
-	h2 += carry[1]
613
-	h1 -= carry[1] << 25
614
-	carry[5] = (h5 + (1 << 24)) >> 25
615
-	h6 += carry[5]
616
-	h5 -= carry[5] << 25
617
-
618
-	carry[2] = (h2 + (1 << 25)) >> 26
619
-	h3 += carry[2]
620
-	h2 -= carry[2] << 26
621
-	carry[6] = (h6 + (1 << 25)) >> 26
622
-	h7 += carry[6]
623
-	h6 -= carry[6] << 26
624
-
625
-	carry[3] = (h3 + (1 << 24)) >> 25
626
-	h4 += carry[3]
627
-	h3 -= carry[3] << 25
628
-	carry[7] = (h7 + (1 << 24)) >> 25
629
-	h8 += carry[7]
630
-	h7 -= carry[7] << 25
631
-
632
-	carry[4] = (h4 + (1 << 25)) >> 26
633
-	h5 += carry[4]
634
-	h4 -= carry[4] << 26
635
-	carry[8] = (h8 + (1 << 25)) >> 26
636
-	h9 += carry[8]
637
-	h8 -= carry[8] << 26
638
-
639
-	carry[9] = (h9 + (1 << 24)) >> 25
640
-	h0 += carry[9] * 19
641
-	h9 -= carry[9] << 25
642
-
643
-	carry[0] = (h0 + (1 << 25)) >> 26
644
-	h1 += carry[0]
645
-	h0 -= carry[0] << 26
646
-
647
-	h[0] = int32(h0)
648
-	h[1] = int32(h1)
649
-	h[2] = int32(h2)
650
-	h[3] = int32(h3)
651
-	h[4] = int32(h4)
652
-	h[5] = int32(h5)
653
-	h[6] = int32(h6)
654
-	h[7] = int32(h7)
655
-	h[8] = int32(h8)
656
-	h[9] = int32(h9)
657
-}
658
-
659
-// feMul121666 calculates h = f * 121666. Can overlap h with f.
660
-//
661
-// Preconditions:
662
-//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
663
-//
664
-// Postconditions:
665
-//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
666
-func feMul121666(h, f *fieldElement) {
667
-	h0 := int64(f[0]) * 121666
668
-	h1 := int64(f[1]) * 121666
669
-	h2 := int64(f[2]) * 121666
670
-	h3 := int64(f[3]) * 121666
671
-	h4 := int64(f[4]) * 121666
672
-	h5 := int64(f[5]) * 121666
673
-	h6 := int64(f[6]) * 121666
674
-	h7 := int64(f[7]) * 121666
675
-	h8 := int64(f[8]) * 121666
676
-	h9 := int64(f[9]) * 121666
677
-	var carry [10]int64
678
-
679
-	carry[9] = (h9 + (1 << 24)) >> 25
680
-	h0 += carry[9] * 19
681
-	h9 -= carry[9] << 25
682
-	carry[1] = (h1 + (1 << 24)) >> 25
683
-	h2 += carry[1]
684
-	h1 -= carry[1] << 25
685
-	carry[3] = (h3 + (1 << 24)) >> 25
686
-	h4 += carry[3]
687
-	h3 -= carry[3] << 25
688
-	carry[5] = (h5 + (1 << 24)) >> 25
689
-	h6 += carry[5]
690
-	h5 -= carry[5] << 25
691
-	carry[7] = (h7 + (1 << 24)) >> 25
692
-	h8 += carry[7]
693
-	h7 -= carry[7] << 25
694
-
695
-	carry[0] = (h0 + (1 << 25)) >> 26
696
-	h1 += carry[0]
697
-	h0 -= carry[0] << 26
698
-	carry[2] = (h2 + (1 << 25)) >> 26
699
-	h3 += carry[2]
700
-	h2 -= carry[2] << 26
701
-	carry[4] = (h4 + (1 << 25)) >> 26
702
-	h5 += carry[4]
703
-	h4 -= carry[4] << 26
704
-	carry[6] = (h6 + (1 << 25)) >> 26
705
-	h7 += carry[6]
706
-	h6 -= carry[6] << 26
707
-	carry[8] = (h8 + (1 << 25)) >> 26
708
-	h9 += carry[8]
709
-	h8 -= carry[8] << 26
710
-
711
-	h[0] = int32(h0)
712
-	h[1] = int32(h1)
713
-	h[2] = int32(h2)
714
-	h[3] = int32(h3)
715
-	h[4] = int32(h4)
716
-	h[5] = int32(h5)
717
-	h[6] = int32(h6)
718
-	h[7] = int32(h7)
719
-	h[8] = int32(h8)
720
-	h[9] = int32(h9)
721
-}
722
-
723
-// feInvert sets out = z^-1.
724
-func feInvert(out, z *fieldElement) {
725
-	var t0, t1, t2, t3 fieldElement
726
-	var i int
727
-
728
-	feSquare(&t0, z)
729
-	for i = 1; i < 1; i++ {
730
-		feSquare(&t0, &t0)
731
-	}
732
-	feSquare(&t1, &t0)
733
-	for i = 1; i < 2; i++ {
734
-		feSquare(&t1, &t1)
735
-	}
736
-	feMul(&t1, z, &t1)
737
-	feMul(&t0, &t0, &t1)
738
-	feSquare(&t2, &t0)
739
-	for i = 1; i < 1; i++ {
740
-		feSquare(&t2, &t2)
741
-	}
742
-	feMul(&t1, &t1, &t2)
743
-	feSquare(&t2, &t1)
744
-	for i = 1; i < 5; i++ {
745
-		feSquare(&t2, &t2)
746
-	}
747
-	feMul(&t1, &t2, &t1)
748
-	feSquare(&t2, &t1)
749
-	for i = 1; i < 10; i++ {
750
-		feSquare(&t2, &t2)
751
-	}
752
-	feMul(&t2, &t2, &t1)
753
-	feSquare(&t3, &t2)
754
-	for i = 1; i < 20; i++ {
755
-		feSquare(&t3, &t3)
756
-	}
757
-	feMul(&t2, &t3, &t2)
758
-	feSquare(&t2, &t2)
759
-	for i = 1; i < 10; i++ {
760
-		feSquare(&t2, &t2)
761
-	}
762
-	feMul(&t1, &t2, &t1)
763
-	feSquare(&t2, &t1)
764
-	for i = 1; i < 50; i++ {
765
-		feSquare(&t2, &t2)
766
-	}
767
-	feMul(&t2, &t2, &t1)
768
-	feSquare(&t3, &t2)
769
-	for i = 1; i < 100; i++ {
770
-		feSquare(&t3, &t3)
771
-	}
772
-	feMul(&t2, &t3, &t2)
773
-	feSquare(&t2, &t2)
774
-	for i = 1; i < 50; i++ {
775
-		feSquare(&t2, &t2)
776
-	}
777
-	feMul(&t1, &t2, &t1)
778
-	feSquare(&t1, &t1)
779
-	for i = 1; i < 5; i++ {
780
-		feSquare(&t1, &t1)
781
-	}
782
-	feMul(out, &t1, &t0)
783
-}
784
-
785
-func scalarMult(out, in, base *[32]byte) {
786
-	var e [32]byte
787
-
788
-	copy(e[:], in[:])
789
-	e[0] &= 248
790
-	e[31] &= 127
791
-	e[31] |= 64
792
-
793
-	var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
794
-	feFromBytes(&x1, base)
795
-	feOne(&x2)
796
-	feCopy(&x3, &x1)
797
-	feOne(&z3)
798
-
799
-	swap := int32(0)
800
-	for pos := 254; pos >= 0; pos-- {
801
-		b := e[pos/8] >> uint(pos&7)
802
-		b &= 1
803
-		swap ^= int32(b)
804
-		feCSwap(&x2, &x3, swap)
805
-		feCSwap(&z2, &z3, swap)
806
-		swap = int32(b)
807
-
808
-		feSub(&tmp0, &x3, &z3)
809
-		feSub(&tmp1, &x2, &z2)
810
-		feAdd(&x2, &x2, &z2)
811
-		feAdd(&z2, &x3, &z3)
812
-		feMul(&z3, &tmp0, &x2)
813
-		feMul(&z2, &z2, &tmp1)
814
-		feSquare(&tmp0, &tmp1)
815
-		feSquare(&tmp1, &x2)
816
-		feAdd(&x3, &z3, &z2)
817
-		feSub(&z2, &z3, &z2)
818
-		feMul(&x2, &tmp1, &tmp0)
819
-		feSub(&tmp1, &tmp1, &tmp0)
820
-		feSquare(&z2, &z2)
821
-		feMul121666(&z3, &tmp1)
822
-		feSquare(&x3, &x3)
823
-		feAdd(&tmp0, &tmp0, &z3)
824
-		feMul(&z3, &x1, &z2)
825
-		feMul(&z2, &tmp1, &tmp0)
826
-	}
827
-
828
-	feCSwap(&x2, &x3, swap)
829
-	feCSwap(&z2, &z3, swap)
830
-
831
-	feInvert(&z2, &z2)
832
-	feMul(&x2, &x2, &z2)
833
-	feToBytes(out, &x2)
834
-}
835 1
deleted file mode 100644
... ...
@@ -1,23 +0,0 @@
1
-// Copyright 2012 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// Package curve25519 provides an implementation of scalar multiplication on
6
-// the elliptic curve known as curve25519. See https://cr.yp.to/ecdh.html
7
-package curve25519 // import "golang.org/x/crypto/curve25519"
8
-
9
-// basePoint is the x coordinate of the generator of the curve.
10
-var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
11
-
12
-// ScalarMult sets dst to the product in*base where dst and base are the x
13
-// coordinates of group points and all values are in little-endian form.
14
-func ScalarMult(dst, in, base *[32]byte) {
15
-	scalarMult(dst, in, base)
16
-}
17
-
18
-// ScalarBaseMult sets dst to the product in*base where dst and base are the x
19
-// coordinates of group points, base is the standard generator and all values
20
-// are in little-endian form.
21
-func ScalarBaseMult(dst, in *[32]byte) {
22
-	ScalarMult(dst, in, &basePoint)
23
-}
24 1
deleted file mode 100644
... ...
@@ -1,73 +0,0 @@
1
-// Copyright 2012 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// This code was translated into a form compatible with 6a from the public
6
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
7
-
8
-// +build amd64,!gccgo,!appengine
9
-
10
-#include "const_amd64.h"
11
-
12
-// func freeze(inout *[5]uint64)
13
-TEXT ·freeze(SB),7,$0-8
14
-	MOVQ inout+0(FP), DI
15
-
16
-	MOVQ 0(DI),SI
17
-	MOVQ 8(DI),DX
18
-	MOVQ 16(DI),CX
19
-	MOVQ 24(DI),R8
20
-	MOVQ 32(DI),R9
21
-	MOVQ $REDMASK51,AX
22
-	MOVQ AX,R10
23
-	SUBQ $18,R10
24
-	MOVQ $3,R11
25
-REDUCELOOP:
26
-	MOVQ SI,R12
27
-	SHRQ $51,R12
28
-	ANDQ AX,SI
29
-	ADDQ R12,DX
30
-	MOVQ DX,R12
31
-	SHRQ $51,R12
32
-	ANDQ AX,DX
33
-	ADDQ R12,CX
34
-	MOVQ CX,R12
35
-	SHRQ $51,R12
36
-	ANDQ AX,CX
37
-	ADDQ R12,R8
38
-	MOVQ R8,R12
39
-	SHRQ $51,R12
40
-	ANDQ AX,R8
41
-	ADDQ R12,R9
42
-	MOVQ R9,R12
43
-	SHRQ $51,R12
44
-	ANDQ AX,R9
45
-	IMUL3Q $19,R12,R12
46
-	ADDQ R12,SI
47
-	SUBQ $1,R11
48
-	JA REDUCELOOP
49
-	MOVQ $1,R12
50
-	CMPQ R10,SI
51
-	CMOVQLT R11,R12
52
-	CMPQ AX,DX
53
-	CMOVQNE R11,R12
54
-	CMPQ AX,CX
55
-	CMOVQNE R11,R12
56
-	CMPQ AX,R8
57
-	CMOVQNE R11,R12
58
-	CMPQ AX,R9
59
-	CMOVQNE R11,R12
60
-	NEGQ R12
61
-	ANDQ R12,AX
62
-	ANDQ R12,R10
63
-	SUBQ R10,SI
64
-	SUBQ AX,DX
65
-	SUBQ AX,CX
66
-	SUBQ AX,R8
67
-	SUBQ AX,R9
68
-	MOVQ SI,0(DI)
69
-	MOVQ DX,8(DI)
70
-	MOVQ CX,16(DI)
71
-	MOVQ R8,24(DI)
72
-	MOVQ R9,32(DI)
73
-	RET
74 1
deleted file mode 100644
... ...
@@ -1,1377 +0,0 @@
1
-// Copyright 2012 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// This code was translated into a form compatible with 6a from the public
6
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
7
-
8
-// +build amd64,!gccgo,!appengine
9
-
10
-#include "const_amd64.h"
11
-
12
-// func ladderstep(inout *[5][5]uint64)
13
-TEXT ·ladderstep(SB),0,$296-8
14
-	MOVQ inout+0(FP),DI
15
-
16
-	MOVQ 40(DI),SI
17
-	MOVQ 48(DI),DX
18
-	MOVQ 56(DI),CX
19
-	MOVQ 64(DI),R8
20
-	MOVQ 72(DI),R9
21
-	MOVQ SI,AX
22
-	MOVQ DX,R10
23
-	MOVQ CX,R11
24
-	MOVQ R8,R12
25
-	MOVQ R9,R13
26
-	ADDQ ·_2P0(SB),AX
27
-	ADDQ ·_2P1234(SB),R10
28
-	ADDQ ·_2P1234(SB),R11
29
-	ADDQ ·_2P1234(SB),R12
30
-	ADDQ ·_2P1234(SB),R13
31
-	ADDQ 80(DI),SI
32
-	ADDQ 88(DI),DX
33
-	ADDQ 96(DI),CX
34
-	ADDQ 104(DI),R8
35
-	ADDQ 112(DI),R9
36
-	SUBQ 80(DI),AX
37
-	SUBQ 88(DI),R10
38
-	SUBQ 96(DI),R11
39
-	SUBQ 104(DI),R12
40
-	SUBQ 112(DI),R13
41
-	MOVQ SI,0(SP)
42
-	MOVQ DX,8(SP)
43
-	MOVQ CX,16(SP)
44
-	MOVQ R8,24(SP)
45
-	MOVQ R9,32(SP)
46
-	MOVQ AX,40(SP)
47
-	MOVQ R10,48(SP)
48
-	MOVQ R11,56(SP)
49
-	MOVQ R12,64(SP)
50
-	MOVQ R13,72(SP)
51
-	MOVQ 40(SP),AX
52
-	MULQ 40(SP)
53
-	MOVQ AX,SI
54
-	MOVQ DX,CX
55
-	MOVQ 40(SP),AX
56
-	SHLQ $1,AX
57
-	MULQ 48(SP)
58
-	MOVQ AX,R8
59
-	MOVQ DX,R9
60
-	MOVQ 40(SP),AX
61
-	SHLQ $1,AX
62
-	MULQ 56(SP)
63
-	MOVQ AX,R10
64
-	MOVQ DX,R11
65
-	MOVQ 40(SP),AX
66
-	SHLQ $1,AX
67
-	MULQ 64(SP)
68
-	MOVQ AX,R12
69
-	MOVQ DX,R13
70
-	MOVQ 40(SP),AX
71
-	SHLQ $1,AX
72
-	MULQ 72(SP)
73
-	MOVQ AX,R14
74
-	MOVQ DX,R15
75
-	MOVQ 48(SP),AX
76
-	MULQ 48(SP)
77
-	ADDQ AX,R10
78
-	ADCQ DX,R11
79
-	MOVQ 48(SP),AX
80
-	SHLQ $1,AX
81
-	MULQ 56(SP)
82
-	ADDQ AX,R12
83
-	ADCQ DX,R13
84
-	MOVQ 48(SP),AX
85
-	SHLQ $1,AX
86
-	MULQ 64(SP)
87
-	ADDQ AX,R14
88
-	ADCQ DX,R15
89
-	MOVQ 48(SP),DX
90
-	IMUL3Q $38,DX,AX
91
-	MULQ 72(SP)
92
-	ADDQ AX,SI
93
-	ADCQ DX,CX
94
-	MOVQ 56(SP),AX
95
-	MULQ 56(SP)
96
-	ADDQ AX,R14
97
-	ADCQ DX,R15
98
-	MOVQ 56(SP),DX
99
-	IMUL3Q $38,DX,AX
100
-	MULQ 64(SP)
101
-	ADDQ AX,SI
102
-	ADCQ DX,CX
103
-	MOVQ 56(SP),DX
104
-	IMUL3Q $38,DX,AX
105
-	MULQ 72(SP)
106
-	ADDQ AX,R8
107
-	ADCQ DX,R9
108
-	MOVQ 64(SP),DX
109
-	IMUL3Q $19,DX,AX
110
-	MULQ 64(SP)
111
-	ADDQ AX,R8
112
-	ADCQ DX,R9
113
-	MOVQ 64(SP),DX
114
-	IMUL3Q $38,DX,AX
115
-	MULQ 72(SP)
116
-	ADDQ AX,R10
117
-	ADCQ DX,R11
118
-	MOVQ 72(SP),DX
119
-	IMUL3Q $19,DX,AX
120
-	MULQ 72(SP)
121
-	ADDQ AX,R12
122
-	ADCQ DX,R13
123
-	MOVQ $REDMASK51,DX
124
-	SHLQ $13,CX:SI
125
-	ANDQ DX,SI
126
-	SHLQ $13,R9:R8
127
-	ANDQ DX,R8
128
-	ADDQ CX,R8
129
-	SHLQ $13,R11:R10
130
-	ANDQ DX,R10
131
-	ADDQ R9,R10
132
-	SHLQ $13,R13:R12
133
-	ANDQ DX,R12
134
-	ADDQ R11,R12
135
-	SHLQ $13,R15:R14
136
-	ANDQ DX,R14
137
-	ADDQ R13,R14
138
-	IMUL3Q $19,R15,CX
139
-	ADDQ CX,SI
140
-	MOVQ SI,CX
141
-	SHRQ $51,CX
142
-	ADDQ R8,CX
143
-	ANDQ DX,SI
144
-	MOVQ CX,R8
145
-	SHRQ $51,CX
146
-	ADDQ R10,CX
147
-	ANDQ DX,R8
148
-	MOVQ CX,R9
149
-	SHRQ $51,CX
150
-	ADDQ R12,CX
151
-	ANDQ DX,R9
152
-	MOVQ CX,AX
153
-	SHRQ $51,CX
154
-	ADDQ R14,CX
155
-	ANDQ DX,AX
156
-	MOVQ CX,R10
157
-	SHRQ $51,CX
158
-	IMUL3Q $19,CX,CX
159
-	ADDQ CX,SI
160
-	ANDQ DX,R10
161
-	MOVQ SI,80(SP)
162
-	MOVQ R8,88(SP)
163
-	MOVQ R9,96(SP)
164
-	MOVQ AX,104(SP)
165
-	MOVQ R10,112(SP)
166
-	MOVQ 0(SP),AX
167
-	MULQ 0(SP)
168
-	MOVQ AX,SI
169
-	MOVQ DX,CX
170
-	MOVQ 0(SP),AX
171
-	SHLQ $1,AX
172
-	MULQ 8(SP)
173
-	MOVQ AX,R8
174
-	MOVQ DX,R9
175
-	MOVQ 0(SP),AX
176
-	SHLQ $1,AX
177
-	MULQ 16(SP)
178
-	MOVQ AX,R10
179
-	MOVQ DX,R11
180
-	MOVQ 0(SP),AX
181
-	SHLQ $1,AX
182
-	MULQ 24(SP)
183
-	MOVQ AX,R12
184
-	MOVQ DX,R13
185
-	MOVQ 0(SP),AX
186
-	SHLQ $1,AX
187
-	MULQ 32(SP)
188
-	MOVQ AX,R14
189
-	MOVQ DX,R15
190
-	MOVQ 8(SP),AX
191
-	MULQ 8(SP)
192
-	ADDQ AX,R10
193
-	ADCQ DX,R11
194
-	MOVQ 8(SP),AX
195
-	SHLQ $1,AX
196
-	MULQ 16(SP)
197
-	ADDQ AX,R12
198
-	ADCQ DX,R13
199
-	MOVQ 8(SP),AX
200
-	SHLQ $1,AX
201
-	MULQ 24(SP)
202
-	ADDQ AX,R14
203
-	ADCQ DX,R15
204
-	MOVQ 8(SP),DX
205
-	IMUL3Q $38,DX,AX
206
-	MULQ 32(SP)
207
-	ADDQ AX,SI
208
-	ADCQ DX,CX
209
-	MOVQ 16(SP),AX
210
-	MULQ 16(SP)
211
-	ADDQ AX,R14
212
-	ADCQ DX,R15
213
-	MOVQ 16(SP),DX
214
-	IMUL3Q $38,DX,AX
215
-	MULQ 24(SP)
216
-	ADDQ AX,SI
217
-	ADCQ DX,CX
218
-	MOVQ 16(SP),DX
219
-	IMUL3Q $38,DX,AX
220
-	MULQ 32(SP)
221
-	ADDQ AX,R8
222
-	ADCQ DX,R9
223
-	MOVQ 24(SP),DX
224
-	IMUL3Q $19,DX,AX
225
-	MULQ 24(SP)
226
-	ADDQ AX,R8
227
-	ADCQ DX,R9
228
-	MOVQ 24(SP),DX
229
-	IMUL3Q $38,DX,AX
230
-	MULQ 32(SP)
231
-	ADDQ AX,R10
232
-	ADCQ DX,R11
233
-	MOVQ 32(SP),DX
234
-	IMUL3Q $19,DX,AX
235
-	MULQ 32(SP)
236
-	ADDQ AX,R12
237
-	ADCQ DX,R13
238
-	MOVQ $REDMASK51,DX
239
-	SHLQ $13,CX:SI
240
-	ANDQ DX,SI
241
-	SHLQ $13,R9:R8
242
-	ANDQ DX,R8
243
-	ADDQ CX,R8
244
-	SHLQ $13,R11:R10
245
-	ANDQ DX,R10
246
-	ADDQ R9,R10
247
-	SHLQ $13,R13:R12
248
-	ANDQ DX,R12
249
-	ADDQ R11,R12
250
-	SHLQ $13,R15:R14
251
-	ANDQ DX,R14
252
-	ADDQ R13,R14
253
-	IMUL3Q $19,R15,CX
254
-	ADDQ CX,SI
255
-	MOVQ SI,CX
256
-	SHRQ $51,CX
257
-	ADDQ R8,CX
258
-	ANDQ DX,SI
259
-	MOVQ CX,R8
260
-	SHRQ $51,CX
261
-	ADDQ R10,CX
262
-	ANDQ DX,R8
263
-	MOVQ CX,R9
264
-	SHRQ $51,CX
265
-	ADDQ R12,CX
266
-	ANDQ DX,R9
267
-	MOVQ CX,AX
268
-	SHRQ $51,CX
269
-	ADDQ R14,CX
270
-	ANDQ DX,AX
271
-	MOVQ CX,R10
272
-	SHRQ $51,CX
273
-	IMUL3Q $19,CX,CX
274
-	ADDQ CX,SI
275
-	ANDQ DX,R10
276
-	MOVQ SI,120(SP)
277
-	MOVQ R8,128(SP)
278
-	MOVQ R9,136(SP)
279
-	MOVQ AX,144(SP)
280
-	MOVQ R10,152(SP)
281
-	MOVQ SI,SI
282
-	MOVQ R8,DX
283
-	MOVQ R9,CX
284
-	MOVQ AX,R8
285
-	MOVQ R10,R9
286
-	ADDQ ·_2P0(SB),SI
287
-	ADDQ ·_2P1234(SB),DX
288
-	ADDQ ·_2P1234(SB),CX
289
-	ADDQ ·_2P1234(SB),R8
290
-	ADDQ ·_2P1234(SB),R9
291
-	SUBQ 80(SP),SI
292
-	SUBQ 88(SP),DX
293
-	SUBQ 96(SP),CX
294
-	SUBQ 104(SP),R8
295
-	SUBQ 112(SP),R9
296
-	MOVQ SI,160(SP)
297
-	MOVQ DX,168(SP)
298
-	MOVQ CX,176(SP)
299
-	MOVQ R8,184(SP)
300
-	MOVQ R9,192(SP)
301
-	MOVQ 120(DI),SI
302
-	MOVQ 128(DI),DX
303
-	MOVQ 136(DI),CX
304
-	MOVQ 144(DI),R8
305
-	MOVQ 152(DI),R9
306
-	MOVQ SI,AX
307
-	MOVQ DX,R10
308
-	MOVQ CX,R11
309
-	MOVQ R8,R12
310
-	MOVQ R9,R13
311
-	ADDQ ·_2P0(SB),AX
312
-	ADDQ ·_2P1234(SB),R10
313
-	ADDQ ·_2P1234(SB),R11
314
-	ADDQ ·_2P1234(SB),R12
315
-	ADDQ ·_2P1234(SB),R13
316
-	ADDQ 160(DI),SI
317
-	ADDQ 168(DI),DX
318
-	ADDQ 176(DI),CX
319
-	ADDQ 184(DI),R8
320
-	ADDQ 192(DI),R9
321
-	SUBQ 160(DI),AX
322
-	SUBQ 168(DI),R10
323
-	SUBQ 176(DI),R11
324
-	SUBQ 184(DI),R12
325
-	SUBQ 192(DI),R13
326
-	MOVQ SI,200(SP)
327
-	MOVQ DX,208(SP)
328
-	MOVQ CX,216(SP)
329
-	MOVQ R8,224(SP)
330
-	MOVQ R9,232(SP)
331
-	MOVQ AX,240(SP)
332
-	MOVQ R10,248(SP)
333
-	MOVQ R11,256(SP)
334
-	MOVQ R12,264(SP)
335
-	MOVQ R13,272(SP)
336
-	MOVQ 224(SP),SI
337
-	IMUL3Q $19,SI,AX
338
-	MOVQ AX,280(SP)
339
-	MULQ 56(SP)
340
-	MOVQ AX,SI
341
-	MOVQ DX,CX
342
-	MOVQ 232(SP),DX
343
-	IMUL3Q $19,DX,AX
344
-	MOVQ AX,288(SP)
345
-	MULQ 48(SP)
346
-	ADDQ AX,SI
347
-	ADCQ DX,CX
348
-	MOVQ 200(SP),AX
349
-	MULQ 40(SP)
350
-	ADDQ AX,SI
351
-	ADCQ DX,CX
352
-	MOVQ 200(SP),AX
353
-	MULQ 48(SP)
354
-	MOVQ AX,R8
355
-	MOVQ DX,R9
356
-	MOVQ 200(SP),AX
357
-	MULQ 56(SP)
358
-	MOVQ AX,R10
359
-	MOVQ DX,R11
360
-	MOVQ 200(SP),AX
361
-	MULQ 64(SP)
362
-	MOVQ AX,R12
363
-	MOVQ DX,R13
364
-	MOVQ 200(SP),AX
365
-	MULQ 72(SP)
366
-	MOVQ AX,R14
367
-	MOVQ DX,R15
368
-	MOVQ 208(SP),AX
369
-	MULQ 40(SP)
370
-	ADDQ AX,R8
371
-	ADCQ DX,R9
372
-	MOVQ 208(SP),AX
373
-	MULQ 48(SP)
374
-	ADDQ AX,R10
375
-	ADCQ DX,R11
376
-	MOVQ 208(SP),AX
377
-	MULQ 56(SP)
378
-	ADDQ AX,R12
379
-	ADCQ DX,R13
380
-	MOVQ 208(SP),AX
381
-	MULQ 64(SP)
382
-	ADDQ AX,R14
383
-	ADCQ DX,R15
384
-	MOVQ 208(SP),DX
385
-	IMUL3Q $19,DX,AX
386
-	MULQ 72(SP)
387
-	ADDQ AX,SI
388
-	ADCQ DX,CX
389
-	MOVQ 216(SP),AX
390
-	MULQ 40(SP)
391
-	ADDQ AX,R10
392
-	ADCQ DX,R11
393
-	MOVQ 216(SP),AX
394
-	MULQ 48(SP)
395
-	ADDQ AX,R12
396
-	ADCQ DX,R13
397
-	MOVQ 216(SP),AX
398
-	MULQ 56(SP)
399
-	ADDQ AX,R14
400
-	ADCQ DX,R15
401
-	MOVQ 216(SP),DX
402
-	IMUL3Q $19,DX,AX
403
-	MULQ 64(SP)
404
-	ADDQ AX,SI
405
-	ADCQ DX,CX
406
-	MOVQ 216(SP),DX
407
-	IMUL3Q $19,DX,AX
408
-	MULQ 72(SP)
409
-	ADDQ AX,R8
410
-	ADCQ DX,R9
411
-	MOVQ 224(SP),AX
412
-	MULQ 40(SP)
413
-	ADDQ AX,R12
414
-	ADCQ DX,R13
415
-	MOVQ 224(SP),AX
416
-	MULQ 48(SP)
417
-	ADDQ AX,R14
418
-	ADCQ DX,R15
419
-	MOVQ 280(SP),AX
420
-	MULQ 64(SP)
421
-	ADDQ AX,R8
422
-	ADCQ DX,R9
423
-	MOVQ 280(SP),AX
424
-	MULQ 72(SP)
425
-	ADDQ AX,R10
426
-	ADCQ DX,R11
427
-	MOVQ 232(SP),AX
428
-	MULQ 40(SP)
429
-	ADDQ AX,R14
430
-	ADCQ DX,R15
431
-	MOVQ 288(SP),AX
432
-	MULQ 56(SP)
433
-	ADDQ AX,R8
434
-	ADCQ DX,R9
435
-	MOVQ 288(SP),AX
436
-	MULQ 64(SP)
437
-	ADDQ AX,R10
438
-	ADCQ DX,R11
439
-	MOVQ 288(SP),AX
440
-	MULQ 72(SP)
441
-	ADDQ AX,R12
442
-	ADCQ DX,R13
443
-	MOVQ $REDMASK51,DX
444
-	SHLQ $13,CX:SI
445
-	ANDQ DX,SI
446
-	SHLQ $13,R9:R8
447
-	ANDQ DX,R8
448
-	ADDQ CX,R8
449
-	SHLQ $13,R11:R10
450
-	ANDQ DX,R10
451
-	ADDQ R9,R10
452
-	SHLQ $13,R13:R12
453
-	ANDQ DX,R12
454
-	ADDQ R11,R12
455
-	SHLQ $13,R15:R14
456
-	ANDQ DX,R14
457
-	ADDQ R13,R14
458
-	IMUL3Q $19,R15,CX
459
-	ADDQ CX,SI
460
-	MOVQ SI,CX
461
-	SHRQ $51,CX
462
-	ADDQ R8,CX
463
-	MOVQ CX,R8
464
-	SHRQ $51,CX
465
-	ANDQ DX,SI
466
-	ADDQ R10,CX
467
-	MOVQ CX,R9
468
-	SHRQ $51,CX
469
-	ANDQ DX,R8
470
-	ADDQ R12,CX
471
-	MOVQ CX,AX
472
-	SHRQ $51,CX
473
-	ANDQ DX,R9
474
-	ADDQ R14,CX
475
-	MOVQ CX,R10
476
-	SHRQ $51,CX
477
-	ANDQ DX,AX
478
-	IMUL3Q $19,CX,CX
479
-	ADDQ CX,SI
480
-	ANDQ DX,R10
481
-	MOVQ SI,40(SP)
482
-	MOVQ R8,48(SP)
483
-	MOVQ R9,56(SP)
484
-	MOVQ AX,64(SP)
485
-	MOVQ R10,72(SP)
486
-	MOVQ 264(SP),SI
487
-	IMUL3Q $19,SI,AX
488
-	MOVQ AX,200(SP)
489
-	MULQ 16(SP)
490
-	MOVQ AX,SI
491
-	MOVQ DX,CX
492
-	MOVQ 272(SP),DX
493
-	IMUL3Q $19,DX,AX
494
-	MOVQ AX,208(SP)
495
-	MULQ 8(SP)
496
-	ADDQ AX,SI
497
-	ADCQ DX,CX
498
-	MOVQ 240(SP),AX
499
-	MULQ 0(SP)
500
-	ADDQ AX,SI
501
-	ADCQ DX,CX
502
-	MOVQ 240(SP),AX
503
-	MULQ 8(SP)
504
-	MOVQ AX,R8
505
-	MOVQ DX,R9
506
-	MOVQ 240(SP),AX
507
-	MULQ 16(SP)
508
-	MOVQ AX,R10
509
-	MOVQ DX,R11
510
-	MOVQ 240(SP),AX
511
-	MULQ 24(SP)
512
-	MOVQ AX,R12
513
-	MOVQ DX,R13
514
-	MOVQ 240(SP),AX
515
-	MULQ 32(SP)
516
-	MOVQ AX,R14
517
-	MOVQ DX,R15
518
-	MOVQ 248(SP),AX
519
-	MULQ 0(SP)
520
-	ADDQ AX,R8
521
-	ADCQ DX,R9
522
-	MOVQ 248(SP),AX
523
-	MULQ 8(SP)
524
-	ADDQ AX,R10
525
-	ADCQ DX,R11
526
-	MOVQ 248(SP),AX
527
-	MULQ 16(SP)
528
-	ADDQ AX,R12
529
-	ADCQ DX,R13
530
-	MOVQ 248(SP),AX
531
-	MULQ 24(SP)
532
-	ADDQ AX,R14
533
-	ADCQ DX,R15
534
-	MOVQ 248(SP),DX
535
-	IMUL3Q $19,DX,AX
536
-	MULQ 32(SP)
537
-	ADDQ AX,SI
538
-	ADCQ DX,CX
539
-	MOVQ 256(SP),AX
540
-	MULQ 0(SP)
541
-	ADDQ AX,R10
542
-	ADCQ DX,R11
543
-	MOVQ 256(SP),AX
544
-	MULQ 8(SP)
545
-	ADDQ AX,R12
546
-	ADCQ DX,R13
547
-	MOVQ 256(SP),AX
548
-	MULQ 16(SP)
549
-	ADDQ AX,R14
550
-	ADCQ DX,R15
551
-	MOVQ 256(SP),DX
552
-	IMUL3Q $19,DX,AX
553
-	MULQ 24(SP)
554
-	ADDQ AX,SI
555
-	ADCQ DX,CX
556
-	MOVQ 256(SP),DX
557
-	IMUL3Q $19,DX,AX
558
-	MULQ 32(SP)
559
-	ADDQ AX,R8
560
-	ADCQ DX,R9
561
-	MOVQ 264(SP),AX
562
-	MULQ 0(SP)
563
-	ADDQ AX,R12
564
-	ADCQ DX,R13
565
-	MOVQ 264(SP),AX
566
-	MULQ 8(SP)
567
-	ADDQ AX,R14
568
-	ADCQ DX,R15
569
-	MOVQ 200(SP),AX
570
-	MULQ 24(SP)
571
-	ADDQ AX,R8
572
-	ADCQ DX,R9
573
-	MOVQ 200(SP),AX
574
-	MULQ 32(SP)
575
-	ADDQ AX,R10
576
-	ADCQ DX,R11
577
-	MOVQ 272(SP),AX
578
-	MULQ 0(SP)
579
-	ADDQ AX,R14
580
-	ADCQ DX,R15
581
-	MOVQ 208(SP),AX
582
-	MULQ 16(SP)
583
-	ADDQ AX,R8
584
-	ADCQ DX,R9
585
-	MOVQ 208(SP),AX
586
-	MULQ 24(SP)
587
-	ADDQ AX,R10
588
-	ADCQ DX,R11
589
-	MOVQ 208(SP),AX
590
-	MULQ 32(SP)
591
-	ADDQ AX,R12
592
-	ADCQ DX,R13
593
-	MOVQ $REDMASK51,DX
594
-	SHLQ $13,CX:SI
595
-	ANDQ DX,SI
596
-	SHLQ $13,R9:R8
597
-	ANDQ DX,R8
598
-	ADDQ CX,R8
599
-	SHLQ $13,R11:R10
600
-	ANDQ DX,R10
601
-	ADDQ R9,R10
602
-	SHLQ $13,R13:R12
603
-	ANDQ DX,R12
604
-	ADDQ R11,R12
605
-	SHLQ $13,R15:R14
606
-	ANDQ DX,R14
607
-	ADDQ R13,R14
608
-	IMUL3Q $19,R15,CX
609
-	ADDQ CX,SI
610
-	MOVQ SI,CX
611
-	SHRQ $51,CX
612
-	ADDQ R8,CX
613
-	MOVQ CX,R8
614
-	SHRQ $51,CX
615
-	ANDQ DX,SI
616
-	ADDQ R10,CX
617
-	MOVQ CX,R9
618
-	SHRQ $51,CX
619
-	ANDQ DX,R8
620
-	ADDQ R12,CX
621
-	MOVQ CX,AX
622
-	SHRQ $51,CX
623
-	ANDQ DX,R9
624
-	ADDQ R14,CX
625
-	MOVQ CX,R10
626
-	SHRQ $51,CX
627
-	ANDQ DX,AX
628
-	IMUL3Q $19,CX,CX
629
-	ADDQ CX,SI
630
-	ANDQ DX,R10
631
-	MOVQ SI,DX
632
-	MOVQ R8,CX
633
-	MOVQ R9,R11
634
-	MOVQ AX,R12
635
-	MOVQ R10,R13
636
-	ADDQ ·_2P0(SB),DX
637
-	ADDQ ·_2P1234(SB),CX
638
-	ADDQ ·_2P1234(SB),R11
639
-	ADDQ ·_2P1234(SB),R12
640
-	ADDQ ·_2P1234(SB),R13
641
-	ADDQ 40(SP),SI
642
-	ADDQ 48(SP),R8
643
-	ADDQ 56(SP),R9
644
-	ADDQ 64(SP),AX
645
-	ADDQ 72(SP),R10
646
-	SUBQ 40(SP),DX
647
-	SUBQ 48(SP),CX
648
-	SUBQ 56(SP),R11
649
-	SUBQ 64(SP),R12
650
-	SUBQ 72(SP),R13
651
-	MOVQ SI,120(DI)
652
-	MOVQ R8,128(DI)
653
-	MOVQ R9,136(DI)
654
-	MOVQ AX,144(DI)
655
-	MOVQ R10,152(DI)
656
-	MOVQ DX,160(DI)
657
-	MOVQ CX,168(DI)
658
-	MOVQ R11,176(DI)
659
-	MOVQ R12,184(DI)
660
-	MOVQ R13,192(DI)
661
-	MOVQ 120(DI),AX
662
-	MULQ 120(DI)
663
-	MOVQ AX,SI
664
-	MOVQ DX,CX
665
-	MOVQ 120(DI),AX
666
-	SHLQ $1,AX
667
-	MULQ 128(DI)
668
-	MOVQ AX,R8
669
-	MOVQ DX,R9
670
-	MOVQ 120(DI),AX
671
-	SHLQ $1,AX
672
-	MULQ 136(DI)
673
-	MOVQ AX,R10
674
-	MOVQ DX,R11
675
-	MOVQ 120(DI),AX
676
-	SHLQ $1,AX
677
-	MULQ 144(DI)
678
-	MOVQ AX,R12
679
-	MOVQ DX,R13
680
-	MOVQ 120(DI),AX
681
-	SHLQ $1,AX
682
-	MULQ 152(DI)
683
-	MOVQ AX,R14
684
-	MOVQ DX,R15
685
-	MOVQ 128(DI),AX
686
-	MULQ 128(DI)
687
-	ADDQ AX,R10
688
-	ADCQ DX,R11
689
-	MOVQ 128(DI),AX
690
-	SHLQ $1,AX
691
-	MULQ 136(DI)
692
-	ADDQ AX,R12
693
-	ADCQ DX,R13
694
-	MOVQ 128(DI),AX
695
-	SHLQ $1,AX
696
-	MULQ 144(DI)
697
-	ADDQ AX,R14
698
-	ADCQ DX,R15
699
-	MOVQ 128(DI),DX
700
-	IMUL3Q $38,DX,AX
701
-	MULQ 152(DI)
702
-	ADDQ AX,SI
703
-	ADCQ DX,CX
704
-	MOVQ 136(DI),AX
705
-	MULQ 136(DI)
706
-	ADDQ AX,R14
707
-	ADCQ DX,R15
708
-	MOVQ 136(DI),DX
709
-	IMUL3Q $38,DX,AX
710
-	MULQ 144(DI)
711
-	ADDQ AX,SI
712
-	ADCQ DX,CX
713
-	MOVQ 136(DI),DX
714
-	IMUL3Q $38,DX,AX
715
-	MULQ 152(DI)
716
-	ADDQ AX,R8
717
-	ADCQ DX,R9
718
-	MOVQ 144(DI),DX
719
-	IMUL3Q $19,DX,AX
720
-	MULQ 144(DI)
721
-	ADDQ AX,R8
722
-	ADCQ DX,R9
723
-	MOVQ 144(DI),DX
724
-	IMUL3Q $38,DX,AX
725
-	MULQ 152(DI)
726
-	ADDQ AX,R10
727
-	ADCQ DX,R11
728
-	MOVQ 152(DI),DX
729
-	IMUL3Q $19,DX,AX
730
-	MULQ 152(DI)
731
-	ADDQ AX,R12
732
-	ADCQ DX,R13
733
-	MOVQ $REDMASK51,DX
734
-	SHLQ $13,CX:SI
735
-	ANDQ DX,SI
736
-	SHLQ $13,R9:R8
737
-	ANDQ DX,R8
738
-	ADDQ CX,R8
739
-	SHLQ $13,R11:R10
740
-	ANDQ DX,R10
741
-	ADDQ R9,R10
742
-	SHLQ $13,R13:R12
743
-	ANDQ DX,R12
744
-	ADDQ R11,R12
745
-	SHLQ $13,R15:R14
746
-	ANDQ DX,R14
747
-	ADDQ R13,R14
748
-	IMUL3Q $19,R15,CX
749
-	ADDQ CX,SI
750
-	MOVQ SI,CX
751
-	SHRQ $51,CX
752
-	ADDQ R8,CX
753
-	ANDQ DX,SI
754
-	MOVQ CX,R8
755
-	SHRQ $51,CX
756
-	ADDQ R10,CX
757
-	ANDQ DX,R8
758
-	MOVQ CX,R9
759
-	SHRQ $51,CX
760
-	ADDQ R12,CX
761
-	ANDQ DX,R9
762
-	MOVQ CX,AX
763
-	SHRQ $51,CX
764
-	ADDQ R14,CX
765
-	ANDQ DX,AX
766
-	MOVQ CX,R10
767
-	SHRQ $51,CX
768
-	IMUL3Q $19,CX,CX
769
-	ADDQ CX,SI
770
-	ANDQ DX,R10
771
-	MOVQ SI,120(DI)
772
-	MOVQ R8,128(DI)
773
-	MOVQ R9,136(DI)
774
-	MOVQ AX,144(DI)
775
-	MOVQ R10,152(DI)
776
-	MOVQ 160(DI),AX
777
-	MULQ 160(DI)
778
-	MOVQ AX,SI
779
-	MOVQ DX,CX
780
-	MOVQ 160(DI),AX
781
-	SHLQ $1,AX
782
-	MULQ 168(DI)
783
-	MOVQ AX,R8
784
-	MOVQ DX,R9
785
-	MOVQ 160(DI),AX
786
-	SHLQ $1,AX
787
-	MULQ 176(DI)
788
-	MOVQ AX,R10
789
-	MOVQ DX,R11
790
-	MOVQ 160(DI),AX
791
-	SHLQ $1,AX
792
-	MULQ 184(DI)
793
-	MOVQ AX,R12
794
-	MOVQ DX,R13
795
-	MOVQ 160(DI),AX
796
-	SHLQ $1,AX
797
-	MULQ 192(DI)
798
-	MOVQ AX,R14
799
-	MOVQ DX,R15
800
-	MOVQ 168(DI),AX
801
-	MULQ 168(DI)
802
-	ADDQ AX,R10
803
-	ADCQ DX,R11
804
-	MOVQ 168(DI),AX
805
-	SHLQ $1,AX
806
-	MULQ 176(DI)
807
-	ADDQ AX,R12
808
-	ADCQ DX,R13
809
-	MOVQ 168(DI),AX
810
-	SHLQ $1,AX
811
-	MULQ 184(DI)
812
-	ADDQ AX,R14
813
-	ADCQ DX,R15
814
-	MOVQ 168(DI),DX
815
-	IMUL3Q $38,DX,AX
816
-	MULQ 192(DI)
817
-	ADDQ AX,SI
818
-	ADCQ DX,CX
819
-	MOVQ 176(DI),AX
820
-	MULQ 176(DI)
821
-	ADDQ AX,R14
822
-	ADCQ DX,R15
823
-	MOVQ 176(DI),DX
824
-	IMUL3Q $38,DX,AX
825
-	MULQ 184(DI)
826
-	ADDQ AX,SI
827
-	ADCQ DX,CX
828
-	MOVQ 176(DI),DX
829
-	IMUL3Q $38,DX,AX
830
-	MULQ 192(DI)
831
-	ADDQ AX,R8
832
-	ADCQ DX,R9
833
-	MOVQ 184(DI),DX
834
-	IMUL3Q $19,DX,AX
835
-	MULQ 184(DI)
836
-	ADDQ AX,R8
837
-	ADCQ DX,R9
838
-	MOVQ 184(DI),DX
839
-	IMUL3Q $38,DX,AX
840
-	MULQ 192(DI)
841
-	ADDQ AX,R10
842
-	ADCQ DX,R11
843
-	MOVQ 192(DI),DX
844
-	IMUL3Q $19,DX,AX
845
-	MULQ 192(DI)
846
-	ADDQ AX,R12
847
-	ADCQ DX,R13
848
-	MOVQ $REDMASK51,DX
849
-	SHLQ $13,CX:SI
850
-	ANDQ DX,SI
851
-	SHLQ $13,R9:R8
852
-	ANDQ DX,R8
853
-	ADDQ CX,R8
854
-	SHLQ $13,R11:R10
855
-	ANDQ DX,R10
856
-	ADDQ R9,R10
857
-	SHLQ $13,R13:R12
858
-	ANDQ DX,R12
859
-	ADDQ R11,R12
860
-	SHLQ $13,R15:R14
861
-	ANDQ DX,R14
862
-	ADDQ R13,R14
863
-	IMUL3Q $19,R15,CX
864
-	ADDQ CX,SI
865
-	MOVQ SI,CX
866
-	SHRQ $51,CX
867
-	ADDQ R8,CX
868
-	ANDQ DX,SI
869
-	MOVQ CX,R8
870
-	SHRQ $51,CX
871
-	ADDQ R10,CX
872
-	ANDQ DX,R8
873
-	MOVQ CX,R9
874
-	SHRQ $51,CX
875
-	ADDQ R12,CX
876
-	ANDQ DX,R9
877
-	MOVQ CX,AX
878
-	SHRQ $51,CX
879
-	ADDQ R14,CX
880
-	ANDQ DX,AX
881
-	MOVQ CX,R10
882
-	SHRQ $51,CX
883
-	IMUL3Q $19,CX,CX
884
-	ADDQ CX,SI
885
-	ANDQ DX,R10
886
-	MOVQ SI,160(DI)
887
-	MOVQ R8,168(DI)
888
-	MOVQ R9,176(DI)
889
-	MOVQ AX,184(DI)
890
-	MOVQ R10,192(DI)
891
-	MOVQ 184(DI),SI
892
-	IMUL3Q $19,SI,AX
893
-	MOVQ AX,0(SP)
894
-	MULQ 16(DI)
895
-	MOVQ AX,SI
896
-	MOVQ DX,CX
897
-	MOVQ 192(DI),DX
898
-	IMUL3Q $19,DX,AX
899
-	MOVQ AX,8(SP)
900
-	MULQ 8(DI)
901
-	ADDQ AX,SI
902
-	ADCQ DX,CX
903
-	MOVQ 160(DI),AX
904
-	MULQ 0(DI)
905
-	ADDQ AX,SI
906
-	ADCQ DX,CX
907
-	MOVQ 160(DI),AX
908
-	MULQ 8(DI)
909
-	MOVQ AX,R8
910
-	MOVQ DX,R9
911
-	MOVQ 160(DI),AX
912
-	MULQ 16(DI)
913
-	MOVQ AX,R10
914
-	MOVQ DX,R11
915
-	MOVQ 160(DI),AX
916
-	MULQ 24(DI)
917
-	MOVQ AX,R12
918
-	MOVQ DX,R13
919
-	MOVQ 160(DI),AX
920
-	MULQ 32(DI)
921
-	MOVQ AX,R14
922
-	MOVQ DX,R15
923
-	MOVQ 168(DI),AX
924
-	MULQ 0(DI)
925
-	ADDQ AX,R8
926
-	ADCQ DX,R9
927
-	MOVQ 168(DI),AX
928
-	MULQ 8(DI)
929
-	ADDQ AX,R10
930
-	ADCQ DX,R11
931
-	MOVQ 168(DI),AX
932
-	MULQ 16(DI)
933
-	ADDQ AX,R12
934
-	ADCQ DX,R13
935
-	MOVQ 168(DI),AX
936
-	MULQ 24(DI)
937
-	ADDQ AX,R14
938
-	ADCQ DX,R15
939
-	MOVQ 168(DI),DX
940
-	IMUL3Q $19,DX,AX
941
-	MULQ 32(DI)
942
-	ADDQ AX,SI
943
-	ADCQ DX,CX
944
-	MOVQ 176(DI),AX
945
-	MULQ 0(DI)
946
-	ADDQ AX,R10
947
-	ADCQ DX,R11
948
-	MOVQ 176(DI),AX
949
-	MULQ 8(DI)
950
-	ADDQ AX,R12
951
-	ADCQ DX,R13
952
-	MOVQ 176(DI),AX
953
-	MULQ 16(DI)
954
-	ADDQ AX,R14
955
-	ADCQ DX,R15
956
-	MOVQ 176(DI),DX
957
-	IMUL3Q $19,DX,AX
958
-	MULQ 24(DI)
959
-	ADDQ AX,SI
960
-	ADCQ DX,CX
961
-	MOVQ 176(DI),DX
962
-	IMUL3Q $19,DX,AX
963
-	MULQ 32(DI)
964
-	ADDQ AX,R8
965
-	ADCQ DX,R9
966
-	MOVQ 184(DI),AX
967
-	MULQ 0(DI)
968
-	ADDQ AX,R12
969
-	ADCQ DX,R13
970
-	MOVQ 184(DI),AX
971
-	MULQ 8(DI)
972
-	ADDQ AX,R14
973
-	ADCQ DX,R15
974
-	MOVQ 0(SP),AX
975
-	MULQ 24(DI)
976
-	ADDQ AX,R8
977
-	ADCQ DX,R9
978
-	MOVQ 0(SP),AX
979
-	MULQ 32(DI)
980
-	ADDQ AX,R10
981
-	ADCQ DX,R11
982
-	MOVQ 192(DI),AX
983
-	MULQ 0(DI)
984
-	ADDQ AX,R14
985
-	ADCQ DX,R15
986
-	MOVQ 8(SP),AX
987
-	MULQ 16(DI)
988
-	ADDQ AX,R8
989
-	ADCQ DX,R9
990
-	MOVQ 8(SP),AX
991
-	MULQ 24(DI)
992
-	ADDQ AX,R10
993
-	ADCQ DX,R11
994
-	MOVQ 8(SP),AX
995
-	MULQ 32(DI)
996
-	ADDQ AX,R12
997
-	ADCQ DX,R13
998
-	MOVQ $REDMASK51,DX
999
-	SHLQ $13,CX:SI
1000
-	ANDQ DX,SI
1001
-	SHLQ $13,R9:R8
1002
-	ANDQ DX,R8
1003
-	ADDQ CX,R8
1004
-	SHLQ $13,R11:R10
1005
-	ANDQ DX,R10
1006
-	ADDQ R9,R10
1007
-	SHLQ $13,R13:R12
1008
-	ANDQ DX,R12
1009
-	ADDQ R11,R12
1010
-	SHLQ $13,R15:R14
1011
-	ANDQ DX,R14
1012
-	ADDQ R13,R14
1013
-	IMUL3Q $19,R15,CX
1014
-	ADDQ CX,SI
1015
-	MOVQ SI,CX
1016
-	SHRQ $51,CX
1017
-	ADDQ R8,CX
1018
-	MOVQ CX,R8
1019
-	SHRQ $51,CX
1020
-	ANDQ DX,SI
1021
-	ADDQ R10,CX
1022
-	MOVQ CX,R9
1023
-	SHRQ $51,CX
1024
-	ANDQ DX,R8
1025
-	ADDQ R12,CX
1026
-	MOVQ CX,AX
1027
-	SHRQ $51,CX
1028
-	ANDQ DX,R9
1029
-	ADDQ R14,CX
1030
-	MOVQ CX,R10
1031
-	SHRQ $51,CX
1032
-	ANDQ DX,AX
1033
-	IMUL3Q $19,CX,CX
1034
-	ADDQ CX,SI
1035
-	ANDQ DX,R10
1036
-	MOVQ SI,160(DI)
1037
-	MOVQ R8,168(DI)
1038
-	MOVQ R9,176(DI)
1039
-	MOVQ AX,184(DI)
1040
-	MOVQ R10,192(DI)
1041
-	MOVQ 144(SP),SI
1042
-	IMUL3Q $19,SI,AX
1043
-	MOVQ AX,0(SP)
1044
-	MULQ 96(SP)
1045
-	MOVQ AX,SI
1046
-	MOVQ DX,CX
1047
-	MOVQ 152(SP),DX
1048
-	IMUL3Q $19,DX,AX
1049
-	MOVQ AX,8(SP)
1050
-	MULQ 88(SP)
1051
-	ADDQ AX,SI
1052
-	ADCQ DX,CX
1053
-	MOVQ 120(SP),AX
1054
-	MULQ 80(SP)
1055
-	ADDQ AX,SI
1056
-	ADCQ DX,CX
1057
-	MOVQ 120(SP),AX
1058
-	MULQ 88(SP)
1059
-	MOVQ AX,R8
1060
-	MOVQ DX,R9
1061
-	MOVQ 120(SP),AX
1062
-	MULQ 96(SP)
1063
-	MOVQ AX,R10
1064
-	MOVQ DX,R11
1065
-	MOVQ 120(SP),AX
1066
-	MULQ 104(SP)
1067
-	MOVQ AX,R12
1068
-	MOVQ DX,R13
1069
-	MOVQ 120(SP),AX
1070
-	MULQ 112(SP)
1071
-	MOVQ AX,R14
1072
-	MOVQ DX,R15
1073
-	MOVQ 128(SP),AX
1074
-	MULQ 80(SP)
1075
-	ADDQ AX,R8
1076
-	ADCQ DX,R9
1077
-	MOVQ 128(SP),AX
1078
-	MULQ 88(SP)
1079
-	ADDQ AX,R10
1080
-	ADCQ DX,R11
1081
-	MOVQ 128(SP),AX
1082
-	MULQ 96(SP)
1083
-	ADDQ AX,R12
1084
-	ADCQ DX,R13
1085
-	MOVQ 128(SP),AX
1086
-	MULQ 104(SP)
1087
-	ADDQ AX,R14
1088
-	ADCQ DX,R15
1089
-	MOVQ 128(SP),DX
1090
-	IMUL3Q $19,DX,AX
1091
-	MULQ 112(SP)
1092
-	ADDQ AX,SI
1093
-	ADCQ DX,CX
1094
-	MOVQ 136(SP),AX
1095
-	MULQ 80(SP)
1096
-	ADDQ AX,R10
1097
-	ADCQ DX,R11
1098
-	MOVQ 136(SP),AX
1099
-	MULQ 88(SP)
1100
-	ADDQ AX,R12
1101
-	ADCQ DX,R13
1102
-	MOVQ 136(SP),AX
1103
-	MULQ 96(SP)
1104
-	ADDQ AX,R14
1105
-	ADCQ DX,R15
1106
-	MOVQ 136(SP),DX
1107
-	IMUL3Q $19,DX,AX
1108
-	MULQ 104(SP)
1109
-	ADDQ AX,SI
1110
-	ADCQ DX,CX
1111
-	MOVQ 136(SP),DX
1112
-	IMUL3Q $19,DX,AX
1113
-	MULQ 112(SP)
1114
-	ADDQ AX,R8
1115
-	ADCQ DX,R9
1116
-	MOVQ 144(SP),AX
1117
-	MULQ 80(SP)
1118
-	ADDQ AX,R12
1119
-	ADCQ DX,R13
1120
-	MOVQ 144(SP),AX
1121
-	MULQ 88(SP)
1122
-	ADDQ AX,R14
1123
-	ADCQ DX,R15
1124
-	MOVQ 0(SP),AX
1125
-	MULQ 104(SP)
1126
-	ADDQ AX,R8
1127
-	ADCQ DX,R9
1128
-	MOVQ 0(SP),AX
1129
-	MULQ 112(SP)
1130
-	ADDQ AX,R10
1131
-	ADCQ DX,R11
1132
-	MOVQ 152(SP),AX
1133
-	MULQ 80(SP)
1134
-	ADDQ AX,R14
1135
-	ADCQ DX,R15
1136
-	MOVQ 8(SP),AX
1137
-	MULQ 96(SP)
1138
-	ADDQ AX,R8
1139
-	ADCQ DX,R9
1140
-	MOVQ 8(SP),AX
1141
-	MULQ 104(SP)
1142
-	ADDQ AX,R10
1143
-	ADCQ DX,R11
1144
-	MOVQ 8(SP),AX
1145
-	MULQ 112(SP)
1146
-	ADDQ AX,R12
1147
-	ADCQ DX,R13
1148
-	MOVQ $REDMASK51,DX
1149
-	SHLQ $13,CX:SI
1150
-	ANDQ DX,SI
1151
-	SHLQ $13,R9:R8
1152
-	ANDQ DX,R8
1153
-	ADDQ CX,R8
1154
-	SHLQ $13,R11:R10
1155
-	ANDQ DX,R10
1156
-	ADDQ R9,R10
1157
-	SHLQ $13,R13:R12
1158
-	ANDQ DX,R12
1159
-	ADDQ R11,R12
1160
-	SHLQ $13,R15:R14
1161
-	ANDQ DX,R14
1162
-	ADDQ R13,R14
1163
-	IMUL3Q $19,R15,CX
1164
-	ADDQ CX,SI
1165
-	MOVQ SI,CX
1166
-	SHRQ $51,CX
1167
-	ADDQ R8,CX
1168
-	MOVQ CX,R8
1169
-	SHRQ $51,CX
1170
-	ANDQ DX,SI
1171
-	ADDQ R10,CX
1172
-	MOVQ CX,R9
1173
-	SHRQ $51,CX
1174
-	ANDQ DX,R8
1175
-	ADDQ R12,CX
1176
-	MOVQ CX,AX
1177
-	SHRQ $51,CX
1178
-	ANDQ DX,R9
1179
-	ADDQ R14,CX
1180
-	MOVQ CX,R10
1181
-	SHRQ $51,CX
1182
-	ANDQ DX,AX
1183
-	IMUL3Q $19,CX,CX
1184
-	ADDQ CX,SI
1185
-	ANDQ DX,R10
1186
-	MOVQ SI,40(DI)
1187
-	MOVQ R8,48(DI)
1188
-	MOVQ R9,56(DI)
1189
-	MOVQ AX,64(DI)
1190
-	MOVQ R10,72(DI)
1191
-	MOVQ 160(SP),AX
1192
-	MULQ ·_121666_213(SB)
1193
-	SHRQ $13,AX
1194
-	MOVQ AX,SI
1195
-	MOVQ DX,CX
1196
-	MOVQ 168(SP),AX
1197
-	MULQ ·_121666_213(SB)
1198
-	SHRQ $13,AX
1199
-	ADDQ AX,CX
1200
-	MOVQ DX,R8
1201
-	MOVQ 176(SP),AX
1202
-	MULQ ·_121666_213(SB)
1203
-	SHRQ $13,AX
1204
-	ADDQ AX,R8
1205
-	MOVQ DX,R9
1206
-	MOVQ 184(SP),AX
1207
-	MULQ ·_121666_213(SB)
1208
-	SHRQ $13,AX
1209
-	ADDQ AX,R9
1210
-	MOVQ DX,R10
1211
-	MOVQ 192(SP),AX
1212
-	MULQ ·_121666_213(SB)
1213
-	SHRQ $13,AX
1214
-	ADDQ AX,R10
1215
-	IMUL3Q $19,DX,DX
1216
-	ADDQ DX,SI
1217
-	ADDQ 80(SP),SI
1218
-	ADDQ 88(SP),CX
1219
-	ADDQ 96(SP),R8
1220
-	ADDQ 104(SP),R9
1221
-	ADDQ 112(SP),R10
1222
-	MOVQ SI,80(DI)
1223
-	MOVQ CX,88(DI)
1224
-	MOVQ R8,96(DI)
1225
-	MOVQ R9,104(DI)
1226
-	MOVQ R10,112(DI)
1227
-	MOVQ 104(DI),SI
1228
-	IMUL3Q $19,SI,AX
1229
-	MOVQ AX,0(SP)
1230
-	MULQ 176(SP)
1231
-	MOVQ AX,SI
1232
-	MOVQ DX,CX
1233
-	MOVQ 112(DI),DX
1234
-	IMUL3Q $19,DX,AX
1235
-	MOVQ AX,8(SP)
1236
-	MULQ 168(SP)
1237
-	ADDQ AX,SI
1238
-	ADCQ DX,CX
1239
-	MOVQ 80(DI),AX
1240
-	MULQ 160(SP)
1241
-	ADDQ AX,SI
1242
-	ADCQ DX,CX
1243
-	MOVQ 80(DI),AX
1244
-	MULQ 168(SP)
1245
-	MOVQ AX,R8
1246
-	MOVQ DX,R9
1247
-	MOVQ 80(DI),AX
1248
-	MULQ 176(SP)
1249
-	MOVQ AX,R10
1250
-	MOVQ DX,R11
1251
-	MOVQ 80(DI),AX
1252
-	MULQ 184(SP)
1253
-	MOVQ AX,R12
1254
-	MOVQ DX,R13
1255
-	MOVQ 80(DI),AX
1256
-	MULQ 192(SP)
1257
-	MOVQ AX,R14
1258
-	MOVQ DX,R15
1259
-	MOVQ 88(DI),AX
1260
-	MULQ 160(SP)
1261
-	ADDQ AX,R8
1262
-	ADCQ DX,R9
1263
-	MOVQ 88(DI),AX
1264
-	MULQ 168(SP)
1265
-	ADDQ AX,R10
1266
-	ADCQ DX,R11
1267
-	MOVQ 88(DI),AX
1268
-	MULQ 176(SP)
1269
-	ADDQ AX,R12
1270
-	ADCQ DX,R13
1271
-	MOVQ 88(DI),AX
1272
-	MULQ 184(SP)
1273
-	ADDQ AX,R14
1274
-	ADCQ DX,R15
1275
-	MOVQ 88(DI),DX
1276
-	IMUL3Q $19,DX,AX
1277
-	MULQ 192(SP)
1278
-	ADDQ AX,SI
1279
-	ADCQ DX,CX
1280
-	MOVQ 96(DI),AX
1281
-	MULQ 160(SP)
1282
-	ADDQ AX,R10
1283
-	ADCQ DX,R11
1284
-	MOVQ 96(DI),AX
1285
-	MULQ 168(SP)
1286
-	ADDQ AX,R12
1287
-	ADCQ DX,R13
1288
-	MOVQ 96(DI),AX
1289
-	MULQ 176(SP)
1290
-	ADDQ AX,R14
1291
-	ADCQ DX,R15
1292
-	MOVQ 96(DI),DX
1293
-	IMUL3Q $19,DX,AX
1294
-	MULQ 184(SP)
1295
-	ADDQ AX,SI
1296
-	ADCQ DX,CX
1297
-	MOVQ 96(DI),DX
1298
-	IMUL3Q $19,DX,AX
1299
-	MULQ 192(SP)
1300
-	ADDQ AX,R8
1301
-	ADCQ DX,R9
1302
-	MOVQ 104(DI),AX
1303
-	MULQ 160(SP)
1304
-	ADDQ AX,R12
1305
-	ADCQ DX,R13
1306
-	MOVQ 104(DI),AX
1307
-	MULQ 168(SP)
1308
-	ADDQ AX,R14
1309
-	ADCQ DX,R15
1310
-	MOVQ 0(SP),AX
1311
-	MULQ 184(SP)
1312
-	ADDQ AX,R8
1313
-	ADCQ DX,R9
1314
-	MOVQ 0(SP),AX
1315
-	MULQ 192(SP)
1316
-	ADDQ AX,R10
1317
-	ADCQ DX,R11
1318
-	MOVQ 112(DI),AX
1319
-	MULQ 160(SP)
1320
-	ADDQ AX,R14
1321
-	ADCQ DX,R15
1322
-	MOVQ 8(SP),AX
1323
-	MULQ 176(SP)
1324
-	ADDQ AX,R8
1325
-	ADCQ DX,R9
1326
-	MOVQ 8(SP),AX
1327
-	MULQ 184(SP)
1328
-	ADDQ AX,R10
1329
-	ADCQ DX,R11
1330
-	MOVQ 8(SP),AX
1331
-	MULQ 192(SP)
1332
-	ADDQ AX,R12
1333
-	ADCQ DX,R13
1334
-	MOVQ $REDMASK51,DX
1335
-	SHLQ $13,CX:SI
1336
-	ANDQ DX,SI
1337
-	SHLQ $13,R9:R8
1338
-	ANDQ DX,R8
1339
-	ADDQ CX,R8
1340
-	SHLQ $13,R11:R10
1341
-	ANDQ DX,R10
1342
-	ADDQ R9,R10
1343
-	SHLQ $13,R13:R12
1344
-	ANDQ DX,R12
1345
-	ADDQ R11,R12
1346
-	SHLQ $13,R15:R14
1347
-	ANDQ DX,R14
1348
-	ADDQ R13,R14
1349
-	IMUL3Q $19,R15,CX
1350
-	ADDQ CX,SI
1351
-	MOVQ SI,CX
1352
-	SHRQ $51,CX
1353
-	ADDQ R8,CX
1354
-	MOVQ CX,R8
1355
-	SHRQ $51,CX
1356
-	ANDQ DX,SI
1357
-	ADDQ R10,CX
1358
-	MOVQ CX,R9
1359
-	SHRQ $51,CX
1360
-	ANDQ DX,R8
1361
-	ADDQ R12,CX
1362
-	MOVQ CX,AX
1363
-	SHRQ $51,CX
1364
-	ANDQ DX,R9
1365
-	ADDQ R14,CX
1366
-	MOVQ CX,R10
1367
-	SHRQ $51,CX
1368
-	ANDQ DX,AX
1369
-	IMUL3Q $19,CX,CX
1370
-	ADDQ CX,SI
1371
-	ANDQ DX,R10
1372
-	MOVQ SI,80(DI)
1373
-	MOVQ R8,88(DI)
1374
-	MOVQ R9,96(DI)
1375
-	MOVQ AX,104(DI)
1376
-	MOVQ R10,112(DI)
1377
-	RET
1378 1
deleted file mode 100644
... ...
@@ -1,240 +0,0 @@
1
-// Copyright 2012 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// +build amd64,!gccgo,!appengine
6
-
7
-package curve25519
8
-
9
-// These functions are implemented in the .s files. The names of the functions
10
-// in the rest of the file are also taken from the SUPERCOP sources to help
11
-// people following along.
12
-
13
-//go:noescape
14
-
15
-func cswap(inout *[5]uint64, v uint64)
16
-
17
-//go:noescape
18
-
19
-func ladderstep(inout *[5][5]uint64)
20
-
21
-//go:noescape
22
-
23
-func freeze(inout *[5]uint64)
24
-
25
-//go:noescape
26
-
27
-func mul(dest, a, b *[5]uint64)
28
-
29
-//go:noescape
30
-
31
-func square(out, in *[5]uint64)
32
-
33
-// mladder uses a Montgomery ladder to calculate (xr/zr) *= s.
34
-func mladder(xr, zr *[5]uint64, s *[32]byte) {
35
-	var work [5][5]uint64
36
-
37
-	work[0] = *xr
38
-	setint(&work[1], 1)
39
-	setint(&work[2], 0)
40
-	work[3] = *xr
41
-	setint(&work[4], 1)
42
-
43
-	j := uint(6)
44
-	var prevbit byte
45
-
46
-	for i := 31; i >= 0; i-- {
47
-		for j < 8 {
48
-			bit := ((*s)[i] >> j) & 1
49
-			swap := bit ^ prevbit
50
-			prevbit = bit
51
-			cswap(&work[1], uint64(swap))
52
-			ladderstep(&work)
53
-			j--
54
-		}
55
-		j = 7
56
-	}
57
-
58
-	*xr = work[1]
59
-	*zr = work[2]
60
-}
61
-
62
-func scalarMult(out, in, base *[32]byte) {
63
-	var e [32]byte
64
-	copy(e[:], (*in)[:])
65
-	e[0] &= 248
66
-	e[31] &= 127
67
-	e[31] |= 64
68
-
69
-	var t, z [5]uint64
70
-	unpack(&t, base)
71
-	mladder(&t, &z, &e)
72
-	invert(&z, &z)
73
-	mul(&t, &t, &z)
74
-	pack(out, &t)
75
-}
76
-
77
-func setint(r *[5]uint64, v uint64) {
78
-	r[0] = v
79
-	r[1] = 0
80
-	r[2] = 0
81
-	r[3] = 0
82
-	r[4] = 0
83
-}
84
-
85
-// unpack sets r = x where r consists of 5, 51-bit limbs in little-endian
86
-// order.
87
-func unpack(r *[5]uint64, x *[32]byte) {
88
-	r[0] = uint64(x[0]) |
89
-		uint64(x[1])<<8 |
90
-		uint64(x[2])<<16 |
91
-		uint64(x[3])<<24 |
92
-		uint64(x[4])<<32 |
93
-		uint64(x[5])<<40 |
94
-		uint64(x[6]&7)<<48
95
-
96
-	r[1] = uint64(x[6])>>3 |
97
-		uint64(x[7])<<5 |
98
-		uint64(x[8])<<13 |
99
-		uint64(x[9])<<21 |
100
-		uint64(x[10])<<29 |
101
-		uint64(x[11])<<37 |
102
-		uint64(x[12]&63)<<45
103
-
104
-	r[2] = uint64(x[12])>>6 |
105
-		uint64(x[13])<<2 |
106
-		uint64(x[14])<<10 |
107
-		uint64(x[15])<<18 |
108
-		uint64(x[16])<<26 |
109
-		uint64(x[17])<<34 |
110
-		uint64(x[18])<<42 |
111
-		uint64(x[19]&1)<<50
112
-
113
-	r[3] = uint64(x[19])>>1 |
114
-		uint64(x[20])<<7 |
115
-		uint64(x[21])<<15 |
116
-		uint64(x[22])<<23 |
117
-		uint64(x[23])<<31 |
118
-		uint64(x[24])<<39 |
119
-		uint64(x[25]&15)<<47
120
-
121
-	r[4] = uint64(x[25])>>4 |
122
-		uint64(x[26])<<4 |
123
-		uint64(x[27])<<12 |
124
-		uint64(x[28])<<20 |
125
-		uint64(x[29])<<28 |
126
-		uint64(x[30])<<36 |
127
-		uint64(x[31]&127)<<44
128
-}
129
-
130
-// pack sets out = x where out is the usual, little-endian form of the 5,
131
-// 51-bit limbs in x.
132
-func pack(out *[32]byte, x *[5]uint64) {
133
-	t := *x
134
-	freeze(&t)
135
-
136
-	out[0] = byte(t[0])
137
-	out[1] = byte(t[0] >> 8)
138
-	out[2] = byte(t[0] >> 16)
139
-	out[3] = byte(t[0] >> 24)
140
-	out[4] = byte(t[0] >> 32)
141
-	out[5] = byte(t[0] >> 40)
142
-	out[6] = byte(t[0] >> 48)
143
-
144
-	out[6] ^= byte(t[1]<<3) & 0xf8
145
-	out[7] = byte(t[1] >> 5)
146
-	out[8] = byte(t[1] >> 13)
147
-	out[9] = byte(t[1] >> 21)
148
-	out[10] = byte(t[1] >> 29)
149
-	out[11] = byte(t[1] >> 37)
150
-	out[12] = byte(t[1] >> 45)
151
-
152
-	out[12] ^= byte(t[2]<<6) & 0xc0
153
-	out[13] = byte(t[2] >> 2)
154
-	out[14] = byte(t[2] >> 10)
155
-	out[15] = byte(t[2] >> 18)
156
-	out[16] = byte(t[2] >> 26)
157
-	out[17] = byte(t[2] >> 34)
158
-	out[18] = byte(t[2] >> 42)
159
-	out[19] = byte(t[2] >> 50)
160
-
161
-	out[19] ^= byte(t[3]<<1) & 0xfe
162
-	out[20] = byte(t[3] >> 7)
163
-	out[21] = byte(t[3] >> 15)
164
-	out[22] = byte(t[3] >> 23)
165
-	out[23] = byte(t[3] >> 31)
166
-	out[24] = byte(t[3] >> 39)
167
-	out[25] = byte(t[3] >> 47)
168
-
169
-	out[25] ^= byte(t[4]<<4) & 0xf0
170
-	out[26] = byte(t[4] >> 4)
171
-	out[27] = byte(t[4] >> 12)
172
-	out[28] = byte(t[4] >> 20)
173
-	out[29] = byte(t[4] >> 28)
174
-	out[30] = byte(t[4] >> 36)
175
-	out[31] = byte(t[4] >> 44)
176
-}
177
-
178
-// invert calculates r = x^-1 mod p using Fermat's little theorem.
179
-func invert(r *[5]uint64, x *[5]uint64) {
180
-	var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64
181
-
182
-	square(&z2, x)        /* 2 */
183
-	square(&t, &z2)       /* 4 */
184
-	square(&t, &t)        /* 8 */
185
-	mul(&z9, &t, x)       /* 9 */
186
-	mul(&z11, &z9, &z2)   /* 11 */
187
-	square(&t, &z11)      /* 22 */
188
-	mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */
189
-
190
-	square(&t, &z2_5_0)      /* 2^6 - 2^1 */
191
-	for i := 1; i < 5; i++ { /* 2^10 - 2^5 */
192
-		square(&t, &t)
193
-	}
194
-	mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */
195
-
196
-	square(&t, &z2_10_0)      /* 2^11 - 2^1 */
197
-	for i := 1; i < 10; i++ { /* 2^20 - 2^10 */
198
-		square(&t, &t)
199
-	}
200
-	mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */
201
-
202
-	square(&t, &z2_20_0)      /* 2^21 - 2^1 */
203
-	for i := 1; i < 20; i++ { /* 2^40 - 2^20 */
204
-		square(&t, &t)
205
-	}
206
-	mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */
207
-
208
-	square(&t, &t)            /* 2^41 - 2^1 */
209
-	for i := 1; i < 10; i++ { /* 2^50 - 2^10 */
210
-		square(&t, &t)
211
-	}
212
-	mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */
213
-
214
-	square(&t, &z2_50_0)      /* 2^51 - 2^1 */
215
-	for i := 1; i < 50; i++ { /* 2^100 - 2^50 */
216
-		square(&t, &t)
217
-	}
218
-	mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */
219
-
220
-	square(&t, &z2_100_0)      /* 2^101 - 2^1 */
221
-	for i := 1; i < 100; i++ { /* 2^200 - 2^100 */
222
-		square(&t, &t)
223
-	}
224
-	mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */
225
-
226
-	square(&t, &t)            /* 2^201 - 2^1 */
227
-	for i := 1; i < 50; i++ { /* 2^250 - 2^50 */
228
-		square(&t, &t)
229
-	}
230
-	mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */
231
-
232
-	square(&t, &t) /* 2^251 - 2^1 */
233
-	square(&t, &t) /* 2^252 - 2^2 */
234
-	square(&t, &t) /* 2^253 - 2^3 */
235
-
236
-	square(&t, &t) /* 2^254 - 2^4 */
237
-
238
-	square(&t, &t)   /* 2^255 - 2^5 */
239
-	mul(r, &t, &z11) /* 2^255 - 21 */
240
-}
241 1
deleted file mode 100644
... ...
@@ -1,169 +0,0 @@
1
-// Copyright 2012 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// This code was translated into a form compatible with 6a from the public
6
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
7
-
8
-// +build amd64,!gccgo,!appengine
9
-
10
-#include "const_amd64.h"
11
-
12
-// func mul(dest, a, b *[5]uint64)
13
-TEXT ·mul(SB),0,$16-24
14
-	MOVQ dest+0(FP), DI
15
-	MOVQ a+8(FP), SI
16
-	MOVQ b+16(FP), DX
17
-
18
-	MOVQ DX,CX
19
-	MOVQ 24(SI),DX
20
-	IMUL3Q $19,DX,AX
21
-	MOVQ AX,0(SP)
22
-	MULQ 16(CX)
23
-	MOVQ AX,R8
24
-	MOVQ DX,R9
25
-	MOVQ 32(SI),DX
26
-	IMUL3Q $19,DX,AX
27
-	MOVQ AX,8(SP)
28
-	MULQ 8(CX)
29
-	ADDQ AX,R8
30
-	ADCQ DX,R9
31
-	MOVQ 0(SI),AX
32
-	MULQ 0(CX)
33
-	ADDQ AX,R8
34
-	ADCQ DX,R9
35
-	MOVQ 0(SI),AX
36
-	MULQ 8(CX)
37
-	MOVQ AX,R10
38
-	MOVQ DX,R11
39
-	MOVQ 0(SI),AX
40
-	MULQ 16(CX)
41
-	MOVQ AX,R12
42
-	MOVQ DX,R13
43
-	MOVQ 0(SI),AX
44
-	MULQ 24(CX)
45
-	MOVQ AX,R14
46
-	MOVQ DX,R15
47
-	MOVQ 0(SI),AX
48
-	MULQ 32(CX)
49
-	MOVQ AX,BX
50
-	MOVQ DX,BP
51
-	MOVQ 8(SI),AX
52
-	MULQ 0(CX)
53
-	ADDQ AX,R10
54
-	ADCQ DX,R11
55
-	MOVQ 8(SI),AX
56
-	MULQ 8(CX)
57
-	ADDQ AX,R12
58
-	ADCQ DX,R13
59
-	MOVQ 8(SI),AX
60
-	MULQ 16(CX)
61
-	ADDQ AX,R14
62
-	ADCQ DX,R15
63
-	MOVQ 8(SI),AX
64
-	MULQ 24(CX)
65
-	ADDQ AX,BX
66
-	ADCQ DX,BP
67
-	MOVQ 8(SI),DX
68
-	IMUL3Q $19,DX,AX
69
-	MULQ 32(CX)
70
-	ADDQ AX,R8
71
-	ADCQ DX,R9
72
-	MOVQ 16(SI),AX
73
-	MULQ 0(CX)
74
-	ADDQ AX,R12
75
-	ADCQ DX,R13
76
-	MOVQ 16(SI),AX
77
-	MULQ 8(CX)
78
-	ADDQ AX,R14
79
-	ADCQ DX,R15
80
-	MOVQ 16(SI),AX
81
-	MULQ 16(CX)
82
-	ADDQ AX,BX
83
-	ADCQ DX,BP
84
-	MOVQ 16(SI),DX
85
-	IMUL3Q $19,DX,AX
86
-	MULQ 24(CX)
87
-	ADDQ AX,R8
88
-	ADCQ DX,R9
89
-	MOVQ 16(SI),DX
90
-	IMUL3Q $19,DX,AX
91
-	MULQ 32(CX)
92
-	ADDQ AX,R10
93
-	ADCQ DX,R11
94
-	MOVQ 24(SI),AX
95
-	MULQ 0(CX)
96
-	ADDQ AX,R14
97
-	ADCQ DX,R15
98
-	MOVQ 24(SI),AX
99
-	MULQ 8(CX)
100
-	ADDQ AX,BX
101
-	ADCQ DX,BP
102
-	MOVQ 0(SP),AX
103
-	MULQ 24(CX)
104
-	ADDQ AX,R10
105
-	ADCQ DX,R11
106
-	MOVQ 0(SP),AX
107
-	MULQ 32(CX)
108
-	ADDQ AX,R12
109
-	ADCQ DX,R13
110
-	MOVQ 32(SI),AX
111
-	MULQ 0(CX)
112
-	ADDQ AX,BX
113
-	ADCQ DX,BP
114
-	MOVQ 8(SP),AX
115
-	MULQ 16(CX)
116
-	ADDQ AX,R10
117
-	ADCQ DX,R11
118
-	MOVQ 8(SP),AX
119
-	MULQ 24(CX)
120
-	ADDQ AX,R12
121
-	ADCQ DX,R13
122
-	MOVQ 8(SP),AX
123
-	MULQ 32(CX)
124
-	ADDQ AX,R14
125
-	ADCQ DX,R15
126
-	MOVQ $REDMASK51,SI
127
-	SHLQ $13,R9:R8
128
-	ANDQ SI,R8
129
-	SHLQ $13,R11:R10
130
-	ANDQ SI,R10
131
-	ADDQ R9,R10
132
-	SHLQ $13,R13:R12
133
-	ANDQ SI,R12
134
-	ADDQ R11,R12
135
-	SHLQ $13,R15:R14
136
-	ANDQ SI,R14
137
-	ADDQ R13,R14
138
-	SHLQ $13,BP:BX
139
-	ANDQ SI,BX
140
-	ADDQ R15,BX
141
-	IMUL3Q $19,BP,DX
142
-	ADDQ DX,R8
143
-	MOVQ R8,DX
144
-	SHRQ $51,DX
145
-	ADDQ R10,DX
146
-	MOVQ DX,CX
147
-	SHRQ $51,DX
148
-	ANDQ SI,R8
149
-	ADDQ R12,DX
150
-	MOVQ DX,R9
151
-	SHRQ $51,DX
152
-	ANDQ SI,CX
153
-	ADDQ R14,DX
154
-	MOVQ DX,AX
155
-	SHRQ $51,DX
156
-	ANDQ SI,R9
157
-	ADDQ BX,DX
158
-	MOVQ DX,R10
159
-	SHRQ $51,DX
160
-	ANDQ SI,AX
161
-	IMUL3Q $19,DX,DX
162
-	ADDQ DX,R8
163
-	ANDQ SI,R10
164
-	MOVQ R8,0(DI)
165
-	MOVQ CX,8(DI)
166
-	MOVQ R9,16(DI)
167
-	MOVQ AX,24(DI)
168
-	MOVQ R10,32(DI)
169
-	RET
170 1
deleted file mode 100644
... ...
@@ -1,132 +0,0 @@
1
-// Copyright 2012 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// This code was translated into a form compatible with 6a from the public
6
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
7
-
8
-// +build amd64,!gccgo,!appengine
9
-
10
-#include "const_amd64.h"
11
-
12
-// func square(out, in *[5]uint64)
13
-TEXT ·square(SB),7,$0-16
14
-	MOVQ out+0(FP), DI
15
-	MOVQ in+8(FP), SI
16
-
17
-	MOVQ 0(SI),AX
18
-	MULQ 0(SI)
19
-	MOVQ AX,CX
20
-	MOVQ DX,R8
21
-	MOVQ 0(SI),AX
22
-	SHLQ $1,AX
23
-	MULQ 8(SI)
24
-	MOVQ AX,R9
25
-	MOVQ DX,R10
26
-	MOVQ 0(SI),AX
27
-	SHLQ $1,AX
28
-	MULQ 16(SI)
29
-	MOVQ AX,R11
30
-	MOVQ DX,R12
31
-	MOVQ 0(SI),AX
32
-	SHLQ $1,AX
33
-	MULQ 24(SI)
34
-	MOVQ AX,R13
35
-	MOVQ DX,R14
36
-	MOVQ 0(SI),AX
37
-	SHLQ $1,AX
38
-	MULQ 32(SI)
39
-	MOVQ AX,R15
40
-	MOVQ DX,BX
41
-	MOVQ 8(SI),AX
42
-	MULQ 8(SI)
43
-	ADDQ AX,R11
44
-	ADCQ DX,R12
45
-	MOVQ 8(SI),AX
46
-	SHLQ $1,AX
47
-	MULQ 16(SI)
48
-	ADDQ AX,R13
49
-	ADCQ DX,R14
50
-	MOVQ 8(SI),AX
51
-	SHLQ $1,AX
52
-	MULQ 24(SI)
53
-	ADDQ AX,R15
54
-	ADCQ DX,BX
55
-	MOVQ 8(SI),DX
56
-	IMUL3Q $38,DX,AX
57
-	MULQ 32(SI)
58
-	ADDQ AX,CX
59
-	ADCQ DX,R8
60
-	MOVQ 16(SI),AX
61
-	MULQ 16(SI)
62
-	ADDQ AX,R15
63
-	ADCQ DX,BX
64
-	MOVQ 16(SI),DX
65
-	IMUL3Q $38,DX,AX
66
-	MULQ 24(SI)
67
-	ADDQ AX,CX
68
-	ADCQ DX,R8
69
-	MOVQ 16(SI),DX
70
-	IMUL3Q $38,DX,AX
71
-	MULQ 32(SI)
72
-	ADDQ AX,R9
73
-	ADCQ DX,R10
74
-	MOVQ 24(SI),DX
75
-	IMUL3Q $19,DX,AX
76
-	MULQ 24(SI)
77
-	ADDQ AX,R9
78
-	ADCQ DX,R10
79
-	MOVQ 24(SI),DX
80
-	IMUL3Q $38,DX,AX
81
-	MULQ 32(SI)
82
-	ADDQ AX,R11
83
-	ADCQ DX,R12
84
-	MOVQ 32(SI),DX
85
-	IMUL3Q $19,DX,AX
86
-	MULQ 32(SI)
87
-	ADDQ AX,R13
88
-	ADCQ DX,R14
89
-	MOVQ $REDMASK51,SI
90
-	SHLQ $13,R8:CX
91
-	ANDQ SI,CX
92
-	SHLQ $13,R10:R9
93
-	ANDQ SI,R9
94
-	ADDQ R8,R9
95
-	SHLQ $13,R12:R11
96
-	ANDQ SI,R11
97
-	ADDQ R10,R11
98
-	SHLQ $13,R14:R13
99
-	ANDQ SI,R13
100
-	ADDQ R12,R13
101
-	SHLQ $13,BX:R15
102
-	ANDQ SI,R15
103
-	ADDQ R14,R15
104
-	IMUL3Q $19,BX,DX
105
-	ADDQ DX,CX
106
-	MOVQ CX,DX
107
-	SHRQ $51,DX
108
-	ADDQ R9,DX
109
-	ANDQ SI,CX
110
-	MOVQ DX,R8
111
-	SHRQ $51,DX
112
-	ADDQ R11,DX
113
-	ANDQ SI,R8
114
-	MOVQ DX,R9
115
-	SHRQ $51,DX
116
-	ADDQ R13,DX
117
-	ANDQ SI,R9
118
-	MOVQ DX,AX
119
-	SHRQ $51,DX
120
-	ADDQ R15,DX
121
-	ANDQ SI,AX
122
-	MOVQ DX,R10
123
-	SHRQ $51,DX
124
-	IMUL3Q $19,DX,DX
125
-	ADDQ DX,CX
126
-	ANDQ SI,R10
127
-	MOVQ CX,0(DI)
128
-	MOVQ R8,8(DI)
129
-	MOVQ R9,16(DI)
130
-	MOVQ AX,24(DI)
131
-	MOVQ R10,32(DI)
132
-	RET
... ...
@@ -2,6 +2,11 @@
2 2
 // Use of this source code is governed by a BSD-style
3 3
 // license that can be found in the LICENSE file.
4 4
 
5
+// In Go 1.13, the ed25519 package was promoted to the standard library as
6
+// crypto/ed25519, and this package became a wrapper for the standard library one.
7
+//
8
+// +build !go1.13
9
+
5 10
 // Package ed25519 implements the Ed25519 signature algorithm. See
6 11
 // https://ed25519.cr.yp.to/.
7 12
 //
8 13
new file mode 100644
... ...
@@ -0,0 +1,73 @@
0
+// Copyright 2019 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build go1.13
5
+
6
+// Package ed25519 implements the Ed25519 signature algorithm. See
7
+// https://ed25519.cr.yp.to/.
8
+//
9
+// These functions are also compatible with the “Ed25519” function defined in
10
+// RFC 8032. However, unlike RFC 8032's formulation, this package's private key
11
+// representation includes a public key suffix to make multiple signing
12
+// operations with the same key more efficient. This package refers to the RFC
13
+// 8032 private key as the “seed”.
14
+//
15
+// Beginning with Go 1.13, the functionality of this package was moved to the
16
+// standard library as crypto/ed25519. This package only acts as a compatibility
17
+// wrapper.
18
+package ed25519
19
+
20
+import (
21
+	"crypto/ed25519"
22
+	"io"
23
+)
24
+
25
+const (
26
+	// PublicKeySize is the size, in bytes, of public keys as used in this package.
27
+	PublicKeySize = 32
28
+	// PrivateKeySize is the size, in bytes, of private keys as used in this package.
29
+	PrivateKeySize = 64
30
+	// SignatureSize is the size, in bytes, of signatures generated and verified by this package.
31
+	SignatureSize = 64
32
+	// SeedSize is the size, in bytes, of private key seeds. These are the private key representations used by RFC 8032.
33
+	SeedSize = 32
34
+)
35
+
36
+// PublicKey is the type of Ed25519 public keys.
37
+//
38
+// This type is an alias for crypto/ed25519's PublicKey type.
39
+// See the crypto/ed25519 package for the methods on this type.
40
+type PublicKey = ed25519.PublicKey
41
+
42
+// PrivateKey is the type of Ed25519 private keys. It implements crypto.Signer.
43
+//
44
+// This type is an alias for crypto/ed25519's PrivateKey type.
45
+// See the crypto/ed25519 package for the methods on this type.
46
+type PrivateKey = ed25519.PrivateKey
47
+
48
+// GenerateKey generates a public/private key pair using entropy from rand.
49
+// If rand is nil, crypto/rand.Reader will be used.
50
+func GenerateKey(rand io.Reader) (PublicKey, PrivateKey, error) {
51
+	return ed25519.GenerateKey(rand)
52
+}
53
+
54
+// NewKeyFromSeed calculates a private key from a seed. It will panic if
55
+// len(seed) is not SeedSize. This function is provided for interoperability
56
+// with RFC 8032. RFC 8032's private keys correspond to seeds in this
57
+// package.
58
+func NewKeyFromSeed(seed []byte) PrivateKey {
59
+	return ed25519.NewKeyFromSeed(seed)
60
+}
61
+
62
+// Sign signs the message with privateKey and returns a signature. It will
63
+// panic if len(privateKey) is not PrivateKeySize.
64
+func Sign(privateKey PrivateKey, message []byte) []byte {
65
+	return ed25519.Sign(privateKey, message)
66
+}
67
+
68
+// Verify reports whether sig is a valid signature of message by publicKey. It
69
+// will panic if len(publicKey) is not PublicKeySize.
70
+func Verify(publicKey PublicKey, message, sig []byte) bool {
71
+	return ed25519.Verify(publicKey, message, sig)
72
+}
... ...
@@ -1,3 +1,8 @@
1 1
 module golang.org/x/crypto
2 2
 
3
-require golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e
3
+go 1.11
4
+
5
+require (
6
+	golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3
7
+	golang.org/x/sys v0.0.0-20190412213103-97732733099d
8
+)
... ...
@@ -252,6 +252,7 @@ func Decode(pfxData []byte, password string) (privateKey interface{}, certificat
252 252
 		case bag.Id.Equal(oidPKCS8ShroundedKeyBag):
253 253
 			if privateKey != nil {
254 254
 				err = errors.New("pkcs12: expected exactly one key bag")
255
+				return nil, nil, err
255 256
 			}
256 257
 
257 258
 			if privateKey, err = decodePkcs8ShroudedKeyBag(bag.Value.Bytes, encodedPassword); err != nil {
258 259
new file mode 100644
... ...
@@ -0,0 +1,39 @@
0
+// Copyright 2019 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build !go1.13
5
+
6
+package poly1305
7
+
8
+// Generic fallbacks for the math/bits intrinsics, copied from
9
+// src/math/bits/bits.go. They were added in Go 1.12, but Add64 and Sub64 had
10
+// variable time fallbacks until Go 1.13.
11
+
12
+func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
13
+	sum = x + y + carry
14
+	carryOut = ((x & y) | ((x | y) &^ sum)) >> 63
15
+	return
16
+}
17
+
18
+func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
19
+	diff = x - y - borrow
20
+	borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 63
21
+	return
22
+}
23
+
24
+func bitsMul64(x, y uint64) (hi, lo uint64) {
25
+	const mask32 = 1<<32 - 1
26
+	x0 := x & mask32
27
+	x1 := x >> 32
28
+	y0 := y & mask32
29
+	y1 := y >> 32
30
+	w0 := x0 * y0
31
+	t := x1*y0 + w0>>32
32
+	w1 := t & mask32
33
+	w2 := t >> 32
34
+	w1 += x0 * y1
35
+	hi = x1*y1 + w2 + w1>>32
36
+	lo = x * y
37
+	return
38
+}
0 39
new file mode 100644
... ...
@@ -0,0 +1,21 @@
0
+// Copyright 2019 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build go1.13
5
+
6
+package poly1305
7
+
8
+import "math/bits"
9
+
10
+func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
11
+	return bits.Add64(x, y, carry)
12
+}
13
+
14
+func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
15
+	return bits.Sub64(x, y, borrow)
16
+}
17
+
18
+func bitsMul64(x, y uint64) (hi, lo uint64) {
19
+	return bits.Mul64(x, y)
20
+}
... ...
@@ -2,7 +2,7 @@
2 2
 // Use of this source code is governed by a BSD-style
3 3
 // license that can be found in the LICENSE file.
4 4
 
5
-// +build !amd64 gccgo appengine
5
+// +build !amd64,!ppc64le gccgo appengine
6 6
 
7 7
 package poly1305
8 8
 
... ...
@@ -22,8 +22,14 @@ import "crypto/subtle"
22 22
 // TagSize is the size, in bytes, of a poly1305 authenticator.
23 23
 const TagSize = 16
24 24
 
25
-// Verify returns true if mac is a valid authenticator for m with the given
26
-// key.
25
+// Sum generates an authenticator for m using a one-time key and puts the
26
+// 16-byte result into out. Authenticating two different messages with the same
27
+// key allows an attacker to forge messages at will.
28
+func Sum(out *[16]byte, m []byte, key *[32]byte) {
29
+	sum(out, m, key)
30
+}
31
+
32
+// Verify returns true if mac is a valid authenticator for m with the given key.
27 33
 func Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
28 34
 	var tmp [16]byte
29 35
 	Sum(&tmp, m, key)
... ...
@@ -7,62 +7,52 @@
7 7
 package poly1305
8 8
 
9 9
 //go:noescape
10
-func initialize(state *[7]uint64, key *[32]byte)
10
+func update(state *macState, msg []byte)
11 11
 
12
-//go:noescape
13
-func update(state *[7]uint64, msg []byte)
14
-
15
-//go:noescape
16
-func finalize(tag *[TagSize]byte, state *[7]uint64)
17
-
18
-// Sum generates an authenticator for m using a one-time key and puts the
19
-// 16-byte result into out. Authenticating two different messages with the same
20
-// key allows an attacker to forge messages at will.
21
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
12
+func sum(out *[16]byte, m []byte, key *[32]byte) {
22 13
 	h := newMAC(key)
23 14
 	h.Write(m)
24 15
 	h.Sum(out)
25 16
 }
26 17
 
27 18
 func newMAC(key *[32]byte) (h mac) {
28
-	initialize(&h.state, key)
19
+	initialize(key, &h.r, &h.s)
29 20
 	return
30 21
 }
31 22
 
32
-type mac struct {
33
-	state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
34
-
35
-	buffer [TagSize]byte
36
-	offset int
37
-}
23
+// mac is a wrapper for macGeneric that redirects calls that would have gone to
24
+// updateGeneric to update.
25
+//
26
+// Its Write and Sum methods are otherwise identical to the macGeneric ones, but
27
+// using function pointers would carry a major performance cost.
28
+type mac struct{ macGeneric }
38 29
 
39
-func (h *mac) Write(p []byte) (n int, err error) {
40
-	n = len(p)
30
+func (h *mac) Write(p []byte) (int, error) {
31
+	nn := len(p)
41 32
 	if h.offset > 0 {
42
-		remaining := TagSize - h.offset
43
-		if n < remaining {
44
-			h.offset += copy(h.buffer[h.offset:], p)
45
-			return n, nil
33
+		n := copy(h.buffer[h.offset:], p)
34
+		if h.offset+n < TagSize {
35
+			h.offset += n
36
+			return nn, nil
46 37
 		}
47
-		copy(h.buffer[h.offset:], p[:remaining])
48
-		p = p[remaining:]
38
+		p = p[n:]
49 39
 		h.offset = 0
50
-		update(&h.state, h.buffer[:])
40
+		update(&h.macState, h.buffer[:])
51 41
 	}
52
-	if nn := len(p) - (len(p) % TagSize); nn > 0 {
53
-		update(&h.state, p[:nn])
54
-		p = p[nn:]
42
+	if n := len(p) - (len(p) % TagSize); n > 0 {
43
+		update(&h.macState, p[:n])
44
+		p = p[n:]
55 45
 	}
56 46
 	if len(p) > 0 {
57 47
 		h.offset += copy(h.buffer[h.offset:], p)
58 48
 	}
59
-	return n, nil
49
+	return nn, nil
60 50
 }
61 51
 
62 52
 func (h *mac) Sum(out *[16]byte) {
63
-	state := h.state
53
+	state := h.macState
64 54
 	if h.offset > 0 {
65 55
 		update(&state, h.buffer[:h.offset])
66 56
 	}
67
-	finalize(out, &state)
57
+	finalize(out, &state.h, &state.s)
68 58
 }
... ...
@@ -54,10 +54,6 @@
54 54
 	ADCQ  t3, h1;                  \
55 55
 	ADCQ  $0, h2
56 56
 
57
-DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
58
-DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
59
-GLOBL ·poly1305Mask<>(SB), RODATA, $16
60
-
61 57
 // func update(state *macState, msg []byte)
62 58
 TEXT ·update(SB), $0-32
63 59
 	MOVQ state+0(FP), DI
... ...
@@ -110,39 +106,3 @@ done:
110 110
 	MOVQ R9, 8(DI)
111 111
 	MOVQ R10, 16(DI)
112 112
 	RET
113
-
114
-// func initialize(state *[7]uint64, key *[32]byte)
115
-TEXT ·initialize(SB), $0-16
116
-	MOVQ state+0(FP), DI
117
-	MOVQ key+8(FP), SI
118
-
119
-	// state[0...7] is initialized with zero
120
-	MOVOU 0(SI), X0
121
-	MOVOU 16(SI), X1
122
-	MOVOU ·poly1305Mask<>(SB), X2
123
-	PAND  X2, X0
124
-	MOVOU X0, 24(DI)
125
-	MOVOU X1, 40(DI)
126
-	RET
127
-
128
-// func finalize(tag *[TagSize]byte, state *[7]uint64)
129
-TEXT ·finalize(SB), $0-16
130
-	MOVQ tag+0(FP), DI
131
-	MOVQ state+8(FP), SI
132
-
133
-	MOVQ    0(SI), AX
134
-	MOVQ    8(SI), BX
135
-	MOVQ    16(SI), CX
136
-	MOVQ    AX, R8
137
-	MOVQ    BX, R9
138
-	SUBQ    $0xFFFFFFFFFFFFFFFB, AX
139
-	SBBQ    $0xFFFFFFFFFFFFFFFF, BX
140
-	SBBQ    $3, CX
141
-	CMOVQCS R8, AX
142
-	CMOVQCS R9, BX
143
-	ADDQ    40(SI), AX
144
-	ADCQ    48(SI), BX
145
-
146
-	MOVQ AX, 0(DI)
147
-	MOVQ BX, 8(DI)
148
-	RET
149 113
deleted file mode 100644
... ...
@@ -1,22 +0,0 @@
1
-// Copyright 2015 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// +build arm,!gccgo,!appengine,!nacl
6
-
7
-package poly1305
8
-
9
-// This function is implemented in sum_arm.s
10
-//go:noescape
11
-func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
12
-
13
-// Sum generates an authenticator for m using a one-time key and puts the
14
-// 16-byte result into out. Authenticating two different messages with the same
15
-// key allows an attacker to forge messages at will.
16
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
17
-	var mPtr *byte
18
-	if len(m) > 0 {
19
-		mPtr = &m[0]
20
-	}
21
-	poly1305_auth_armv6(out, mPtr, uint32(len(m)), key)
22
-}
23 1
deleted file mode 100644
... ...
@@ -1,427 +0,0 @@
1
-// Copyright 2015 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// +build arm,!gccgo,!appengine,!nacl
6
-
7
-#include "textflag.h"
8
-
9
-// This code was translated into a form compatible with 5a from the public
10
-// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
11
-
12
-DATA ·poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
13
-DATA ·poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
14
-DATA ·poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
15
-DATA ·poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
16
-DATA ·poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
17
-GLOBL ·poly1305_init_constants_armv6<>(SB), 8, $20
18
-
19
-// Warning: the linker may use R11 to synthesize certain instructions. Please
20
-// take care and verify that no synthetic instructions use it.
21
-
22
-TEXT poly1305_init_ext_armv6<>(SB), NOSPLIT, $0
23
-	// Needs 16 bytes of stack and 64 bytes of space pointed to by R0.  (It
24
-	// might look like it's only 60 bytes of space but the final four bytes
25
-	// will be written by another function.) We need to skip over four
26
-	// bytes of stack because that's saving the value of 'g'.
27
-	ADD       $4, R13, R8
28
-	MOVM.IB   [R4-R7], (R8)
29
-	MOVM.IA.W (R1), [R2-R5]
30
-	MOVW      $·poly1305_init_constants_armv6<>(SB), R7
31
-	MOVW      R2, R8
32
-	MOVW      R2>>26, R9
33
-	MOVW      R3>>20, g
34
-	MOVW      R4>>14, R11
35
-	MOVW      R5>>8, R12
36
-	ORR       R3<<6, R9, R9
37
-	ORR       R4<<12, g, g
38
-	ORR       R5<<18, R11, R11
39
-	MOVM.IA   (R7), [R2-R6]
40
-	AND       R8, R2, R2
41
-	AND       R9, R3, R3
42
-	AND       g, R4, R4
43
-	AND       R11, R5, R5
44
-	AND       R12, R6, R6
45
-	MOVM.IA.W [R2-R6], (R0)
46
-	EOR       R2, R2, R2
47
-	EOR       R3, R3, R3
48
-	EOR       R4, R4, R4
49
-	EOR       R5, R5, R5
50
-	EOR       R6, R6, R6
51
-	MOVM.IA.W [R2-R6], (R0)
52
-	MOVM.IA.W (R1), [R2-R5]
53
-	MOVM.IA   [R2-R6], (R0)
54
-	ADD       $20, R13, R0
55
-	MOVM.DA   (R0), [R4-R7]
56
-	RET
57
-
58
-#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
59
-	MOVBU (offset+0)(Rsrc), Rtmp; \
60
-	MOVBU Rtmp, (offset+0)(Rdst); \
61
-	MOVBU (offset+1)(Rsrc), Rtmp; \
62
-	MOVBU Rtmp, (offset+1)(Rdst); \
63
-	MOVBU (offset+2)(Rsrc), Rtmp; \
64
-	MOVBU Rtmp, (offset+2)(Rdst); \
65
-	MOVBU (offset+3)(Rsrc), Rtmp; \
66
-	MOVBU Rtmp, (offset+3)(Rdst)
67
-
68
-TEXT poly1305_blocks_armv6<>(SB), NOSPLIT, $0
69
-	// Needs 24 bytes of stack for saved registers and then 88 bytes of
70
-	// scratch space after that. We assume that 24 bytes at (R13) have
71
-	// already been used: four bytes for the link register saved in the
72
-	// prelude of poly1305_auth_armv6, four bytes for saving the value of g
73
-	// in that function and 16 bytes of scratch space used around
74
-	// poly1305_finish_ext_armv6_skip1.
75
-	ADD     $24, R13, R12
76
-	MOVM.IB [R4-R8, R14], (R12)
77
-	MOVW    R0, 88(R13)
78
-	MOVW    R1, 92(R13)
79
-	MOVW    R2, 96(R13)
80
-	MOVW    R1, R14
81
-	MOVW    R2, R12
82
-	MOVW    56(R0), R8
83
-	WORD    $0xe1180008                // TST R8, R8 not working see issue 5921
84
-	EOR     R6, R6, R6
85
-	MOVW.EQ $(1<<24), R6
86
-	MOVW    R6, 84(R13)
87
-	ADD     $116, R13, g
88
-	MOVM.IA (R0), [R0-R9]
89
-	MOVM.IA [R0-R4], (g)
90
-	CMP     $16, R12
91
-	BLO     poly1305_blocks_armv6_done
92
-
93
-poly1305_blocks_armv6_mainloop:
94
-	WORD    $0xe31e0003                            // TST R14, #3 not working see issue 5921
95
-	BEQ     poly1305_blocks_armv6_mainloop_aligned
96
-	ADD     $100, R13, g
97
-	MOVW_UNALIGNED(R14, g, R0, 0)
98
-	MOVW_UNALIGNED(R14, g, R0, 4)
99
-	MOVW_UNALIGNED(R14, g, R0, 8)
100
-	MOVW_UNALIGNED(R14, g, R0, 12)
101
-	MOVM.IA (g), [R0-R3]
102
-	ADD     $16, R14
103
-	B       poly1305_blocks_armv6_mainloop_loaded
104
-
105
-poly1305_blocks_armv6_mainloop_aligned:
106
-	MOVM.IA.W (R14), [R0-R3]
107
-
108
-poly1305_blocks_armv6_mainloop_loaded:
109
-	MOVW    R0>>26, g
110
-	MOVW    R1>>20, R11
111
-	MOVW    R2>>14, R12
112
-	MOVW    R14, 92(R13)
113
-	MOVW    R3>>8, R4
114
-	ORR     R1<<6, g, g
115
-	ORR     R2<<12, R11, R11
116
-	ORR     R3<<18, R12, R12
117
-	BIC     $0xfc000000, R0, R0
118
-	BIC     $0xfc000000, g, g
119
-	MOVW    84(R13), R3
120
-	BIC     $0xfc000000, R11, R11
121
-	BIC     $0xfc000000, R12, R12
122
-	ADD     R0, R5, R5
123
-	ADD     g, R6, R6
124
-	ORR     R3, R4, R4
125
-	ADD     R11, R7, R7
126
-	ADD     $116, R13, R14
127
-	ADD     R12, R8, R8
128
-	ADD     R4, R9, R9
129
-	MOVM.IA (R14), [R0-R4]
130
-	MULLU   R4, R5, (R11, g)
131
-	MULLU   R3, R5, (R14, R12)
132
-	MULALU  R3, R6, (R11, g)
133
-	MULALU  R2, R6, (R14, R12)
134
-	MULALU  R2, R7, (R11, g)
135
-	MULALU  R1, R7, (R14, R12)
136
-	ADD     R4<<2, R4, R4
137
-	ADD     R3<<2, R3, R3
138
-	MULALU  R1, R8, (R11, g)
139
-	MULALU  R0, R8, (R14, R12)
140
-	MULALU  R0, R9, (R11, g)
141
-	MULALU  R4, R9, (R14, R12)
142
-	MOVW    g, 76(R13)
143
-	MOVW    R11, 80(R13)
144
-	MOVW    R12, 68(R13)
145
-	MOVW    R14, 72(R13)
146
-	MULLU   R2, R5, (R11, g)
147
-	MULLU   R1, R5, (R14, R12)
148
-	MULALU  R1, R6, (R11, g)
149
-	MULALU  R0, R6, (R14, R12)
150
-	MULALU  R0, R7, (R11, g)
151
-	MULALU  R4, R7, (R14, R12)
152
-	ADD     R2<<2, R2, R2
153
-	ADD     R1<<2, R1, R1
154
-	MULALU  R4, R8, (R11, g)
155
-	MULALU  R3, R8, (R14, R12)
156
-	MULALU  R3, R9, (R11, g)
157
-	MULALU  R2, R9, (R14, R12)
158
-	MOVW    g, 60(R13)
159
-	MOVW    R11, 64(R13)
160
-	MOVW    R12, 52(R13)
161
-	MOVW    R14, 56(R13)
162
-	MULLU   R0, R5, (R11, g)
163
-	MULALU  R4, R6, (R11, g)
164
-	MULALU  R3, R7, (R11, g)
165
-	MULALU  R2, R8, (R11, g)
166
-	MULALU  R1, R9, (R11, g)
167
-	ADD     $52, R13, R0
168
-	MOVM.IA (R0), [R0-R7]
169
-	MOVW    g>>26, R12
170
-	MOVW    R4>>26, R14
171
-	ORR     R11<<6, R12, R12
172
-	ORR     R5<<6, R14, R14
173
-	BIC     $0xfc000000, g, g
174
-	BIC     $0xfc000000, R4, R4
175
-	ADD.S   R12, R0, R0
176
-	ADC     $0, R1, R1
177
-	ADD.S   R14, R6, R6
178
-	ADC     $0, R7, R7
179
-	MOVW    R0>>26, R12
180
-	MOVW    R6>>26, R14
181
-	ORR     R1<<6, R12, R12
182
-	ORR     R7<<6, R14, R14
183
-	BIC     $0xfc000000, R0, R0
184
-	BIC     $0xfc000000, R6, R6
185
-	ADD     R14<<2, R14, R14
186
-	ADD.S   R12, R2, R2
187
-	ADC     $0, R3, R3
188
-	ADD     R14, g, g
189
-	MOVW    R2>>26, R12
190
-	MOVW    g>>26, R14
191
-	ORR     R3<<6, R12, R12
192
-	BIC     $0xfc000000, g, R5
193
-	BIC     $0xfc000000, R2, R7
194
-	ADD     R12, R4, R4
195
-	ADD     R14, R0, R0
196
-	MOVW    R4>>26, R12
197
-	BIC     $0xfc000000, R4, R8
198
-	ADD     R12, R6, R9
199
-	MOVW    96(R13), R12
200
-	MOVW    92(R13), R14
201
-	MOVW    R0, R6
202
-	CMP     $32, R12
203
-	SUB     $16, R12, R12
204
-	MOVW    R12, 96(R13)
205
-	BHS     poly1305_blocks_armv6_mainloop
206
-
207
-poly1305_blocks_armv6_done:
208
-	MOVW    88(R13), R12
209
-	MOVW    R5, 20(R12)
210
-	MOVW    R6, 24(R12)
211
-	MOVW    R7, 28(R12)
212
-	MOVW    R8, 32(R12)
213
-	MOVW    R9, 36(R12)
214
-	ADD     $48, R13, R0
215
-	MOVM.DA (R0), [R4-R8, R14]
216
-	RET
217
-
218
-#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
219
-	MOVBU.P 1(Rsrc), Rtmp; \
220
-	MOVBU.P Rtmp, 1(Rdst); \
221
-	MOVBU.P 1(Rsrc), Rtmp; \
222
-	MOVBU.P Rtmp, 1(Rdst)
223
-
224
-#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
225
-	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
226
-	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
227
-
228
-// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
229
-TEXT ·poly1305_auth_armv6(SB), $196-16
230
-	// The value 196, just above, is the sum of 64 (the size of the context
231
-	// structure) and 132 (the amount of stack needed).
232
-	//
233
-	// At this point, the stack pointer (R13) has been moved down. It
234
-	// points to the saved link register and there's 196 bytes of free
235
-	// space above it.
236
-	//
237
-	// The stack for this function looks like:
238
-	//
239
-	// +---------------------
240
-	// |
241
-	// | 64 bytes of context structure
242
-	// |
243
-	// +---------------------
244
-	// |
245
-	// | 112 bytes for poly1305_blocks_armv6
246
-	// |
247
-	// +---------------------
248
-	// | 16 bytes of final block, constructed at
249
-	// | poly1305_finish_ext_armv6_skip8
250
-	// +---------------------
251
-	// | four bytes of saved 'g'
252
-	// +---------------------
253
-	// | lr, saved by prelude    <- R13 points here
254
-	// +---------------------
255
-	MOVW g, 4(R13)
256
-
257
-	MOVW out+0(FP), R4
258
-	MOVW m+4(FP), R5
259
-	MOVW mlen+8(FP), R6
260
-	MOVW key+12(FP), R7
261
-
262
-	ADD  $136, R13, R0 // 136 = 4 + 4 + 16 + 112
263
-	MOVW R7, R1
264
-
265
-	// poly1305_init_ext_armv6 will write to the stack from R13+4, but
266
-	// that's ok because none of the other values have been written yet.
267
-	BL    poly1305_init_ext_armv6<>(SB)
268
-	BIC.S $15, R6, R2
269
-	BEQ   poly1305_auth_armv6_noblocks
270
-	ADD   $136, R13, R0
271
-	MOVW  R5, R1
272
-	ADD   R2, R5, R5
273
-	SUB   R2, R6, R6
274
-	BL    poly1305_blocks_armv6<>(SB)
275
-
276
-poly1305_auth_armv6_noblocks:
277
-	ADD  $136, R13, R0
278
-	MOVW R5, R1
279
-	MOVW R6, R2
280
-	MOVW R4, R3
281
-
282
-	MOVW  R0, R5
283
-	MOVW  R1, R6
284
-	MOVW  R2, R7
285
-	MOVW  R3, R8
286
-	AND.S R2, R2, R2
287
-	BEQ   poly1305_finish_ext_armv6_noremaining
288
-	EOR   R0, R0
289
-	ADD   $8, R13, R9                           // 8 = offset to 16 byte scratch space
290
-	MOVW  R0, (R9)
291
-	MOVW  R0, 4(R9)
292
-	MOVW  R0, 8(R9)
293
-	MOVW  R0, 12(R9)
294
-	WORD  $0xe3110003                           // TST R1, #3 not working see issue 5921
295
-	BEQ   poly1305_finish_ext_armv6_aligned
296
-	WORD  $0xe3120008                           // TST R2, #8 not working see issue 5921
297
-	BEQ   poly1305_finish_ext_armv6_skip8
298
-	MOVWP_UNALIGNED(R1, R9, g)
299
-	MOVWP_UNALIGNED(R1, R9, g)
300
-
301
-poly1305_finish_ext_armv6_skip8:
302
-	WORD $0xe3120004                     // TST $4, R2 not working see issue 5921
303
-	BEQ  poly1305_finish_ext_armv6_skip4
304
-	MOVWP_UNALIGNED(R1, R9, g)
305
-
306
-poly1305_finish_ext_armv6_skip4:
307
-	WORD $0xe3120002                     // TST $2, R2 not working see issue 5921
308
-	BEQ  poly1305_finish_ext_armv6_skip2
309
-	MOVHUP_UNALIGNED(R1, R9, g)
310
-	B    poly1305_finish_ext_armv6_skip2
311
-
312
-poly1305_finish_ext_armv6_aligned:
313
-	WORD      $0xe3120008                             // TST R2, #8 not working see issue 5921
314
-	BEQ       poly1305_finish_ext_armv6_skip8_aligned
315
-	MOVM.IA.W (R1), [g-R11]
316
-	MOVM.IA.W [g-R11], (R9)
317
-
318
-poly1305_finish_ext_armv6_skip8_aligned:
319
-	WORD   $0xe3120004                             // TST $4, R2 not working see issue 5921
320
-	BEQ    poly1305_finish_ext_armv6_skip4_aligned
321
-	MOVW.P 4(R1), g
322
-	MOVW.P g, 4(R9)
323
-
324
-poly1305_finish_ext_armv6_skip4_aligned:
325
-	WORD    $0xe3120002                     // TST $2, R2 not working see issue 5921
326
-	BEQ     poly1305_finish_ext_armv6_skip2
327
-	MOVHU.P 2(R1), g
328
-	MOVH.P  g, 2(R9)
329
-
330
-poly1305_finish_ext_armv6_skip2:
331
-	WORD    $0xe3120001                     // TST $1, R2 not working see issue 5921
332
-	BEQ     poly1305_finish_ext_armv6_skip1
333
-	MOVBU.P 1(R1), g
334
-	MOVBU.P g, 1(R9)
335
-
336
-poly1305_finish_ext_armv6_skip1:
337
-	MOVW  $1, R11
338
-	MOVBU R11, 0(R9)
339
-	MOVW  R11, 56(R5)
340
-	MOVW  R5, R0
341
-	ADD   $8, R13, R1
342
-	MOVW  $16, R2
343
-	BL    poly1305_blocks_armv6<>(SB)
344
-
345
-poly1305_finish_ext_armv6_noremaining:
346
-	MOVW      20(R5), R0
347
-	MOVW      24(R5), R1
348
-	MOVW      28(R5), R2
349
-	MOVW      32(R5), R3
350
-	MOVW      36(R5), R4
351
-	MOVW      R4>>26, R12
352
-	BIC       $0xfc000000, R4, R4
353
-	ADD       R12<<2, R12, R12
354
-	ADD       R12, R0, R0
355
-	MOVW      R0>>26, R12
356
-	BIC       $0xfc000000, R0, R0
357
-	ADD       R12, R1, R1
358
-	MOVW      R1>>26, R12
359
-	BIC       $0xfc000000, R1, R1
360
-	ADD       R12, R2, R2
361
-	MOVW      R2>>26, R12
362
-	BIC       $0xfc000000, R2, R2
363
-	ADD       R12, R3, R3
364
-	MOVW      R3>>26, R12
365
-	BIC       $0xfc000000, R3, R3
366
-	ADD       R12, R4, R4
367
-	ADD       $5, R0, R6
368
-	MOVW      R6>>26, R12
369
-	BIC       $0xfc000000, R6, R6
370
-	ADD       R12, R1, R7
371
-	MOVW      R7>>26, R12
372
-	BIC       $0xfc000000, R7, R7
373
-	ADD       R12, R2, g
374
-	MOVW      g>>26, R12
375
-	BIC       $0xfc000000, g, g
376
-	ADD       R12, R3, R11
377
-	MOVW      $-(1<<26), R12
378
-	ADD       R11>>26, R12, R12
379
-	BIC       $0xfc000000, R11, R11
380
-	ADD       R12, R4, R9
381
-	MOVW      R9>>31, R12
382
-	SUB       $1, R12
383
-	AND       R12, R6, R6
384
-	AND       R12, R7, R7
385
-	AND       R12, g, g
386
-	AND       R12, R11, R11
387
-	AND       R12, R9, R9
388
-	MVN       R12, R12
389
-	AND       R12, R0, R0
390
-	AND       R12, R1, R1
391
-	AND       R12, R2, R2
392
-	AND       R12, R3, R3
393
-	AND       R12, R4, R4
394
-	ORR       R6, R0, R0
395
-	ORR       R7, R1, R1
396
-	ORR       g, R2, R2
397
-	ORR       R11, R3, R3
398
-	ORR       R9, R4, R4
399
-	ORR       R1<<26, R0, R0
400
-	MOVW      R1>>6, R1
401
-	ORR       R2<<20, R1, R1
402
-	MOVW      R2>>12, R2
403
-	ORR       R3<<14, R2, R2
404
-	MOVW      R3>>18, R3
405
-	ORR       R4<<8, R3, R3
406
-	MOVW      40(R5), R6
407
-	MOVW      44(R5), R7
408
-	MOVW      48(R5), g
409
-	MOVW      52(R5), R11
410
-	ADD.S     R6, R0, R0
411
-	ADC.S     R7, R1, R1
412
-	ADC.S     g, R2, R2
413
-	ADC.S     R11, R3, R3
414
-	MOVM.IA   [R0-R3], (R8)
415
-	MOVW      R5, R12
416
-	EOR       R0, R0, R0
417
-	EOR       R1, R1, R1
418
-	EOR       R2, R2, R2
419
-	EOR       R3, R3, R3
420
-	EOR       R4, R4, R4
421
-	EOR       R5, R5, R5
422
-	EOR       R6, R6, R6
423
-	EOR       R7, R7, R7
424
-	MOVM.IA.W [R0-R7], (R12)
425
-	MOVM.IA   [R0-R7], (R12)
426
-	MOVW      4(R13), g
427
-	RET
... ...
@@ -2,18 +2,29 @@
2 2
 // Use of this source code is governed by a BSD-style
3 3
 // license that can be found in the LICENSE file.
4 4
 
5
+// This file provides the generic implementation of Sum and MAC. Other files
6
+// might provide optimized assembly implementations of some of this code.
7
+
5 8
 package poly1305
6 9
 
7 10
 import "encoding/binary"
8 11
 
9
-const (
10
-	msgBlock   = uint32(1 << 24)
11
-	finalBlock = uint32(0)
12
-)
12
+// Poly1305 [RFC 7539] is a relatively simple algorithm: the authentication tag
13
+// for a 64-byte message is approximately
14
+//
15
+//     s + m[0:16] * r⁴ + m[16:32] * r³ + m[32:48] * r² + m[48:64] * r  mod  2¹³⁰ - 5
16
+//
17
+// for some secret r and s. It can be computed sequentially like
18
+//
19
+//     for len(msg) > 0:
20
+//         h += read(msg, 16)
21
+//         h *= r
22
+//         h %= 2¹³⁰ - 5
23
+//     return h + s
24
+//
25
+// All the complexity is about doing performant constant-time math on numbers
26
+// larger than any available numeric type.
13 27
 
14
-// sumGeneric generates an authenticator for msg using a one-time key and
15
-// puts the 16-byte result into out. This is the generic implementation of
16
-// Sum and should be called if no assembly implementation is available.
17 28
 func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
18 29
 	h := newMACGeneric(key)
19 30
 	h.Write(msg)
... ...
@@ -21,152 +32,276 @@ func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
21 21
 }
22 22
 
23 23
 func newMACGeneric(key *[32]byte) (h macGeneric) {
24
-	h.r[0] = binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff
25
-	h.r[1] = (binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03
26
-	h.r[2] = (binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff
27
-	h.r[3] = (binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff
28
-	h.r[4] = (binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff
29
-
30
-	h.s[0] = binary.LittleEndian.Uint32(key[16:])
31
-	h.s[1] = binary.LittleEndian.Uint32(key[20:])
32
-	h.s[2] = binary.LittleEndian.Uint32(key[24:])
33
-	h.s[3] = binary.LittleEndian.Uint32(key[28:])
24
+	initialize(key, &h.r, &h.s)
34 25
 	return
35 26
 }
36 27
 
28
+// macState holds numbers in saturated 64-bit little-endian limbs. That is,
29
+// the value of [x0, x1, x2] is x[0] + x[1] * 2⁶⁴ + x[2] * 2¹²⁸.
30
+type macState struct {
31
+	// h is the main accumulator. It is to be interpreted modulo 2¹³⁰ - 5, but
32
+	// can grow larger during and after rounds.
33
+	h [3]uint64
34
+	// r and s are the private key components.
35
+	r [2]uint64
36
+	s [2]uint64
37
+}
38
+
37 39
 type macGeneric struct {
38
-	h, r [5]uint32
39
-	s    [4]uint32
40
+	macState
40 41
 
41 42
 	buffer [TagSize]byte
42 43
 	offset int
43 44
 }
44 45
 
45
-func (h *macGeneric) Write(p []byte) (n int, err error) {
46
-	n = len(p)
46
+// Write splits the incoming message into TagSize chunks, and passes them to
47
+// update. It buffers incomplete chunks.
48
+func (h *macGeneric) Write(p []byte) (int, error) {
49
+	nn := len(p)
47 50
 	if h.offset > 0 {
48
-		remaining := TagSize - h.offset
49
-		if n < remaining {
50
-			h.offset += copy(h.buffer[h.offset:], p)
51
-			return n, nil
51
+		n := copy(h.buffer[h.offset:], p)
52
+		if h.offset+n < TagSize {
53
+			h.offset += n
54
+			return nn, nil
52 55
 		}
53
-		copy(h.buffer[h.offset:], p[:remaining])
54
-		p = p[remaining:]
56
+		p = p[n:]
55 57
 		h.offset = 0
56
-		updateGeneric(h.buffer[:], msgBlock, &(h.h), &(h.r))
58
+		updateGeneric(&h.macState, h.buffer[:])
57 59
 	}
58
-	if nn := len(p) - (len(p) % TagSize); nn > 0 {
59
-		updateGeneric(p, msgBlock, &(h.h), &(h.r))
60
-		p = p[nn:]
60
+	if n := len(p) - (len(p) % TagSize); n > 0 {
61
+		updateGeneric(&h.macState, p[:n])
62
+		p = p[n:]
61 63
 	}
62 64
 	if len(p) > 0 {
63 65
 		h.offset += copy(h.buffer[h.offset:], p)
64 66
 	}
65
-	return n, nil
67
+	return nn, nil
66 68
 }
67 69
 
68
-func (h *macGeneric) Sum(out *[16]byte) {
69
-	H, R := h.h, h.r
70
+// Sum flushes the last incomplete chunk from the buffer, if any, and generates
71
+// the MAC output. It does not modify its state, in order to allow for multiple
72
+// calls to Sum, even if no Write is allowed after Sum.
73
+func (h *macGeneric) Sum(out *[TagSize]byte) {
74
+	state := h.macState
70 75
 	if h.offset > 0 {
71
-		var buffer [TagSize]byte
72
-		copy(buffer[:], h.buffer[:h.offset])
73
-		buffer[h.offset] = 1 // invariant: h.offset < TagSize
74
-		updateGeneric(buffer[:], finalBlock, &H, &R)
76
+		updateGeneric(&state, h.buffer[:h.offset])
75 77
 	}
76
-	finalizeGeneric(out, &H, &(h.s))
78
+	finalize(out, &state.h, &state.s)
79
+}
80
+
81
+// [rMask0, rMask1] is the specified Poly1305 clamping mask in little-endian. It
82
+// clears some bits of the secret coefficient to make it possible to implement
83
+// multiplication more efficiently.
84
+const (
85
+	rMask0 = 0x0FFFFFFC0FFFFFFF
86
+	rMask1 = 0x0FFFFFFC0FFFFFFC
87
+)
88
+
89
+func initialize(key *[32]byte, r, s *[2]uint64) {
90
+	r[0] = binary.LittleEndian.Uint64(key[0:8]) & rMask0
91
+	r[1] = binary.LittleEndian.Uint64(key[8:16]) & rMask1
92
+	s[0] = binary.LittleEndian.Uint64(key[16:24])
93
+	s[1] = binary.LittleEndian.Uint64(key[24:32])
94
+}
95
+
96
+// uint128 holds a 128-bit number as two 64-bit limbs, for use with the
97
+// bits.Mul64 and bits.Add64 intrinsics.
98
+type uint128 struct {
99
+	lo, hi uint64
100
+}
101
+
102
+func mul64(a, b uint64) uint128 {
103
+	hi, lo := bitsMul64(a, b)
104
+	return uint128{lo, hi}
77 105
 }
78 106
 
79
-func updateGeneric(msg []byte, flag uint32, h, r *[5]uint32) {
80
-	h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
81
-	r0, r1, r2, r3, r4 := uint64(r[0]), uint64(r[1]), uint64(r[2]), uint64(r[3]), uint64(r[4])
82
-	R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5
83
-
84
-	for len(msg) >= TagSize {
85
-		// h += msg
86
-		h0 += binary.LittleEndian.Uint32(msg[0:]) & 0x3ffffff
87
-		h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff
88
-		h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff
89
-		h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff
90
-		h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | flag
91
-
92
-		// h *= r
93
-		d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
94
-		d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
95
-		d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
96
-		d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
97
-		d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
98
-
99
-		// h %= p
100
-		h0 = uint32(d0) & 0x3ffffff
101
-		h1 = uint32(d1) & 0x3ffffff
102
-		h2 = uint32(d2) & 0x3ffffff
103
-		h3 = uint32(d3) & 0x3ffffff
104
-		h4 = uint32(d4) & 0x3ffffff
105
-
106
-		h0 += uint32(d4>>26) * 5
107
-		h1 += h0 >> 26
108
-		h0 = h0 & 0x3ffffff
109
-
110
-		msg = msg[TagSize:]
107
+func add128(a, b uint128) uint128 {
108
+	lo, c := bitsAdd64(a.lo, b.lo, 0)
109
+	hi, c := bitsAdd64(a.hi, b.hi, c)
110
+	if c != 0 {
111
+		panic("poly1305: unexpected overflow")
111 112
 	}
113
+	return uint128{lo, hi}
114
+}
112 115
 
113
-	h[0], h[1], h[2], h[3], h[4] = h0, h1, h2, h3, h4
116
+func shiftRightBy2(a uint128) uint128 {
117
+	a.lo = a.lo>>2 | (a.hi&3)<<62
118
+	a.hi = a.hi >> 2
119
+	return a
114 120
 }
115 121
 
116
-func finalizeGeneric(out *[TagSize]byte, h *[5]uint32, s *[4]uint32) {
117
-	h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
118
-
119
-	// h %= p reduction
120
-	h2 += h1 >> 26
121
-	h1 &= 0x3ffffff
122
-	h3 += h2 >> 26
123
-	h2 &= 0x3ffffff
124
-	h4 += h3 >> 26
125
-	h3 &= 0x3ffffff
126
-	h0 += 5 * (h4 >> 26)
127
-	h4 &= 0x3ffffff
128
-	h1 += h0 >> 26
129
-	h0 &= 0x3ffffff
130
-
131
-	// h - p
132
-	t0 := h0 + 5
133
-	t1 := h1 + (t0 >> 26)
134
-	t2 := h2 + (t1 >> 26)
135
-	t3 := h3 + (t2 >> 26)
136
-	t4 := h4 + (t3 >> 26) - (1 << 26)
137
-	t0 &= 0x3ffffff
138
-	t1 &= 0x3ffffff
139
-	t2 &= 0x3ffffff
140
-	t3 &= 0x3ffffff
141
-
142
-	// select h if h < p else h - p
143
-	t_mask := (t4 >> 31) - 1
144
-	h_mask := ^t_mask
145
-	h0 = (h0 & h_mask) | (t0 & t_mask)
146
-	h1 = (h1 & h_mask) | (t1 & t_mask)
147
-	h2 = (h2 & h_mask) | (t2 & t_mask)
148
-	h3 = (h3 & h_mask) | (t3 & t_mask)
149
-	h4 = (h4 & h_mask) | (t4 & t_mask)
150
-
151
-	// h %= 2^128
152
-	h0 |= h1 << 26
153
-	h1 = ((h1 >> 6) | (h2 << 20))
154
-	h2 = ((h2 >> 12) | (h3 << 14))
155
-	h3 = ((h3 >> 18) | (h4 << 8))
156
-
157
-	// s: the s part of the key
158
-	// tag = (h + s) % (2^128)
159
-	t := uint64(h0) + uint64(s[0])
160
-	h0 = uint32(t)
161
-	t = uint64(h1) + uint64(s[1]) + (t >> 32)
162
-	h1 = uint32(t)
163
-	t = uint64(h2) + uint64(s[2]) + (t >> 32)
164
-	h2 = uint32(t)
165
-	t = uint64(h3) + uint64(s[3]) + (t >> 32)
166
-	h3 = uint32(t)
167
-
168
-	binary.LittleEndian.PutUint32(out[0:], h0)
169
-	binary.LittleEndian.PutUint32(out[4:], h1)
170
-	binary.LittleEndian.PutUint32(out[8:], h2)
171
-	binary.LittleEndian.PutUint32(out[12:], h3)
122
+// updateGeneric absorbs msg into the state.h accumulator. For each chunk m of
123
+// 128 bits of message, it computes
124
+//
125
+//     h₊ = (h + m) * r  mod  2¹³⁰ - 5
126
+//
127
+// If the msg length is not a multiple of TagSize, it assumes the last
128
+// incomplete chunk is the final one.
129
+func updateGeneric(state *macState, msg []byte) {
130
+	h0, h1, h2 := state.h[0], state.h[1], state.h[2]
131
+	r0, r1 := state.r[0], state.r[1]
132
+
133
+	for len(msg) > 0 {
134
+		var c uint64
135
+
136
+		// For the first step, h + m, we use a chain of bits.Add64 intrinsics.
137
+		// The resulting value of h might exceed 2¹³⁰ - 5, but will be partially
138
+		// reduced at the end of the multiplication below.
139
+		//
140
+		// The spec requires us to set a bit just above the message size, so as not to
141
+		// hide leading zeroes. For full chunks, that's 1 << 128, so we can just
142
+		// add 1 to the most significant (2¹²⁸) limb, h2.
143
+		if len(msg) >= TagSize {
144
+			h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(msg[0:8]), 0)
145
+			h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(msg[8:16]), c)
146
+			h2 += c + 1
147
+
148
+			msg = msg[TagSize:]
149
+		} else {
150
+			var buf [TagSize]byte
151
+			copy(buf[:], msg)
152
+			buf[len(msg)] = 1
153
+
154
+			h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(buf[0:8]), 0)
155
+			h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(buf[8:16]), c)
156
+			h2 += c
157
+
158
+			msg = nil
159
+		}
160
+
161
+		// Multiplication of big number limbs is similar to elementary school
162
+		// columnar multiplication. Instead of digits, there are 64-bit limbs.
163
+		//
164
+		// We are multiplying a 3 limbs number, h, by a 2 limbs number, r.
165
+		//
166
+		//                        h2    h1    h0  x
167
+		//                              r1    r0  =
168
+		//                       ----------------
169
+		//                      h2r0  h1r0  h0r0     <-- individual 128-bit products
170
+		//            +   h2r1  h1r1  h0r1
171
+		//               ------------------------
172
+		//                 m3    m2    m1    m0      <-- result in 128-bit overlapping limbs
173
+		//               ------------------------
174
+		//         m3.hi m2.hi m1.hi m0.hi           <-- carry propagation
175
+		//     +         m3.lo m2.lo m1.lo m0.lo
176
+		//        -------------------------------
177
+		//           t4    t3    t2    t1    t0      <-- final result in 64-bit limbs
178
+		//
179
+		// The main difference from pen-and-paper multiplication is that we do
180
+		// carry propagation in a separate step, as if we wrote two digit sums
181
+		// at first (the 128-bit limbs), and then carried the tens all at once.
182
+
183
+		h0r0 := mul64(h0, r0)
184
+		h1r0 := mul64(h1, r0)
185
+		h2r0 := mul64(h2, r0)
186
+		h0r1 := mul64(h0, r1)
187
+		h1r1 := mul64(h1, r1)
188
+		h2r1 := mul64(h2, r1)
189
+
190
+		// Since h2 is known to be at most 7 (5 + 1 + 1), and r0 and r1 have their
191
+		// top 4 bits cleared by rMask{0,1}, we know that their product is not going
192
+		// to overflow 64 bits, so we can ignore the high part of the products.
193
+		//
194
+		// This also means that the product doesn't have a fifth limb (t4).
195
+		if h2r0.hi != 0 {
196
+			panic("poly1305: unexpected overflow")
197
+		}
198
+		if h2r1.hi != 0 {
199
+			panic("poly1305: unexpected overflow")
200
+		}
201
+
202
+		m0 := h0r0
203
+		m1 := add128(h1r0, h0r1) // These two additions don't overflow thanks again
204
+		m2 := add128(h2r0, h1r1) // to the 4 masked bits at the top of r0 and r1.
205
+		m3 := h2r1
206
+
207
+		t0 := m0.lo
208
+		t1, c := bitsAdd64(m1.lo, m0.hi, 0)
209
+		t2, c := bitsAdd64(m2.lo, m1.hi, c)
210
+		t3, _ := bitsAdd64(m3.lo, m2.hi, c)
211
+
212
+		// Now we have the result as 4 64-bit limbs, and we need to reduce it
213
+		// modulo 2¹³⁰ - 5. The special shape of this Crandall prime lets us do
214
+		// a cheap partial reduction according to the reduction identity
215
+		//
216
+		//     c * 2¹³⁰ + n  =  c * 5 + n  mod  2¹³⁰ - 5
217
+		//
218
+		// because 2¹³⁰ = 5 mod 2¹³⁰ - 5. The reduction is only partial, since the result is
219
+		// likely to be larger than 2¹³⁰ - 5, but still small enough to fit the
220
+		// assumptions we make about h in the rest of the code.
221
+		//
222
+		// See also https://speakerdeck.com/gtank/engineering-prime-numbers?slide=23
223
+
224
+		// We split the final result at the 2¹³⁰ mark into h and cc, the carry.
225
+		// Note that the carry bits are effectively shifted left by 2, in other
226
+		// words, cc = c * 4 for the c in the reduction identity.
227
+		h0, h1, h2 = t0, t1, t2&maskLow2Bits
228
+		cc := uint128{t2 & maskNotLow2Bits, t3}
229
+
230
+		// To add c * 5 to h, we first add cc = c * 4, and then add (cc >> 2) = c.
231
+
232
+		h0, c = bitsAdd64(h0, cc.lo, 0)
233
+		h1, c = bitsAdd64(h1, cc.hi, c)
234
+		h2 += c
235
+
236
+		cc = shiftRightBy2(cc)
237
+
238
+		h0, c = bitsAdd64(h0, cc.lo, 0)
239
+		h1, c = bitsAdd64(h1, cc.hi, c)
240
+		h2 += c
241
+
242
+		// h2 is at most 3 + 1 + 1 = 5, making the whole of h at most
243
+		//
244
+		//     5 * 2¹²⁸ + (2¹²⁸ - 1) = 6 * 2¹²⁸ - 1
245
+	}
246
+
247
+	state.h[0], state.h[1], state.h[2] = h0, h1, h2
248
+}
249
+
250
+const (
251
+	maskLow2Bits    uint64 = 0x0000000000000003
252
+	maskNotLow2Bits uint64 = ^maskLow2Bits
253
+)
254
+
255
+// select64 returns x if v == 1 and y if v == 0, in constant time.
256
+func select64(v, x, y uint64) uint64 { return ^(v-1)&x | (v-1)&y }
257
+
258
+// [p0, p1, p2] is 2¹³⁰ - 5 in little endian order.
259
+const (
260
+	p0 = 0xFFFFFFFFFFFFFFFB
261
+	p1 = 0xFFFFFFFFFFFFFFFF
262
+	p2 = 0x0000000000000003
263
+)
264
+
265
+// finalize completes the modular reduction of h and computes
266
+//
267
+//     out = h + s  mod  2¹²⁸
268
+//
269
+func finalize(out *[TagSize]byte, h *[3]uint64, s *[2]uint64) {
270
+	h0, h1, h2 := h[0], h[1], h[2]
271
+
272
+	// After the partial reduction in updateGeneric, h might be more than
273
+	// 2¹³⁰ - 5, but will be less than 2 * (2¹³⁰ - 5). To complete the reduction
274
+	// in constant time, we compute t = h - (2¹³⁰ - 5), and select h as the
275
+	// result if the subtraction underflows, and t otherwise.
276
+
277
+	hMinusP0, b := bitsSub64(h0, p0, 0)
278
+	hMinusP1, b := bitsSub64(h1, p1, b)
279
+	_, b = bitsSub64(h2, p2, b)
280
+
281
+	// h = h if h < p else h - p
282
+	h0 = select64(b, h0, hMinusP0)
283
+	h1 = select64(b, h1, hMinusP1)
284
+
285
+	// Finally, we compute the last Poly1305 step
286
+	//
287
+	//     tag = h + s  mod  2¹²⁸
288
+	//
289
+	// by just doing a wide addition with the 128 low bits of h and discarding
290
+	// the overflow.
291
+	h0, c := bitsAdd64(h0, s[0], 0)
292
+	h1, _ = bitsAdd64(h1, s[1], c)
293
+
294
+	binary.LittleEndian.PutUint64(out[0:8], h0)
295
+	binary.LittleEndian.PutUint64(out[8:16], h1)
172 296
 }
... ...
@@ -2,14 +2,11 @@
2 2
 // Use of this source code is governed by a BSD-style
3 3
 // license that can be found in the LICENSE file.
4 4
 
5
-// +build s390x,!go1.11 !arm,!amd64,!s390x gccgo appengine nacl
5
+// +build s390x,!go1.11 !amd64,!s390x,!ppc64le gccgo appengine nacl
6 6
 
7 7
 package poly1305
8 8
 
9
-// Sum generates an authenticator for msg using a one-time key and puts the
10
-// 16-byte result into out. Authenticating two different messages with the same
11
-// key allows an attacker to forge messages at will.
12
-func Sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
9
+func sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
13 10
 	h := newMAC(key)
14 11
 	h.Write(msg)
15 12
 	h.Sum(out)
16 13
new file mode 100644
... ...
@@ -0,0 +1,58 @@
0
+// Copyright 2019 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build ppc64le,!gccgo,!appengine
5
+
6
+package poly1305
7
+
8
+//go:noescape
9
+func update(state *macState, msg []byte)
10
+
11
+func sum(out *[16]byte, m []byte, key *[32]byte) {
12
+	h := newMAC(key)
13
+	h.Write(m)
14
+	h.Sum(out)
15
+}
16
+
17
+func newMAC(key *[32]byte) (h mac) {
18
+	initialize(key, &h.r, &h.s)
19
+	return
20
+}
21
+
22
+// mac is a wrapper for macGeneric that redirects calls that would have gone to
23
+// updateGeneric to update.
24
+//
25
+// Its Write and Sum methods are otherwise identical to the macGeneric ones, but
26
+// using function pointers would carry a major performance cost.
27
+type mac struct{ macGeneric }
28
+
29
+func (h *mac) Write(p []byte) (int, error) {
30
+	nn := len(p)
31
+	if h.offset > 0 {
32
+		n := copy(h.buffer[h.offset:], p)
33
+		if h.offset+n < TagSize {
34
+			h.offset += n
35
+			return nn, nil
36
+		}
37
+		p = p[n:]
38
+		h.offset = 0
39
+		update(&h.macState, h.buffer[:])
40
+	}
41
+	if n := len(p) - (len(p) % TagSize); n > 0 {
42
+		update(&h.macState, p[:n])
43
+		p = p[n:]
44
+	}
45
+	if len(p) > 0 {
46
+		h.offset += copy(h.buffer[h.offset:], p)
47
+	}
48
+	return nn, nil
49
+}
50
+
51
+func (h *mac) Sum(out *[16]byte) {
52
+	state := h.macState
53
+	if h.offset > 0 {
54
+		update(&state, h.buffer[:h.offset])
55
+	}
56
+	finalize(out, &state.h, &state.s)
57
+}
0 58
new file mode 100644
... ...
@@ -0,0 +1,181 @@
0
+// Copyright 2019 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build ppc64le,!gccgo,!appengine
5
+
6
+#include "textflag.h"
7
+
8
+// This was ported from the amd64 implementation.
9
+
10
+#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
11
+	MOVD (msg), t0;  \
12
+	MOVD 8(msg), t1; \
13
+	MOVD $1, t2;     \
14
+	ADDC t0, h0, h0; \
15
+	ADDE t1, h1, h1; \
16
+	ADDE t2, h2;     \
17
+	ADD  $16, msg
18
+
19
+#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
20
+	MULLD  r0, h0, t0;  \
21
+	MULLD  r0, h1, t4;  \
22
+	MULHDU r0, h0, t1;  \
23
+	MULHDU r0, h1, t5;  \
24
+	ADDC   t4, t1, t1;  \
25
+	MULLD  r0, h2, t2;  \
26
+	ADDZE  t5;          \
27
+	MULHDU r1, h0, t4;  \
28
+	MULLD  r1, h0, h0;  \
29
+	ADD    t5, t2, t2;  \
30
+	ADDC   h0, t1, t1;  \
31
+	MULLD  h2, r1, t3;  \
32
+	ADDZE  t4, h0;      \
33
+	MULHDU r1, h1, t5;  \
34
+	MULLD  r1, h1, t4;  \
35
+	ADDC   t4, t2, t2;  \
36
+	ADDE   t5, t3, t3;  \
37
+	ADDC   h0, t2, t2;  \
38
+	MOVD   $-4, t4;     \
39
+	MOVD   t0, h0;      \
40
+	MOVD   t1, h1;      \
41
+	ADDZE  t3;          \
42
+	ANDCC  $3, t2, h2;  \
43
+	AND    t2, t4, t0;  \
44
+	ADDC   t0, h0, h0;  \
45
+	ADDE   t3, h1, h1;  \
46
+	SLD    $62, t3, t4; \
47
+	SRD    $2, t2;      \
48
+	ADDZE  h2;          \
49
+	OR     t4, t2, t2;  \
50
+	SRD    $2, t3;      \
51
+	ADDC   t2, h0, h0;  \
52
+	ADDE   t3, h1, h1;  \
53
+	ADDZE  h2
54
+
55
+DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
56
+DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
57
+GLOBL ·poly1305Mask<>(SB), RODATA, $16
58
+
59
+// func update(state *[7]uint64, msg []byte)
60
+TEXT ·update(SB), $0-32
61
+	MOVD state+0(FP), R3
62
+	MOVD msg_base+8(FP), R4
63
+	MOVD msg_len+16(FP), R5
64
+
65
+	MOVD 0(R3), R8   // h0
66
+	MOVD 8(R3), R9   // h1
67
+	MOVD 16(R3), R10 // h2
68
+	MOVD 24(R3), R11 // r0
69
+	MOVD 32(R3), R12 // r1
70
+
71
+	CMP R5, $16
72
+	BLT bytes_between_0_and_15
73
+
74
+loop:
75
+	POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)
76
+
77
+multiply:
78
+	POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
79
+	ADD $-16, R5
80
+	CMP R5, $16
81
+	BGE loop
82
+
83
+bytes_between_0_and_15:
84
+	CMP  $0, R5
85
+	BEQ  done
86
+	MOVD $0, R16 // h0
87
+	MOVD $0, R17 // h1
88
+
89
+flush_buffer:
90
+	CMP R5, $8
91
+	BLE just1
92
+
93
+	MOVD $8, R21
94
+	SUB  R21, R5, R21
95
+
96
+	// Greater than 8 -- load the rightmost remaining bytes in msg
97
+	// and put into R17 (h1)
98
+	MOVD (R4)(R21), R17
99
+	MOVD $16, R22
100
+
101
+	// Find the offset to those bytes
102
+	SUB R5, R22, R22
103
+	SLD $3, R22
104
+
105
+	// Shift to get only the bytes in msg
106
+	SRD R22, R17, R17
107
+
108
+	// Put 1 at high end
109
+	MOVD $1, R23
110
+	SLD  $3, R21
111
+	SLD  R21, R23, R23
112
+	OR   R23, R17, R17
113
+
114
+	// Remainder is 8
115
+	MOVD $8, R5
116
+
117
+just1:
118
+	CMP R5, $8
119
+	BLT less8
120
+
121
+	// Exactly 8
122
+	MOVD (R4), R16
123
+
124
+	CMP $0, R17
125
+
126
+	// Check if we've already set R17; if not
127
+	// set 1 to indicate end of msg.
128
+	BNE  carry
129
+	MOVD $1, R17
130
+	BR   carry
131
+
132
+less8:
133
+	MOVD  $0, R16   // h0
134
+	MOVD  $0, R22   // shift count
135
+	CMP   R5, $4
136
+	BLT   less4
137
+	MOVWZ (R4), R16
138
+	ADD   $4, R4
139
+	ADD   $-4, R5
140
+	MOVD  $32, R22
141
+
142
+less4:
143
+	CMP   R5, $2
144
+	BLT   less2
145
+	MOVHZ (R4), R21
146
+	SLD   R22, R21, R21
147
+	OR    R16, R21, R16
148
+	ADD   $16, R22
149
+	ADD   $-2, R5
150
+	ADD   $2, R4
151
+
152
+less2:
153
+	CMP   $0, R5
154
+	BEQ   insert1
155
+	MOVBZ (R4), R21
156
+	SLD   R22, R21, R21
157
+	OR    R16, R21, R16
158
+	ADD   $8, R22
159
+
160
+insert1:
161
+	// Insert 1 at end of msg
162
+	MOVD $1, R21
163
+	SLD  R22, R21, R21
164
+	OR   R16, R21, R16
165
+
166
+carry:
167
+	// Add new values to h0, h1, h2
168
+	ADDC R16, R8
169
+	ADDE R17, R9
170
+	ADDE $0, R10
171
+	MOVD $16, R5
172
+	ADD  R5, R4
173
+	BR   multiply
174
+
175
+done:
176
+	// Save h0, h1, h2 in state
177
+	MOVD R8, 0(R3)
178
+	MOVD R9, 8(R3)
179
+	MOVD R10, 16(R3)
180
+	RET
... ...
@@ -22,10 +22,7 @@ func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
22 22
 //go:noescape
23 23
 func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
24 24
 
25
-// Sum generates an authenticator for m using a one-time key and puts the
26
-// 16-byte result into out. Authenticating two different messages with the same
27
-// key allows an attacker to forge messages at will.
28
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
25
+func sum(out *[16]byte, m []byte, key *[32]byte) {
29 26
 	if cpu.S390X.HasVX {
30 27
 		var mPtr *byte
31 28
 		if len(m) > 0 {