Browse code

revendor logrus and x/crypto

This fixes the issue that was blocking a test from running on ppc64le.
The logrus revendor also changes the color code used in that same test, which
would break the test on all platforms, so the expected color code is updated in this PR.

Signed-off-by: Christy Perez <christy@linux.vnet.ibm.com>

Christy Perez authored on 2017/08/09 02:39:55
Showing 47 changed files
... ...
@@ -2159,9 +2159,9 @@ func (s *DockerDaemonSuite) TestRunLinksChanged(c *check.C) {
2159 2159
 }
2160 2160
 
2161 2161
 func (s *DockerDaemonSuite) TestDaemonStartWithoutColors(c *check.C) {
2162
-	testRequires(c, DaemonIsLinux, NotPpc64le)
2162
+	testRequires(c, DaemonIsLinux)
2163 2163
 
2164
-	infoLog := "\x1b[34mINFO\x1b"
2164
+	infoLog := "\x1b[36mINFO\x1b"
2165 2165
 
2166 2166
 	b := bytes.NewBuffer(nil)
2167 2167
 	done := make(chan bool)
... ...
@@ -2209,7 +2209,7 @@ func (s *DockerDaemonSuite) TestDaemonStartWithoutColors(c *check.C) {
2209 2209
 }
2210 2210
 
2211 2211
 func (s *DockerDaemonSuite) TestDaemonDebugLog(c *check.C) {
2212
-	testRequires(c, DaemonIsLinux, NotPpc64le)
2212
+	testRequires(c, DaemonIsLinux)
2213 2213
 
2214 2214
 	debugLog := "\x1b[37mDEBU\x1b"
2215 2215
 
... ...
@@ -11,7 +11,7 @@ github.com/gorilla/mux v1.1
11 11
 github.com/Microsoft/opengcs v0.3.3
12 12
 github.com/kr/pty 5cf931ef8f
13 13
 github.com/mattn/go-shellwords v1.0.3
14
-github.com/sirupsen/logrus v1.0.1
14
+github.com/sirupsen/logrus v1.0.3
15 15
 github.com/tchap/go-patricia v2.2.6
16 16
 github.com/vdemeester/shakers 24d7f1d6a71aa5d9cbe7390e4afb66b7eef9e1b3
17 17
 golang.org/x/net 7dcfb8076726a3fdd9353b6b8a1f1b6be6811bd6
... ...
@@ -112,7 +112,7 @@ github.com/docker/swarmkit ddb4539f883b18ea40af44ee6de63ac2adc8dc1e
112 112
 github.com/gogo/protobuf v0.4
113 113
 github.com/cloudflare/cfssl 7fb22c8cba7ecaf98e4082d22d65800cf45e042a
114 114
 github.com/google/certificate-transparency d90e65c3a07988180c5b1ece71791c0b6506826e
115
-golang.org/x/crypto 3fbbcd23f1cb824e69491a5930cfeff09b12f4d2
115
+golang.org/x/crypto 558b6879de74bc843225cde5686419267ff707ca
116 116
 golang.org/x/time a4bde12657593d5e90d0533a3e4fd95e635124cb
117 117
 github.com/hashicorp/go-memdb cb9a474f84cc5e41b273b20c6927680b2a8776ad
118 118
 github.com/hashicorp/go-immutable-radix 8e8ed81f8f0bf1bdd829593fdd5c29922c1ea990
... ...
@@ -1,7 +1,7 @@
1 1
 # Logrus <img src="http://i.imgur.com/hTeVwmJ.png" width="40" height="40" alt=":walrus:" class="emoji" title=":walrus:"/>&nbsp;[![Build Status](https://travis-ci.org/sirupsen/logrus.svg?branch=master)](https://travis-ci.org/sirupsen/logrus)&nbsp;[![GoDoc](https://godoc.org/github.com/sirupsen/logrus?status.svg)](https://godoc.org/github.com/sirupsen/logrus)
2 2
 
3 3
 Logrus is a structured logger for Go (golang), completely API compatible with
4
-the standard library logger. [Godoc][godoc].
4
+the standard library logger.
5 5
 
6 6
 **Seeing weird case-sensitive problems?** It's in the past been possible to
7 7
 import Logrus as both upper- and lower-case. Due to the Go package environment,
... ...
@@ -372,6 +372,7 @@ The built-in logging formatters are:
372 372
 
373 373
 Third party logging formatters:
374 374
 
375
+* [`FluentdFormatter`](https://github.com/joonix/log). Formats entries that can be parsed by Kubernetes and Google Container Engine.
375 376
 * [`logstash`](https://github.com/bshuster-repo/logrus-logstash-hook). Logs fields as [Logstash](http://logstash.net) Events.
376 377
 * [`prefixed`](https://github.com/x-cray/logrus-prefixed-formatter). Displays log entry source along with alternative layout.
377 378
 * [`zalgo`](https://github.com/aybabtme/logzalgo). Invoking the P͉̫o̳̼̊w̖͈̰͎e̬͔̭͂r͚̼̹̲ ̫͓͉̳͈ō̠͕͖̚f̝͍̠ ͕̲̞͖͑Z̖̫̤̫ͪa͉̬͈̗l͖͎g̳̥o̰̥̅!̣͔̲̻͊̄ ̙̘̦̹̦.
... ...
@@ -35,6 +35,7 @@ type Entry struct {
35 35
 	Time time.Time
36 36
 
37 37
 	// Level the log entry was logged at: Debug, Info, Warn, Error, Fatal or Panic
38
+	// This field will be set on entry firing and the value will be equal to the one in Logger struct field.
38 39
 	Level Level
39 40
 
40 41
 	// Message passed to Debug, Info, Warn, Error, Fatal or Panic
... ...
@@ -31,7 +31,7 @@ func SetFormatter(formatter Formatter) {
31 31
 func SetLevel(level Level) {
32 32
 	std.mu.Lock()
33 33
 	defer std.mu.Unlock()
34
-	std.setLevel(level)
34
+	std.SetLevel(level)
35 35
 }
36 36
 
37 37
 // GetLevel returns the standard logger level.
... ...
@@ -2,7 +2,7 @@ package logrus
2 2
 
3 3
 import "time"
4 4
 
5
-const DefaultTimestampFormat = time.RFC3339
5
+const defaultTimestampFormat = time.RFC3339
6 6
 
7 7
 // The Formatter interface is used to implement a custom Formatter. It takes an
8 8
 // `Entry`. It exposes all the fields, including the default ones:
... ...
@@ -6,8 +6,11 @@ import (
6 6
 )
7 7
 
8 8
 type fieldKey string
9
+
10
+// FieldMap allows customization of the key names for default fields.
9 11
 type FieldMap map[fieldKey]string
10 12
 
13
+// Default key names for the default fields
11 14
 const (
12 15
 	FieldKeyMsg   = "msg"
13 16
 	FieldKeyLevel = "level"
... ...
@@ -22,6 +25,7 @@ func (f FieldMap) resolve(key fieldKey) string {
22 22
 	return string(key)
23 23
 }
24 24
 
25
+// JSONFormatter formats logs into parsable json
25 26
 type JSONFormatter struct {
26 27
 	// TimestampFormat sets the format used for marshaling timestamps.
27 28
 	TimestampFormat string
... ...
@@ -29,7 +33,7 @@ type JSONFormatter struct {
29 29
 	// DisableTimestamp allows disabling automatic timestamps in output
30 30
 	DisableTimestamp bool
31 31
 
32
-	// FieldMap allows users to customize the names of keys for various fields.
32
+	// FieldMap allows users to customize the names of keys for default fields.
33 33
 	// As an example:
34 34
 	// formatter := &JSONFormatter{
35 35
 	//   	FieldMap: FieldMap{
... ...
@@ -41,6 +45,7 @@ type JSONFormatter struct {
41 41
 	FieldMap FieldMap
42 42
 }
43 43
 
44
+// Format renders a single log entry
44 45
 func (f *JSONFormatter) Format(entry *Entry) ([]byte, error) {
45 46
 	data := make(Fields, len(entry.Data)+3)
46 47
 	for k, v := range entry.Data {
... ...
@@ -57,7 +62,7 @@ func (f *JSONFormatter) Format(entry *Entry) ([]byte, error) {
57 57
 
58 58
 	timestampFormat := f.TimestampFormat
59 59
 	if timestampFormat == "" {
60
-		timestampFormat = DefaultTimestampFormat
60
+		timestampFormat = defaultTimestampFormat
61 61
 	}
62 62
 
63 63
 	if !f.DisableTimestamp {
... ...
@@ -25,7 +25,7 @@ type Logger struct {
25 25
 	Formatter Formatter
26 26
 	// The logging level the logger should log at. This is typically (and defaults
27 27
 	// to) `logrus.Info`, which allows Info(), Warn(), Error() and Fatal() to be
28
-	// logged. `logrus.Debug` is useful in
28
+	// logged.
29 29
 	Level Level
30 30
 	// Used to sync writing to the log. Locking is enabled by default
31 31
 	mu MutexWrap
... ...
@@ -312,6 +312,6 @@ func (logger *Logger) level() Level {
312 312
 	return Level(atomic.LoadUint32((*uint32)(&logger.Level)))
313 313
 }
314 314
 
315
-func (logger *Logger) setLevel(level Level) {
315
+func (logger *Logger) SetLevel(level Level) {
316 316
 	atomic.StoreUint32((*uint32)(&logger.Level), uint32(level))
317 317
 }
318 318
deleted file mode 100644
... ...
@@ -1,10 +0,0 @@
1
-// +build appengine
2
-
3
-package logrus
4
-
5
-import "io"
6
-
7
-// IsTerminal returns true if stderr's file descriptor is a terminal.
8
-func IsTerminal(f io.Writer) bool {
9
-	return true
10
-}
... ...
@@ -3,8 +3,8 @@
3 3
 
4 4
 package logrus
5 5
 
6
-import "syscall"
6
+import "golang.org/x/sys/unix"
7 7
 
8
-const ioctlReadTermios = syscall.TIOCGETA
8
+const ioctlReadTermios = unix.TIOCGETA
9 9
 
10
-type Termios syscall.Termios
10
+type Termios unix.Termios
... ...
@@ -7,8 +7,8 @@
7 7
 
8 8
 package logrus
9 9
 
10
-import "syscall"
10
+import "golang.org/x/sys/unix"
11 11
 
12
-const ioctlReadTermios = syscall.TCGETS
12
+const ioctlReadTermios = unix.TCGETS
13 13
 
14
-type Termios syscall.Termios
14
+type Termios unix.Termios
15 15
deleted file mode 100644
... ...
@@ -1,28 +0,0 @@
1
-// Based on ssh/terminal:
2
-// Copyright 2011 The Go Authors. All rights reserved.
3
-// Use of this source code is governed by a BSD-style
4
-// license that can be found in the LICENSE file.
5
-
6
-// +build linux darwin freebsd openbsd netbsd dragonfly
7
-// +build !appengine
8
-
9
-package logrus
10
-
11
-import (
12
-	"io"
13
-	"os"
14
-	"syscall"
15
-	"unsafe"
16
-)
17
-
18
-// IsTerminal returns true if stderr's file descriptor is a terminal.
19
-func IsTerminal(f io.Writer) bool {
20
-	var termios Termios
21
-	switch v := f.(type) {
22
-	case *os.File:
23
-		_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(v.Fd()), ioctlReadTermios, uintptr(unsafe.Pointer(&termios)), 0, 0, 0)
24
-		return err == 0
25
-	default:
26
-		return false
27
-	}
28
-}
29 1
deleted file mode 100644
... ...
@@ -1,21 +0,0 @@
1
-// +build solaris,!appengine
2
-
3
-package logrus
4
-
5
-import (
6
-	"io"
7
-	"os"
8
-
9
-	"golang.org/x/sys/unix"
10
-)
11
-
12
-// IsTerminal returns true if the given file descriptor is a terminal.
13
-func IsTerminal(f io.Writer) bool {
14
-	switch v := f.(type) {
15
-	case *os.File:
16
-		_, err := unix.IoctlGetTermios(int(v.Fd()), unix.TCGETA)
17
-		return err == nil
18
-	default:
19
-		return false
20
-	}
21
-}
22 1
deleted file mode 100644
... ...
@@ -1,82 +0,0 @@
1
-// Based on ssh/terminal:
2
-// Copyright 2011 The Go Authors. All rights reserved.
3
-// Use of this source code is governed by a BSD-style
4
-// license that can be found in the LICENSE file.
5
-
6
-// +build windows,!appengine
7
-
8
-package logrus
9
-
10
-import (
11
-	"bytes"
12
-	"errors"
13
-	"io"
14
-	"os"
15
-	"os/exec"
16
-	"strconv"
17
-	"strings"
18
-	"syscall"
19
-	"unsafe"
20
-)
21
-
22
-var kernel32 = syscall.NewLazyDLL("kernel32.dll")
23
-
24
-var (
25
-	procGetConsoleMode = kernel32.NewProc("GetConsoleMode")
26
-	procSetConsoleMode = kernel32.NewProc("SetConsoleMode")
27
-)
28
-
29
-const (
30
-	enableProcessedOutput           = 0x0001
31
-	enableWrapAtEolOutput           = 0x0002
32
-	enableVirtualTerminalProcessing = 0x0004
33
-)
34
-
35
-func getVersion() (float64, error) {
36
-	stdout, stderr := &bytes.Buffer{}, &bytes.Buffer{}
37
-	cmd := exec.Command("cmd", "ver")
38
-	cmd.Stdout = stdout
39
-	cmd.Stderr = stderr
40
-	err := cmd.Run()
41
-	if err != nil {
42
-		return -1, err
43
-	}
44
-	
45
-	// The output should be like "Microsoft Windows [Version XX.X.XXXXXX]"
46
-	version := strings.Replace(stdout.String(), "\n", "", -1)
47
-	version = strings.Replace(version, "\r\n", "", -1)
48
-
49
-	x1 := strings.Index(version, "[Version")
50
-
51
-	if x1 == -1 || strings.Index(version, "]") == -1 {
52
-		return -1, errors.New("Can't determine Windows version")
53
-	}
54
-
55
-	return strconv.ParseFloat(version[x1+9:x1+13], 64)
56
-}
57
-
58
-func init() {
59
-	ver, err := getVersion()
60
-	if err != nil {
61
-		return
62
-	}
63
-
64
-	// Activate Virtual Processing for Windows CMD
65
-	// Info: https://msdn.microsoft.com/en-us/library/windows/desktop/ms686033(v=vs.85).aspx
66
-	if ver >= 10 {
67
-		handle := syscall.Handle(os.Stderr.Fd())
68
-		procSetConsoleMode.Call(uintptr(handle), enableProcessedOutput|enableWrapAtEolOutput|enableVirtualTerminalProcessing)
69
-	}
70
-}
71
-
72
-// IsTerminal returns true if stderr's file descriptor is a terminal.
73
-func IsTerminal(f io.Writer) bool {
74
-	switch v := f.(type) {
75
-	case *os.File:
76
-		var st uint32
77
-		r, _, e := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(v.Fd()), uintptr(unsafe.Pointer(&st)), 0)
78
-		return r != 0 && e == 0
79
-	default:
80
-		return false
81
-	}
82
-}
... ...
@@ -3,10 +3,14 @@ package logrus
3 3
 import (
4 4
 	"bytes"
5 5
 	"fmt"
6
+	"io"
7
+	"os"
6 8
 	"sort"
7 9
 	"strings"
8 10
 	"sync"
9 11
 	"time"
12
+
13
+	"golang.org/x/crypto/ssh/terminal"
10 14
 )
11 15
 
12 16
 const (
... ...
@@ -14,7 +18,7 @@ const (
14 14
 	red     = 31
15 15
 	green   = 32
16 16
 	yellow  = 33
17
-	blue    = 34
17
+	blue    = 36
18 18
 	gray    = 37
19 19
 )
20 20
 
... ...
@@ -26,6 +30,7 @@ func init() {
26 26
 	baseTimestamp = time.Now()
27 27
 }
28 28
 
29
+// TextFormatter formats logs into text
29 30
 type TextFormatter struct {
30 31
 	// Set to true to bypass checking for a TTY before outputting colors.
31 32
 	ForceColors bool
... ...
@@ -52,10 +57,6 @@ type TextFormatter struct {
52 52
 	// QuoteEmptyFields will wrap empty fields in quotes if true
53 53
 	QuoteEmptyFields bool
54 54
 
55
-	// QuoteCharacter can be set to the override the default quoting character "
56
-	// with something else. For example: ', or `.
57
-	QuoteCharacter string
58
-
59 55
 	// Whether the logger's out is to a terminal
60 56
 	isTerminal bool
61 57
 
... ...
@@ -63,14 +64,21 @@ type TextFormatter struct {
63 63
 }
64 64
 
65 65
 func (f *TextFormatter) init(entry *Entry) {
66
-	if len(f.QuoteCharacter) == 0 {
67
-		f.QuoteCharacter = "\""
68
-	}
69 66
 	if entry.Logger != nil {
70
-		f.isTerminal = IsTerminal(entry.Logger.Out)
67
+		f.isTerminal = f.checkIfTerminal(entry.Logger.Out)
68
+	}
69
+}
70
+
71
+func (f *TextFormatter) checkIfTerminal(w io.Writer) bool {
72
+	switch v := w.(type) {
73
+	case *os.File:
74
+		return terminal.IsTerminal(int(v.Fd()))
75
+	default:
76
+		return false
71 77
 	}
72 78
 }
73 79
 
80
+// Format renders a single log entry
74 81
 func (f *TextFormatter) Format(entry *Entry) ([]byte, error) {
75 82
 	var b *bytes.Buffer
76 83
 	keys := make([]string, 0, len(entry.Data))
... ...
@@ -95,7 +103,7 @@ func (f *TextFormatter) Format(entry *Entry) ([]byte, error) {
95 95
 
96 96
 	timestampFormat := f.TimestampFormat
97 97
 	if timestampFormat == "" {
98
-		timestampFormat = DefaultTimestampFormat
98
+		timestampFormat = defaultTimestampFormat
99 99
 	}
100 100
 	if isColored {
101 101
 		f.printColored(b, entry, keys, timestampFormat)
... ...
@@ -153,7 +161,7 @@ func (f *TextFormatter) needsQuoting(text string) bool {
153 153
 		if !((ch >= 'a' && ch <= 'z') ||
154 154
 			(ch >= 'A' && ch <= 'Z') ||
155 155
 			(ch >= '0' && ch <= '9') ||
156
-			ch == '-' || ch == '.') {
156
+			ch == '-' || ch == '.' || ch == '_' || ch == '/' || ch == '@' || ch == '^' || ch == '+') {
157 157
 			return true
158 158
 		}
159 159
 	}
... ...
@@ -161,36 +169,23 @@ func (f *TextFormatter) needsQuoting(text string) bool {
161 161
 }
162 162
 
163 163
 func (f *TextFormatter) appendKeyValue(b *bytes.Buffer, key string, value interface{}) {
164
-
164
+	if b.Len() > 0 {
165
+		b.WriteByte(' ')
166
+	}
165 167
 	b.WriteString(key)
166 168
 	b.WriteByte('=')
167 169
 	f.appendValue(b, value)
168
-	b.WriteByte(' ')
169 170
 }
170 171
 
171 172
 func (f *TextFormatter) appendValue(b *bytes.Buffer, value interface{}) {
172
-	switch value := value.(type) {
173
-	case string:
174
-		if !f.needsQuoting(value) {
175
-			b.WriteString(value)
176
-		} else {
177
-			b.WriteString(f.quoteString(value))
178
-		}
179
-	case error:
180
-		errmsg := value.Error()
181
-		if !f.needsQuoting(errmsg) {
182
-			b.WriteString(errmsg)
183
-		} else {
184
-			b.WriteString(f.quoteString(errmsg))
185
-		}
186
-	default:
187
-		fmt.Fprint(b, value)
173
+	stringVal, ok := value.(string)
174
+	if !ok {
175
+		stringVal = fmt.Sprint(value)
188 176
 	}
189
-}
190
-
191
-func (f *TextFormatter) quoteString(v string) string {
192
-	escapedQuote := fmt.Sprintf("\\%s", f.QuoteCharacter)
193
-	escapedValue := strings.Replace(v, f.QuoteCharacter, escapedQuote, -1)
194 177
 
195
-	return fmt.Sprintf("%s%v%s", f.QuoteCharacter, escapedValue, f.QuoteCharacter)
178
+	if !f.needsQuoting(stringVal) {
179
+		b.WriteString(stringVal)
180
+	} else {
181
+		b.WriteString(fmt.Sprintf("%q", stringVal))
182
+	}
196 183
 }
197 184
deleted file mode 100644
... ...
@@ -1,3 +0,0 @@
1
-This repository holds supplementary Go cryptography libraries.
2
-
3
-To submit changes to this repository, see http://golang.org/doc/contribute.html.
4 1
new file mode 100644
... ...
@@ -0,0 +1,21 @@
0
+# Go Cryptography
1
+
2
+This repository holds supplementary Go cryptography libraries.
3
+
4
+## Download/Install
5
+
6
+The easiest way to install is to run `go get -u golang.org/x/crypto/...`. You
7
+can also manually git clone the repository to `$GOPATH/src/golang.org/x/crypto`.
8
+
9
+## Report Issues / Send Patches
10
+
11
+This repository uses Gerrit for code changes. To learn how to submit changes to
12
+this repository, see https://golang.org/doc/contribute.html.
13
+
14
+The main issue tracker for the crypto repository is located at
15
+https://github.com/golang/go/issues. Prefix your issue with "x/crypto:" in the
16
+subject line, so it is easy to find.
17
+
18
+Note that contributions to the cryptography package receive additional scrutiny
19
+due to their sensitive nature. Patches may take longer than normal to receive
20
+feedback.
0 21
new file mode 100644
... ...
@@ -0,0 +1,8 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// This code was translated into a form compatible with 6a from the public
5
+// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
6
+
7
+#define REDMASK51     0x0007FFFFFFFFFFFF
0 8
new file mode 100644
... ...
@@ -0,0 +1,20 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// This code was translated into a form compatible with 6a from the public
5
+// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
6
+
7
+// +build amd64,!gccgo,!appengine
8
+
9
+// These constants cannot be encoded in non-MOVQ immediates.
10
+// We access them directly from memory instead.
11
+
12
+DATA ·_121666_213(SB)/8, $996687872
13
+GLOBL ·_121666_213(SB), 8, $8
14
+
15
+DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA
16
+GLOBL ·_2P0(SB), 8, $8
17
+
18
+DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE
19
+GLOBL ·_2P1234(SB), 8, $8
0 20
new file mode 100644
... ...
@@ -0,0 +1,65 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build amd64,!gccgo,!appengine
5
+
6
+// func cswap(inout *[4][5]uint64, v uint64)
7
+TEXT ·cswap(SB),7,$0
8
+	MOVQ inout+0(FP),DI
9
+	MOVQ v+8(FP),SI
10
+
11
+	SUBQ $1, SI
12
+	NOTQ SI
13
+	MOVQ SI, X15
14
+	PSHUFD $0x44, X15, X15
15
+
16
+	MOVOU 0(DI), X0
17
+	MOVOU 16(DI), X2
18
+	MOVOU 32(DI), X4
19
+	MOVOU 48(DI), X6
20
+	MOVOU 64(DI), X8
21
+	MOVOU 80(DI), X1
22
+	MOVOU 96(DI), X3
23
+	MOVOU 112(DI), X5
24
+	MOVOU 128(DI), X7
25
+	MOVOU 144(DI), X9
26
+
27
+	MOVO X1, X10
28
+	MOVO X3, X11
29
+	MOVO X5, X12
30
+	MOVO X7, X13
31
+	MOVO X9, X14
32
+
33
+	PXOR X0, X10
34
+	PXOR X2, X11
35
+	PXOR X4, X12
36
+	PXOR X6, X13
37
+	PXOR X8, X14
38
+	PAND X15, X10
39
+	PAND X15, X11
40
+	PAND X15, X12
41
+	PAND X15, X13
42
+	PAND X15, X14
43
+	PXOR X10, X0
44
+	PXOR X10, X1
45
+	PXOR X11, X2
46
+	PXOR X11, X3
47
+	PXOR X12, X4
48
+	PXOR X12, X5
49
+	PXOR X13, X6
50
+	PXOR X13, X7
51
+	PXOR X14, X8
52
+	PXOR X14, X9
53
+
54
+	MOVOU X0, 0(DI)
55
+	MOVOU X2, 16(DI)
56
+	MOVOU X4, 32(DI)
57
+	MOVOU X6, 48(DI)
58
+	MOVOU X8, 64(DI)
59
+	MOVOU X1, 80(DI)
60
+	MOVOU X3, 96(DI)
61
+	MOVOU X5, 112(DI)
62
+	MOVOU X7, 128(DI)
63
+	MOVOU X9, 144(DI)
64
+	RET
0 65
new file mode 100644
... ...
@@ -0,0 +1,834 @@
0
+// Copyright 2013 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// We have an implementation in amd64 assembly so this code is only run on
5
+// non-amd64 platforms. The amd64 assembly does not support gccgo.
6
+// +build !amd64 gccgo appengine
7
+
8
+package curve25519
9
+
10
+import (
11
+	"encoding/binary"
12
+)
13
+
14
+// This code is a port of the public domain, "ref10" implementation of
15
+// curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
16
+
17
+// fieldElement represents an element of the field GF(2^255 - 19). An element
18
+// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
19
+// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
20
+// context.
21
+type fieldElement [10]int32
22
+
23
+func feZero(fe *fieldElement) {
24
+	for i := range fe {
25
+		fe[i] = 0
26
+	}
27
+}
28
+
29
+func feOne(fe *fieldElement) {
30
+	feZero(fe)
31
+	fe[0] = 1
32
+}
33
+
34
+func feAdd(dst, a, b *fieldElement) {
35
+	for i := range dst {
36
+		dst[i] = a[i] + b[i]
37
+	}
38
+}
39
+
40
+func feSub(dst, a, b *fieldElement) {
41
+	for i := range dst {
42
+		dst[i] = a[i] - b[i]
43
+	}
44
+}
45
+
46
+func feCopy(dst, src *fieldElement) {
47
+	for i := range dst {
48
+		dst[i] = src[i]
49
+	}
50
+}
51
+
52
+// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
53
+//
54
+// Preconditions: b in {0,1}.
55
+func feCSwap(f, g *fieldElement, b int32) {
56
+	b = -b
57
+	for i := range f {
58
+		t := b & (f[i] ^ g[i])
59
+		f[i] ^= t
60
+		g[i] ^= t
61
+	}
62
+}
63
+
64
+// load3 reads a 24-bit, little-endian value from in.
65
+func load3(in []byte) int64 {
66
+	var r int64
67
+	r = int64(in[0])
68
+	r |= int64(in[1]) << 8
69
+	r |= int64(in[2]) << 16
70
+	return r
71
+}
72
+
73
+// load4 reads a 32-bit, little-endian value from in.
74
+func load4(in []byte) int64 {
75
+	return int64(binary.LittleEndian.Uint32(in))
76
+}
77
+
78
+func feFromBytes(dst *fieldElement, src *[32]byte) {
79
+	h0 := load4(src[:])
80
+	h1 := load3(src[4:]) << 6
81
+	h2 := load3(src[7:]) << 5
82
+	h3 := load3(src[10:]) << 3
83
+	h4 := load3(src[13:]) << 2
84
+	h5 := load4(src[16:])
85
+	h6 := load3(src[20:]) << 7
86
+	h7 := load3(src[23:]) << 5
87
+	h8 := load3(src[26:]) << 4
88
+	h9 := load3(src[29:]) << 2
89
+
90
+	var carry [10]int64
91
+	carry[9] = (h9 + 1<<24) >> 25
92
+	h0 += carry[9] * 19
93
+	h9 -= carry[9] << 25
94
+	carry[1] = (h1 + 1<<24) >> 25
95
+	h2 += carry[1]
96
+	h1 -= carry[1] << 25
97
+	carry[3] = (h3 + 1<<24) >> 25
98
+	h4 += carry[3]
99
+	h3 -= carry[3] << 25
100
+	carry[5] = (h5 + 1<<24) >> 25
101
+	h6 += carry[5]
102
+	h5 -= carry[5] << 25
103
+	carry[7] = (h7 + 1<<24) >> 25
104
+	h8 += carry[7]
105
+	h7 -= carry[7] << 25
106
+
107
+	carry[0] = (h0 + 1<<25) >> 26
108
+	h1 += carry[0]
109
+	h0 -= carry[0] << 26
110
+	carry[2] = (h2 + 1<<25) >> 26
111
+	h3 += carry[2]
112
+	h2 -= carry[2] << 26
113
+	carry[4] = (h4 + 1<<25) >> 26
114
+	h5 += carry[4]
115
+	h4 -= carry[4] << 26
116
+	carry[6] = (h6 + 1<<25) >> 26
117
+	h7 += carry[6]
118
+	h6 -= carry[6] << 26
119
+	carry[8] = (h8 + 1<<25) >> 26
120
+	h9 += carry[8]
121
+	h8 -= carry[8] << 26
122
+
123
+	dst[0] = int32(h0)
124
+	dst[1] = int32(h1)
125
+	dst[2] = int32(h2)
126
+	dst[3] = int32(h3)
127
+	dst[4] = int32(h4)
128
+	dst[5] = int32(h5)
129
+	dst[6] = int32(h6)
130
+	dst[7] = int32(h7)
131
+	dst[8] = int32(h8)
132
+	dst[9] = int32(h9)
133
+}
134
+
135
+// feToBytes marshals h to s.
136
+// Preconditions:
137
+//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
138
+//
139
+// Write p=2^255-19; q=floor(h/p).
140
+// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
141
+//
142
+// Proof:
143
+//   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
144
+//   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
145
+//
146
+//   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
147
+//   Then 0<y<1.
148
+//
149
+//   Write r=h-pq.
150
+//   Have 0<=r<=p-1=2^255-20.
151
+//   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
152
+//
153
+//   Write x=r+19(2^-255)r+y.
154
+//   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
155
+//
156
+//   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
157
+//   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
158
+func feToBytes(s *[32]byte, h *fieldElement) {
159
+	var carry [10]int32
160
+
161
+	q := (19*h[9] + (1 << 24)) >> 25
162
+	q = (h[0] + q) >> 26
163
+	q = (h[1] + q) >> 25
164
+	q = (h[2] + q) >> 26
165
+	q = (h[3] + q) >> 25
166
+	q = (h[4] + q) >> 26
167
+	q = (h[5] + q) >> 25
168
+	q = (h[6] + q) >> 26
169
+	q = (h[7] + q) >> 25
170
+	q = (h[8] + q) >> 26
171
+	q = (h[9] + q) >> 25
172
+
173
+	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
174
+	h[0] += 19 * q
175
+	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
176
+
177
+	carry[0] = h[0] >> 26
178
+	h[1] += carry[0]
179
+	h[0] -= carry[0] << 26
180
+	carry[1] = h[1] >> 25
181
+	h[2] += carry[1]
182
+	h[1] -= carry[1] << 25
183
+	carry[2] = h[2] >> 26
184
+	h[3] += carry[2]
185
+	h[2] -= carry[2] << 26
186
+	carry[3] = h[3] >> 25
187
+	h[4] += carry[3]
188
+	h[3] -= carry[3] << 25
189
+	carry[4] = h[4] >> 26
190
+	h[5] += carry[4]
191
+	h[4] -= carry[4] << 26
192
+	carry[5] = h[5] >> 25
193
+	h[6] += carry[5]
194
+	h[5] -= carry[5] << 25
195
+	carry[6] = h[6] >> 26
196
+	h[7] += carry[6]
197
+	h[6] -= carry[6] << 26
198
+	carry[7] = h[7] >> 25
199
+	h[8] += carry[7]
200
+	h[7] -= carry[7] << 25
201
+	carry[8] = h[8] >> 26
202
+	h[9] += carry[8]
203
+	h[8] -= carry[8] << 26
204
+	carry[9] = h[9] >> 25
205
+	h[9] -= carry[9] << 25
206
+	// h10 = carry9
207
+
208
+	// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
209
+	// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
210
+	// evidently 2^255 h10-2^255 q = 0.
211
+	// Goal: Output h[0]+...+2^230 h[9].
212
+
213
+	s[0] = byte(h[0] >> 0)
214
+	s[1] = byte(h[0] >> 8)
215
+	s[2] = byte(h[0] >> 16)
216
+	s[3] = byte((h[0] >> 24) | (h[1] << 2))
217
+	s[4] = byte(h[1] >> 6)
218
+	s[5] = byte(h[1] >> 14)
219
+	s[6] = byte((h[1] >> 22) | (h[2] << 3))
220
+	s[7] = byte(h[2] >> 5)
221
+	s[8] = byte(h[2] >> 13)
222
+	s[9] = byte((h[2] >> 21) | (h[3] << 5))
223
+	s[10] = byte(h[3] >> 3)
224
+	s[11] = byte(h[3] >> 11)
225
+	s[12] = byte((h[3] >> 19) | (h[4] << 6))
226
+	s[13] = byte(h[4] >> 2)
227
+	s[14] = byte(h[4] >> 10)
228
+	s[15] = byte(h[4] >> 18)
229
+	s[16] = byte(h[5] >> 0)
230
+	s[17] = byte(h[5] >> 8)
231
+	s[18] = byte(h[5] >> 16)
232
+	s[19] = byte((h[5] >> 24) | (h[6] << 1))
233
+	s[20] = byte(h[6] >> 7)
234
+	s[21] = byte(h[6] >> 15)
235
+	s[22] = byte((h[6] >> 23) | (h[7] << 3))
236
+	s[23] = byte(h[7] >> 5)
237
+	s[24] = byte(h[7] >> 13)
238
+	s[25] = byte((h[7] >> 21) | (h[8] << 4))
239
+	s[26] = byte(h[8] >> 4)
240
+	s[27] = byte(h[8] >> 12)
241
+	s[28] = byte((h[8] >> 20) | (h[9] << 6))
242
+	s[29] = byte(h[9] >> 2)
243
+	s[30] = byte(h[9] >> 10)
244
+	s[31] = byte(h[9] >> 18)
245
+}
246
+
247
+// feMul calculates h = f * g
248
+// Can overlap h with f or g.
249
+//
250
+// Preconditions:
251
+//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
252
+//    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
253
+//
254
+// Postconditions:
255
+//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
256
+//
257
+// Notes on implementation strategy:
258
+//
259
+// Using schoolbook multiplication.
260
+// Karatsuba would save a little in some cost models.
261
+//
262
+// Most multiplications by 2 and 19 are 32-bit precomputations;
263
+// cheaper than 64-bit postcomputations.
264
+//
265
+// There is one remaining multiplication by 19 in the carry chain;
266
+// one *19 precomputation can be merged into this,
267
+// but the resulting data flow is considerably less clean.
268
+//
269
+// There are 12 carries below.
270
+// 10 of them are 2-way parallelizable and vectorizable.
271
+// Can get away with 11 carries, but then data flow is much deeper.
272
+//
273
+// With tighter constraints on inputs can squeeze carries into int32.
274
+func feMul(h, f, g *fieldElement) {
275
+	f0 := f[0]
276
+	f1 := f[1]
277
+	f2 := f[2]
278
+	f3 := f[3]
279
+	f4 := f[4]
280
+	f5 := f[5]
281
+	f6 := f[6]
282
+	f7 := f[7]
283
+	f8 := f[8]
284
+	f9 := f[9]
285
+	g0 := g[0]
286
+	g1 := g[1]
287
+	g2 := g[2]
288
+	g3 := g[3]
289
+	g4 := g[4]
290
+	g5 := g[5]
291
+	g6 := g[6]
292
+	g7 := g[7]
293
+	g8 := g[8]
294
+	g9 := g[9]
295
+	g1_19 := 19 * g1 // 1.4*2^29
296
+	g2_19 := 19 * g2 // 1.4*2^30; still ok
297
+	g3_19 := 19 * g3
298
+	g4_19 := 19 * g4
299
+	g5_19 := 19 * g5
300
+	g6_19 := 19 * g6
301
+	g7_19 := 19 * g7
302
+	g8_19 := 19 * g8
303
+	g9_19 := 19 * g9
304
+	f1_2 := 2 * f1
305
+	f3_2 := 2 * f3
306
+	f5_2 := 2 * f5
307
+	f7_2 := 2 * f7
308
+	f9_2 := 2 * f9
309
+	f0g0 := int64(f0) * int64(g0)
310
+	f0g1 := int64(f0) * int64(g1)
311
+	f0g2 := int64(f0) * int64(g2)
312
+	f0g3 := int64(f0) * int64(g3)
313
+	f0g4 := int64(f0) * int64(g4)
314
+	f0g5 := int64(f0) * int64(g5)
315
+	f0g6 := int64(f0) * int64(g6)
316
+	f0g7 := int64(f0) * int64(g7)
317
+	f0g8 := int64(f0) * int64(g8)
318
+	f0g9 := int64(f0) * int64(g9)
319
+	f1g0 := int64(f1) * int64(g0)
320
+	f1g1_2 := int64(f1_2) * int64(g1)
321
+	f1g2 := int64(f1) * int64(g2)
322
+	f1g3_2 := int64(f1_2) * int64(g3)
323
+	f1g4 := int64(f1) * int64(g4)
324
+	f1g5_2 := int64(f1_2) * int64(g5)
325
+	f1g6 := int64(f1) * int64(g6)
326
+	f1g7_2 := int64(f1_2) * int64(g7)
327
+	f1g8 := int64(f1) * int64(g8)
328
+	f1g9_38 := int64(f1_2) * int64(g9_19)
329
+	f2g0 := int64(f2) * int64(g0)
330
+	f2g1 := int64(f2) * int64(g1)
331
+	f2g2 := int64(f2) * int64(g2)
332
+	f2g3 := int64(f2) * int64(g3)
333
+	f2g4 := int64(f2) * int64(g4)
334
+	f2g5 := int64(f2) * int64(g5)
335
+	f2g6 := int64(f2) * int64(g6)
336
+	f2g7 := int64(f2) * int64(g7)
337
+	f2g8_19 := int64(f2) * int64(g8_19)
338
+	f2g9_19 := int64(f2) * int64(g9_19)
339
+	f3g0 := int64(f3) * int64(g0)
340
+	f3g1_2 := int64(f3_2) * int64(g1)
341
+	f3g2 := int64(f3) * int64(g2)
342
+	f3g3_2 := int64(f3_2) * int64(g3)
343
+	f3g4 := int64(f3) * int64(g4)
344
+	f3g5_2 := int64(f3_2) * int64(g5)
345
+	f3g6 := int64(f3) * int64(g6)
346
+	f3g7_38 := int64(f3_2) * int64(g7_19)
347
+	f3g8_19 := int64(f3) * int64(g8_19)
348
+	f3g9_38 := int64(f3_2) * int64(g9_19)
349
+	f4g0 := int64(f4) * int64(g0)
350
+	f4g1 := int64(f4) * int64(g1)
351
+	f4g2 := int64(f4) * int64(g2)
352
+	f4g3 := int64(f4) * int64(g3)
353
+	f4g4 := int64(f4) * int64(g4)
354
+	f4g5 := int64(f4) * int64(g5)
355
+	f4g6_19 := int64(f4) * int64(g6_19)
356
+	f4g7_19 := int64(f4) * int64(g7_19)
357
+	f4g8_19 := int64(f4) * int64(g8_19)
358
+	f4g9_19 := int64(f4) * int64(g9_19)
359
+	f5g0 := int64(f5) * int64(g0)
360
+	f5g1_2 := int64(f5_2) * int64(g1)
361
+	f5g2 := int64(f5) * int64(g2)
362
+	f5g3_2 := int64(f5_2) * int64(g3)
363
+	f5g4 := int64(f5) * int64(g4)
364
+	f5g5_38 := int64(f5_2) * int64(g5_19)
365
+	f5g6_19 := int64(f5) * int64(g6_19)
366
+	f5g7_38 := int64(f5_2) * int64(g7_19)
367
+	f5g8_19 := int64(f5) * int64(g8_19)
368
+	f5g9_38 := int64(f5_2) * int64(g9_19)
369
+	f6g0 := int64(f6) * int64(g0)
370
+	f6g1 := int64(f6) * int64(g1)
371
+	f6g2 := int64(f6) * int64(g2)
372
+	f6g3 := int64(f6) * int64(g3)
373
+	f6g4_19 := int64(f6) * int64(g4_19)
374
+	f6g5_19 := int64(f6) * int64(g5_19)
375
+	f6g6_19 := int64(f6) * int64(g6_19)
376
+	f6g7_19 := int64(f6) * int64(g7_19)
377
+	f6g8_19 := int64(f6) * int64(g8_19)
378
+	f6g9_19 := int64(f6) * int64(g9_19)
379
+	f7g0 := int64(f7) * int64(g0)
380
+	f7g1_2 := int64(f7_2) * int64(g1)
381
+	f7g2 := int64(f7) * int64(g2)
382
+	f7g3_38 := int64(f7_2) * int64(g3_19)
383
+	f7g4_19 := int64(f7) * int64(g4_19)
384
+	f7g5_38 := int64(f7_2) * int64(g5_19)
385
+	f7g6_19 := int64(f7) * int64(g6_19)
386
+	f7g7_38 := int64(f7_2) * int64(g7_19)
387
+	f7g8_19 := int64(f7) * int64(g8_19)
388
+	f7g9_38 := int64(f7_2) * int64(g9_19)
389
+	f8g0 := int64(f8) * int64(g0)
390
+	f8g1 := int64(f8) * int64(g1)
391
+	f8g2_19 := int64(f8) * int64(g2_19)
392
+	f8g3_19 := int64(f8) * int64(g3_19)
393
+	f8g4_19 := int64(f8) * int64(g4_19)
394
+	f8g5_19 := int64(f8) * int64(g5_19)
395
+	f8g6_19 := int64(f8) * int64(g6_19)
396
+	f8g7_19 := int64(f8) * int64(g7_19)
397
+	f8g8_19 := int64(f8) * int64(g8_19)
398
+	f8g9_19 := int64(f8) * int64(g9_19)
399
+	f9g0 := int64(f9) * int64(g0)
400
+	f9g1_38 := int64(f9_2) * int64(g1_19)
401
+	f9g2_19 := int64(f9) * int64(g2_19)
402
+	f9g3_38 := int64(f9_2) * int64(g3_19)
403
+	f9g4_19 := int64(f9) * int64(g4_19)
404
+	f9g5_38 := int64(f9_2) * int64(g5_19)
405
+	f9g6_19 := int64(f9) * int64(g6_19)
406
+	f9g7_38 := int64(f9_2) * int64(g7_19)
407
+	f9g8_19 := int64(f9) * int64(g8_19)
408
+	f9g9_38 := int64(f9_2) * int64(g9_19)
409
+	h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
410
+	h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
411
+	h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
412
+	h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
413
+	h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
414
+	h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
415
+	h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
416
+	h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
417
+	h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
418
+	h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
419
+	var carry [10]int64
420
+
421
+	// |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
422
+	//   i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
423
+	// |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
424
+	//   i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
425
+
426
+	carry[0] = (h0 + (1 << 25)) >> 26
427
+	h1 += carry[0]
428
+	h0 -= carry[0] << 26
429
+	carry[4] = (h4 + (1 << 25)) >> 26
430
+	h5 += carry[4]
431
+	h4 -= carry[4] << 26
432
+	// |h0| <= 2^25
433
+	// |h4| <= 2^25
434
+	// |h1| <= 1.51*2^58
435
+	// |h5| <= 1.51*2^58
436
+
437
+	carry[1] = (h1 + (1 << 24)) >> 25
438
+	h2 += carry[1]
439
+	h1 -= carry[1] << 25
440
+	carry[5] = (h5 + (1 << 24)) >> 25
441
+	h6 += carry[5]
442
+	h5 -= carry[5] << 25
443
+	// |h1| <= 2^24; from now on fits into int32
444
+	// |h5| <= 2^24; from now on fits into int32
445
+	// |h2| <= 1.21*2^59
446
+	// |h6| <= 1.21*2^59
447
+
448
+	carry[2] = (h2 + (1 << 25)) >> 26
449
+	h3 += carry[2]
450
+	h2 -= carry[2] << 26
451
+	carry[6] = (h6 + (1 << 25)) >> 26
452
+	h7 += carry[6]
453
+	h6 -= carry[6] << 26
454
+	// |h2| <= 2^25; from now on fits into int32 unchanged
455
+	// |h6| <= 2^25; from now on fits into int32 unchanged
456
+	// |h3| <= 1.51*2^58
457
+	// |h7| <= 1.51*2^58
458
+
459
+	carry[3] = (h3 + (1 << 24)) >> 25
460
+	h4 += carry[3]
461
+	h3 -= carry[3] << 25
462
+	carry[7] = (h7 + (1 << 24)) >> 25
463
+	h8 += carry[7]
464
+	h7 -= carry[7] << 25
465
+	// |h3| <= 2^24; from now on fits into int32 unchanged
466
+	// |h7| <= 2^24; from now on fits into int32 unchanged
467
+	// |h4| <= 1.52*2^33
468
+	// |h8| <= 1.52*2^33
469
+
470
+	carry[4] = (h4 + (1 << 25)) >> 26
471
+	h5 += carry[4]
472
+	h4 -= carry[4] << 26
473
+	carry[8] = (h8 + (1 << 25)) >> 26
474
+	h9 += carry[8]
475
+	h8 -= carry[8] << 26
476
+	// |h4| <= 2^25; from now on fits into int32 unchanged
477
+	// |h8| <= 2^25; from now on fits into int32 unchanged
478
+	// |h5| <= 1.01*2^24
479
+	// |h9| <= 1.51*2^58
480
+
481
+	carry[9] = (h9 + (1 << 24)) >> 25
482
+	h0 += carry[9] * 19
483
+	h9 -= carry[9] << 25
484
+	// |h9| <= 2^24; from now on fits into int32 unchanged
485
+	// |h0| <= 1.8*2^37
486
+
487
+	carry[0] = (h0 + (1 << 25)) >> 26
488
+	h1 += carry[0]
489
+	h0 -= carry[0] << 26
490
+	// |h0| <= 2^25; from now on fits into int32 unchanged
491
+	// |h1| <= 1.01*2^24
492
+
493
+	h[0] = int32(h0)
494
+	h[1] = int32(h1)
495
+	h[2] = int32(h2)
496
+	h[3] = int32(h3)
497
+	h[4] = int32(h4)
498
+	h[5] = int32(h5)
499
+	h[6] = int32(h6)
500
+	h[7] = int32(h7)
501
+	h[8] = int32(h8)
502
+	h[9] = int32(h9)
503
+}
504
+
505
+// feSquare calculates h = f*f. Can overlap h with f, because every limb of f is copied into a local before h is written.
506
+//
507
+// Preconditions:
508
+//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
509
+//
510
+// Postconditions:
511
+//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
512
+func feSquare(h, f *fieldElement) {
513
+	f0 := f[0] // read all ten limbs up front; h is only written at the very end
514
+	f1 := f[1]
515
+	f2 := f[2]
516
+	f3 := f[3]
517
+	f4 := f[4]
518
+	f5 := f[5]
519
+	f6 := f[6]
520
+	f7 := f[7]
521
+	f8 := f[8]
522
+	f9 := f[9]
523
+	f0_2 := 2 * f0 // doubled limbs, used to build the 2x and 4x cross terms below
524
+	f1_2 := 2 * f1
525
+	f2_2 := 2 * f2
526
+	f3_2 := 2 * f3
527
+	f4_2 := 2 * f4
528
+	f5_2 := 2 * f5
529
+	f6_2 := 2 * f6
530
+	f7_2 := 2 * f7
531
+	f5_38 := 38 * f5 // 1.31*2^30
532
+	f6_19 := 19 * f6 // 1.31*2^30
533
+	f7_38 := 38 * f7 // 1.31*2^30
534
+	f8_19 := 19 * f8 // 1.31*2^30
535
+	f9_38 := 38 * f9 // 1.31*2^30
536
+	f0f0 := int64(f0) * int64(f0) // products are formed in int64; the numeric suffix of a name is the total constant factor applied
537
+	f0f1_2 := int64(f0_2) * int64(f1)
538
+	f0f2_2 := int64(f0_2) * int64(f2)
539
+	f0f3_2 := int64(f0_2) * int64(f3)
540
+	f0f4_2 := int64(f0_2) * int64(f4)
541
+	f0f5_2 := int64(f0_2) * int64(f5)
542
+	f0f6_2 := int64(f0_2) * int64(f6)
543
+	f0f7_2 := int64(f0_2) * int64(f7)
544
+	f0f8_2 := int64(f0_2) * int64(f8)
545
+	f0f9_2 := int64(f0_2) * int64(f9)
546
+	f1f1_2 := int64(f1_2) * int64(f1)
547
+	f1f2_2 := int64(f1_2) * int64(f2)
548
+	f1f3_4 := int64(f1_2) * int64(f3_2)
549
+	f1f4_2 := int64(f1_2) * int64(f4)
550
+	f1f5_4 := int64(f1_2) * int64(f5_2)
551
+	f1f6_2 := int64(f1_2) * int64(f6)
552
+	f1f7_4 := int64(f1_2) * int64(f7_2)
553
+	f1f8_2 := int64(f1_2) * int64(f8)
554
+	f1f9_76 := int64(f1_2) * int64(f9_38)
555
+	f2f2 := int64(f2) * int64(f2)
556
+	f2f3_2 := int64(f2_2) * int64(f3)
557
+	f2f4_2 := int64(f2_2) * int64(f4)
558
+	f2f5_2 := int64(f2_2) * int64(f5)
559
+	f2f6_2 := int64(f2_2) * int64(f6)
560
+	f2f7_2 := int64(f2_2) * int64(f7)
561
+	f2f8_38 := int64(f2_2) * int64(f8_19)
562
+	f2f9_38 := int64(f2) * int64(f9_38)
563
+	f3f3_2 := int64(f3_2) * int64(f3)
564
+	f3f4_2 := int64(f3_2) * int64(f4)
565
+	f3f5_4 := int64(f3_2) * int64(f5_2)
566
+	f3f6_2 := int64(f3_2) * int64(f6)
567
+	f3f7_76 := int64(f3_2) * int64(f7_38)
568
+	f3f8_38 := int64(f3_2) * int64(f8_19)
569
+	f3f9_76 := int64(f3_2) * int64(f9_38)
570
+	f4f4 := int64(f4) * int64(f4)
571
+	f4f5_2 := int64(f4_2) * int64(f5)
572
+	f4f6_38 := int64(f4_2) * int64(f6_19)
573
+	f4f7_38 := int64(f4) * int64(f7_38)
574
+	f4f8_38 := int64(f4_2) * int64(f8_19)
575
+	f4f9_38 := int64(f4) * int64(f9_38)
576
+	f5f5_38 := int64(f5) * int64(f5_38)
577
+	f5f6_38 := int64(f5_2) * int64(f6_19)
578
+	f5f7_76 := int64(f5_2) * int64(f7_38)
579
+	f5f8_38 := int64(f5_2) * int64(f8_19)
580
+	f5f9_76 := int64(f5_2) * int64(f9_38)
581
+	f6f6_19 := int64(f6) * int64(f6_19)
582
+	f6f7_38 := int64(f6) * int64(f7_38)
583
+	f6f8_38 := int64(f6_2) * int64(f8_19)
584
+	f6f9_38 := int64(f6) * int64(f9_38)
585
+	f7f7_38 := int64(f7) * int64(f7_38)
586
+	f7f8_38 := int64(f7_2) * int64(f8_19)
587
+	f7f9_76 := int64(f7_2) * int64(f9_38)
588
+	f8f8_19 := int64(f8) * int64(f8_19)
589
+	f8f9_38 := int64(f8) * int64(f9_38)
590
+	f9f9_38 := int64(f9) * int64(f9_38)
591
+	h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38 // hk sums the products whose limb indices add up to k or to k+10; the k+10 ones are those pre-scaled by 19/38/76
592
+	h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
593
+	h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
594
+	h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
595
+	h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
596
+	h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
597
+	h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
598
+	h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
599
+	h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
600
+	h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
601
+	var carry [10]int64
602
+	// Carry chain: even limbs keep 26 bits and odd limbs 25; adding 1<<25 (resp. 1<<24) before the shift rounds each carry to nearest.
603
+	carry[0] = (h0 + (1 << 25)) >> 26
604
+	h1 += carry[0]
605
+	h0 -= carry[0] << 26
606
+	carry[4] = (h4 + (1 << 25)) >> 26
607
+	h5 += carry[4]
608
+	h4 -= carry[4] << 26
609
+
610
+	carry[1] = (h1 + (1 << 24)) >> 25
611
+	h2 += carry[1]
612
+	h1 -= carry[1] << 25
613
+	carry[5] = (h5 + (1 << 24)) >> 25
614
+	h6 += carry[5]
615
+	h5 -= carry[5] << 25
616
+
617
+	carry[2] = (h2 + (1 << 25)) >> 26
618
+	h3 += carry[2]
619
+	h2 -= carry[2] << 26
620
+	carry[6] = (h6 + (1 << 25)) >> 26
621
+	h7 += carry[6]
622
+	h6 -= carry[6] << 26
623
+
624
+	carry[3] = (h3 + (1 << 24)) >> 25
625
+	h4 += carry[3]
626
+	h3 -= carry[3] << 25
627
+	carry[7] = (h7 + (1 << 24)) >> 25
628
+	h8 += carry[7]
629
+	h7 -= carry[7] << 25
630
+
631
+	carry[4] = (h4 + (1 << 25)) >> 26
632
+	h5 += carry[4]
633
+	h4 -= carry[4] << 26
634
+	carry[8] = (h8 + (1 << 25)) >> 26
635
+	h9 += carry[8]
636
+	h8 -= carry[8] << 26
637
+	// The carry out of the top limb h9 re-enters h0 multiplied by 19 (presumably because 2^255 = 19 mod 2^255-19 — confirm against the field definition).
638
+	carry[9] = (h9 + (1 << 24)) >> 25
639
+	h0 += carry[9] * 19
640
+	h9 -= carry[9] << 25
641
+
642
+	carry[0] = (h0 + (1 << 25)) >> 26
643
+	h1 += carry[0]
644
+	h0 -= carry[0] << 26
645
+	// Narrow the carried limbs back to int32 and store them in h.
646
+	h[0] = int32(h0)
647
+	h[1] = int32(h1)
648
+	h[2] = int32(h2)
649
+	h[3] = int32(h3)
650
+	h[4] = int32(h4)
651
+	h[5] = int32(h5)
652
+	h[6] = int32(h6)
653
+	h[7] = int32(h7)
654
+	h[8] = int32(h8)
655
+	h[9] = int32(h9)
656
+}
657
+
658
+// feMul121666 calculates h = f * 121666. Can overlap h with f.
659
+//
660
+// Preconditions:
661
+//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
662
+//
663
+// Postconditions:
664
+//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
665
+func feMul121666(h, f *fieldElement) {
666
+	h0 := int64(f[0]) * 121666 // each limb is widened to int64 and scaled independently
667
+	h1 := int64(f[1]) * 121666
668
+	h2 := int64(f[2]) * 121666
669
+	h3 := int64(f[3]) * 121666
670
+	h4 := int64(f[4]) * 121666
671
+	h5 := int64(f[5]) * 121666
672
+	h6 := int64(f[6]) * 121666
673
+	h7 := int64(f[7]) * 121666
674
+	h8 := int64(f[8]) * 121666
675
+	h9 := int64(f[9]) * 121666
676
+	var carry [10]int64
677
+	// First carry out of the odd (25-bit) limbs; the carry out of h9 re-enters h0 multiplied by 19.
678
+	carry[9] = (h9 + (1 << 24)) >> 25
679
+	h0 += carry[9] * 19
680
+	h9 -= carry[9] << 25
681
+	carry[1] = (h1 + (1 << 24)) >> 25
682
+	h2 += carry[1]
683
+	h1 -= carry[1] << 25
684
+	carry[3] = (h3 + (1 << 24)) >> 25
685
+	h4 += carry[3]
686
+	h3 -= carry[3] << 25
687
+	carry[5] = (h5 + (1 << 24)) >> 25
688
+	h6 += carry[5]
689
+	h5 -= carry[5] << 25
690
+	carry[7] = (h7 + (1 << 24)) >> 25
691
+	h8 += carry[7]
692
+	h7 -= carry[7] << 25
693
+	// Then carry out of the even (26-bit) limbs into their odd neighbours.
694
+	carry[0] = (h0 + (1 << 25)) >> 26
695
+	h1 += carry[0]
696
+	h0 -= carry[0] << 26
697
+	carry[2] = (h2 + (1 << 25)) >> 26
698
+	h3 += carry[2]
699
+	h2 -= carry[2] << 26
700
+	carry[4] = (h4 + (1 << 25)) >> 26
701
+	h5 += carry[4]
702
+	h4 -= carry[4] << 26
703
+	carry[6] = (h6 + (1 << 25)) >> 26
704
+	h7 += carry[6]
705
+	h6 -= carry[6] << 26
706
+	carry[8] = (h8 + (1 << 25)) >> 26
707
+	h9 += carry[8]
708
+	h8 -= carry[8] << 26
709
+	// Narrow the limbs back to int32 and store them in h.
710
+	h[0] = int32(h0)
711
+	h[1] = int32(h1)
712
+	h[2] = int32(h2)
713
+	h[3] = int32(h3)
714
+	h[4] = int32(h4)
715
+	h[5] = int32(h5)
716
+	h[6] = int32(h6)
717
+	h[7] = int32(h7)
718
+	h[8] = int32(h8)
719
+	h[9] = int32(h9)
720
+}
721
+
722
+// feInvert sets out = z^-1, computed as z^(2^255-21) through a fixed chain of squarings and multiplications (2^255-21 = p-2 for p = 2^255-19).
723
+func feInvert(out, z *fieldElement) {
724
+	var t0, t1, t2, t3 fieldElement
725
+	var i int
726
+	// Each "for i = 1; ..." loop adds the squarings that follow the explicit first one; the "i < 1" loops run zero times.
727
+	feSquare(&t0, z) // t0 = z^2
728
+	for i = 1; i < 1; i++ { // zero iterations
729
+		feSquare(&t0, &t0)
730
+	}
731
+	feSquare(&t1, &t0) // t1 = z^4
732
+	for i = 1; i < 2; i++ { // one iteration: t1 = z^8
733
+		feSquare(&t1, &t1)
734
+	}
735
+	feMul(&t1, z, &t1) // t1 = z^9
736
+	feMul(&t0, &t0, &t1) // t0 = z^11
737
+	feSquare(&t2, &t0) // t2 = z^22
738
+	for i = 1; i < 1; i++ { // zero iterations
739
+		feSquare(&t2, &t2)
740
+	}
741
+	feMul(&t1, &t1, &t2) // t1 = z^31 = z^(2^5-1)
742
+	feSquare(&t2, &t1)
743
+	for i = 1; i < 5; i++ { // 5 squarings in total: t2 = z^(2^10-2^5)
744
+		feSquare(&t2, &t2)
745
+	}
746
+	feMul(&t1, &t2, &t1) // t1 = z^(2^10-1)
747
+	feSquare(&t2, &t1)
748
+	for i = 1; i < 10; i++ { // 10 squarings in total: t2 = z^(2^20-2^10)
749
+		feSquare(&t2, &t2)
750
+	}
751
+	feMul(&t2, &t2, &t1) // t2 = z^(2^20-1)
752
+	feSquare(&t3, &t2)
753
+	for i = 1; i < 20; i++ { // 20 squarings in total: t3 = z^(2^40-2^20)
754
+		feSquare(&t3, &t3)
755
+	}
756
+	feMul(&t2, &t3, &t2) // t2 = z^(2^40-1)
757
+	feSquare(&t2, &t2)
758
+	for i = 1; i < 10; i++ { // 10 squarings in total: t2 = z^(2^50-2^10)
759
+		feSquare(&t2, &t2)
760
+	}
761
+	feMul(&t1, &t2, &t1) // t1 = z^(2^50-1)
762
+	feSquare(&t2, &t1)
763
+	for i = 1; i < 50; i++ { // 50 squarings in total: t2 = z^(2^100-2^50)
764
+		feSquare(&t2, &t2)
765
+	}
766
+	feMul(&t2, &t2, &t1) // t2 = z^(2^100-1)
767
+	feSquare(&t3, &t2)
768
+	for i = 1; i < 100; i++ { // 100 squarings in total: t3 = z^(2^200-2^100)
769
+		feSquare(&t3, &t3)
770
+	}
771
+	feMul(&t2, &t3, &t2) // t2 = z^(2^200-1)
772
+	feSquare(&t2, &t2)
773
+	for i = 1; i < 50; i++ { // 50 squarings in total: t2 = z^(2^250-2^50)
774
+		feSquare(&t2, &t2)
775
+	}
776
+	feMul(&t1, &t2, &t1) // t1 = z^(2^250-1)
777
+	feSquare(&t1, &t1)
778
+	for i = 1; i < 5; i++ { // 5 squarings in total: t1 = z^(2^255-2^5)
779
+		feSquare(&t1, &t1)
780
+	}
781
+	feMul(out, &t1, &t0) // out = z^(2^255-2^5+11) = z^(2^255-21)
782
+}
783
+// scalarMult runs a 255-step ladder over the bits of a clamped copy of in, starting from the field element decoded from base, and writes the 32-byte encoding of x2/z2 to out.
784
+func scalarMult(out, in, base *[32]byte) {
785
+	var e [32]byte
786
+	// Work on a copy so the caller's scalar is untouched; clear the low 3 bits, clear bit 255 and set bit 254.
787
+	copy(e[:], in[:])
788
+	e[0] &= 248
789
+	e[31] &= 127
790
+	e[31] |= 64
791
+	// z2 keeps its zero value; x2 and z3 go through feOne and x3 is copied from x1 (feOne/feCopy are defined elsewhere — presumably "set to 1" and "copy").
792
+	var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
793
+	feFromBytes(&x1, base)
794
+	feOne(&x2)
795
+	feCopy(&x3, &x1)
796
+	feOne(&z3)
797
+	// swap holds the previously processed bit, so the XOR below is 1 only when the current bit differs from it.
798
+	swap := int32(0)
799
+	for pos := 254; pos >= 0; pos-- { // scalar bits from 254 down to 0
800
+		b := e[pos/8] >> uint(pos&7) // byte pos/8, bit pos%8
801
+		b &= 1
802
+		swap ^= int32(b)
803
+		feCSwap(&x2, &x3, swap) // presumably swaps the pair only when swap == 1 — feCSwap is defined elsewhere, confirm
804
+		feCSwap(&z2, &z3, swap)
805
+		swap = int32(b)
806
+		// One ladder step updating (x2:z2) and (x3:z3); temporaries are reused, so statement order matters.
807
+		feSub(&tmp0, &x3, &z3)
808
+		feSub(&tmp1, &x2, &z2)
809
+		feAdd(&x2, &x2, &z2)
810
+		feAdd(&z2, &x3, &z3)
811
+		feMul(&z3, &tmp0, &x2)
812
+		feMul(&z2, &z2, &tmp1)
813
+		feSquare(&tmp0, &tmp1)
814
+		feSquare(&tmp1, &x2)
815
+		feAdd(&x3, &z3, &z2)
816
+		feSub(&z2, &z3, &z2)
817
+		feMul(&x2, &tmp1, &tmp0)
818
+		feSub(&tmp1, &tmp1, &tmp0)
819
+		feSquare(&z2, &z2)
820
+		feMul121666(&z3, &tmp1)
821
+		feSquare(&x3, &x3)
822
+		feAdd(&tmp0, &tmp0, &z3)
823
+		feMul(&z3, &x1, &z2)
824
+		feMul(&z2, &tmp1, &tmp0)
825
+	}
826
+	// Apply the swap still pending from the last processed bit.
827
+	feCSwap(&x2, &x3, swap)
828
+	feCSwap(&z2, &z3, swap)
829
+	// Divide out the projective denominator (x2 * z2^-1) and serialize the result.
830
+	feInvert(&z2, &z2)
831
+	feMul(&x2, &x2, &z2)
832
+	feToBytes(out, &x2)
833
+}
0 834
new file mode 100644
... ...
@@ -0,0 +1,23 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// Package curve25519 provides an implementation of scalar multiplication on
5
+// the elliptic curve known as curve25519. See https://cr.yp.to/ecdh.html
6
+package curve25519 // import "golang.org/x/crypto/curve25519"
7
+
8
+// basePoint is the x coordinate of the generator of the curve.
9
+var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
10
+
11
+// ScalarMult sets dst to the product in*base, where in is the scalar, dst and base are the x
12
+// coordinates of group points, and all values are in little-endian form.
13
+func ScalarMult(dst, in, base *[32]byte) {
14
+	scalarMult(dst, in, base) // delegates to the unexported scalarMult implementation
15
+}
16
+
17
+// ScalarBaseMult sets dst to the product in*basePoint, where in is the scalar, dst is the x
18
+// coordinate of the resulting group point, basePoint is the package's generator x coordinate, and all values
19
+// are in little-endian form.
20
+func ScalarBaseMult(dst, in *[32]byte) {
21
+	ScalarMult(dst, in, &basePoint) // same as ScalarMult with the package-level basePoint as base
22
+}
0 23
new file mode 100644
... ...
@@ -0,0 +1,73 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// This code was translated into a form compatible with 6a from the public
5
+// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
6
+
7
+// +build amd64,!gccgo,!appengine
8
+
9
+#include "const_amd64.h"
10
+// freeze carries the five limbs of inout three times and then conditionally subtracts a masked constant from them, all in place.
11
+// func freeze(inout *[5]uint64)
12
+TEXT ·freeze(SB),7,$0-8
13
+	MOVQ inout+0(FP), DI // DI = inout
14
+
15
+	MOVQ 0(DI),SI // load limbs 0..4 into SI, DX, CX, R8, R9
16
+	MOVQ 8(DI),DX
17
+	MOVQ 16(DI),CX
18
+	MOVQ 24(DI),R8
19
+	MOVQ 32(DI),R9
20
+	MOVQ $REDMASK51,AX // AX = limb mask (presumably 2^51-1; defined in const_amd64.h — confirm)
21
+	MOVQ AX,R10
22
+	SUBQ $18,R10 // R10 = REDMASK51 - 18
23
+	MOVQ $3,R11 // loop counter: three carry passes
24
+REDUCELOOP:
25
+	MOVQ SI,R12 // for each limb: bits above bit 50 are added to the next limb, the rest is kept via the mask
26
+	SHRQ $51,R12
27
+	ANDQ AX,SI
28
+	ADDQ R12,DX
29
+	MOVQ DX,R12
30
+	SHRQ $51,R12
31
+	ANDQ AX,DX
32
+	ADDQ R12,CX
33
+	MOVQ CX,R12
34
+	SHRQ $51,R12
35
+	ANDQ AX,CX
36
+	ADDQ R12,R8
37
+	MOVQ R8,R12
38
+	SHRQ $51,R12
39
+	ANDQ AX,R8
40
+	ADDQ R12,R9
41
+	MOVQ R9,R12
42
+	SHRQ $51,R12
43
+	ANDQ AX,R9
44
+	IMUL3Q $19,R12,R12 // the carry out of limb 4 is multiplied by 19 ...
45
+	ADDQ R12,SI // ... and added back into limb 0
46
+	SUBQ $1,R11
47
+	JA REDUCELOOP // repeat while R11 is still non-zero after the decrement; R11 == 0 on exit
48
+	MOVQ $1,R12 // R12 = 1 selects the subtraction; each CMOV below replaces it with R11 (0) when its condition holds
49
+	CMPQ R10,SI // limb 0 is compared with REDMASK51-18 — NOTE(review): confirm CMPQ operand order against the Go assembler docs
50
+	CMOVQLT R11,R12
51
+	CMPQ AX,DX // limbs 1..4 must each equal REDMASK51, otherwise R12 is cleared
52
+	CMOVQNE R11,R12
53
+	CMPQ AX,CX
54
+	CMOVQNE R11,R12
55
+	CMPQ AX,R8
56
+	CMOVQNE R11,R12
57
+	CMPQ AX,R9
58
+	CMOVQNE R11,R12
59
+	NEGQ R12 // R12 becomes all-ones (subtract) or zero (leave unchanged)
60
+	ANDQ R12,AX // mask both constants so the subtraction below is a no-op when R12 == 0
61
+	ANDQ R12,R10
62
+	SUBQ R10,SI // subtract (REDMASK51-18, REDMASK51, REDMASK51, REDMASK51, REDMASK51) or all zeros
63
+	SUBQ AX,DX
64
+	SUBQ AX,CX
65
+	SUBQ AX,R8
66
+	SUBQ AX,R9
67
+	MOVQ SI,0(DI) // store the five limbs back into inout
68
+	MOVQ DX,8(DI)
69
+	MOVQ CX,16(DI)
70
+	MOVQ R8,24(DI)
71
+	MOVQ R9,32(DI)
72
+	RET
0 73
new file mode 100644
... ...
@@ -0,0 +1,1377 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// This code was translated into a form compatible with 6a from the public
5
+// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
6
+
7
+// +build amd64,!gccgo,!appengine
8
+
9
+#include "const_amd64.h"
10
+
11
+// func ladderstep(inout *[5][5]uint64)
12
+TEXT ·ladderstep(SB),0,$296-8
13
+	MOVQ inout+0(FP),DI
14
+
15
+	MOVQ 40(DI),SI
16
+	MOVQ 48(DI),DX
17
+	MOVQ 56(DI),CX
18
+	MOVQ 64(DI),R8
19
+	MOVQ 72(DI),R9
20
+	MOVQ SI,AX
21
+	MOVQ DX,R10
22
+	MOVQ CX,R11
23
+	MOVQ R8,R12
24
+	MOVQ R9,R13
25
+	ADDQ ·_2P0(SB),AX
26
+	ADDQ ·_2P1234(SB),R10
27
+	ADDQ ·_2P1234(SB),R11
28
+	ADDQ ·_2P1234(SB),R12
29
+	ADDQ ·_2P1234(SB),R13
30
+	ADDQ 80(DI),SI
31
+	ADDQ 88(DI),DX
32
+	ADDQ 96(DI),CX
33
+	ADDQ 104(DI),R8
34
+	ADDQ 112(DI),R9
35
+	SUBQ 80(DI),AX
36
+	SUBQ 88(DI),R10
37
+	SUBQ 96(DI),R11
38
+	SUBQ 104(DI),R12
39
+	SUBQ 112(DI),R13
40
+	MOVQ SI,0(SP)
41
+	MOVQ DX,8(SP)
42
+	MOVQ CX,16(SP)
43
+	MOVQ R8,24(SP)
44
+	MOVQ R9,32(SP)
45
+	MOVQ AX,40(SP)
46
+	MOVQ R10,48(SP)
47
+	MOVQ R11,56(SP)
48
+	MOVQ R12,64(SP)
49
+	MOVQ R13,72(SP)
50
+	MOVQ 40(SP),AX
51
+	MULQ 40(SP)
52
+	MOVQ AX,SI
53
+	MOVQ DX,CX
54
+	MOVQ 40(SP),AX
55
+	SHLQ $1,AX
56
+	MULQ 48(SP)
57
+	MOVQ AX,R8
58
+	MOVQ DX,R9
59
+	MOVQ 40(SP),AX
60
+	SHLQ $1,AX
61
+	MULQ 56(SP)
62
+	MOVQ AX,R10
63
+	MOVQ DX,R11
64
+	MOVQ 40(SP),AX
65
+	SHLQ $1,AX
66
+	MULQ 64(SP)
67
+	MOVQ AX,R12
68
+	MOVQ DX,R13
69
+	MOVQ 40(SP),AX
70
+	SHLQ $1,AX
71
+	MULQ 72(SP)
72
+	MOVQ AX,R14
73
+	MOVQ DX,R15
74
+	MOVQ 48(SP),AX
75
+	MULQ 48(SP)
76
+	ADDQ AX,R10
77
+	ADCQ DX,R11
78
+	MOVQ 48(SP),AX
79
+	SHLQ $1,AX
80
+	MULQ 56(SP)
81
+	ADDQ AX,R12
82
+	ADCQ DX,R13
83
+	MOVQ 48(SP),AX
84
+	SHLQ $1,AX
85
+	MULQ 64(SP)
86
+	ADDQ AX,R14
87
+	ADCQ DX,R15
88
+	MOVQ 48(SP),DX
89
+	IMUL3Q $38,DX,AX
90
+	MULQ 72(SP)
91
+	ADDQ AX,SI
92
+	ADCQ DX,CX
93
+	MOVQ 56(SP),AX
94
+	MULQ 56(SP)
95
+	ADDQ AX,R14
96
+	ADCQ DX,R15
97
+	MOVQ 56(SP),DX
98
+	IMUL3Q $38,DX,AX
99
+	MULQ 64(SP)
100
+	ADDQ AX,SI
101
+	ADCQ DX,CX
102
+	MOVQ 56(SP),DX
103
+	IMUL3Q $38,DX,AX
104
+	MULQ 72(SP)
105
+	ADDQ AX,R8
106
+	ADCQ DX,R9
107
+	MOVQ 64(SP),DX
108
+	IMUL3Q $19,DX,AX
109
+	MULQ 64(SP)
110
+	ADDQ AX,R8
111
+	ADCQ DX,R9
112
+	MOVQ 64(SP),DX
113
+	IMUL3Q $38,DX,AX
114
+	MULQ 72(SP)
115
+	ADDQ AX,R10
116
+	ADCQ DX,R11
117
+	MOVQ 72(SP),DX
118
+	IMUL3Q $19,DX,AX
119
+	MULQ 72(SP)
120
+	ADDQ AX,R12
121
+	ADCQ DX,R13
122
+	MOVQ $REDMASK51,DX
123
+	SHLQ $13,CX:SI
124
+	ANDQ DX,SI
125
+	SHLQ $13,R9:R8
126
+	ANDQ DX,R8
127
+	ADDQ CX,R8
128
+	SHLQ $13,R11:R10
129
+	ANDQ DX,R10
130
+	ADDQ R9,R10
131
+	SHLQ $13,R13:R12
132
+	ANDQ DX,R12
133
+	ADDQ R11,R12
134
+	SHLQ $13,R15:R14
135
+	ANDQ DX,R14
136
+	ADDQ R13,R14
137
+	IMUL3Q $19,R15,CX
138
+	ADDQ CX,SI
139
+	MOVQ SI,CX
140
+	SHRQ $51,CX
141
+	ADDQ R8,CX
142
+	ANDQ DX,SI
143
+	MOVQ CX,R8
144
+	SHRQ $51,CX
145
+	ADDQ R10,CX
146
+	ANDQ DX,R8
147
+	MOVQ CX,R9
148
+	SHRQ $51,CX
149
+	ADDQ R12,CX
150
+	ANDQ DX,R9
151
+	MOVQ CX,AX
152
+	SHRQ $51,CX
153
+	ADDQ R14,CX
154
+	ANDQ DX,AX
155
+	MOVQ CX,R10
156
+	SHRQ $51,CX
157
+	IMUL3Q $19,CX,CX
158
+	ADDQ CX,SI
159
+	ANDQ DX,R10
160
+	MOVQ SI,80(SP)
161
+	MOVQ R8,88(SP)
162
+	MOVQ R9,96(SP)
163
+	MOVQ AX,104(SP)
164
+	MOVQ R10,112(SP)
165
+	MOVQ 0(SP),AX
166
+	MULQ 0(SP)
167
+	MOVQ AX,SI
168
+	MOVQ DX,CX
169
+	MOVQ 0(SP),AX
170
+	SHLQ $1,AX
171
+	MULQ 8(SP)
172
+	MOVQ AX,R8
173
+	MOVQ DX,R9
174
+	MOVQ 0(SP),AX
175
+	SHLQ $1,AX
176
+	MULQ 16(SP)
177
+	MOVQ AX,R10
178
+	MOVQ DX,R11
179
+	MOVQ 0(SP),AX
180
+	SHLQ $1,AX
181
+	MULQ 24(SP)
182
+	MOVQ AX,R12
183
+	MOVQ DX,R13
184
+	MOVQ 0(SP),AX
185
+	SHLQ $1,AX
186
+	MULQ 32(SP)
187
+	MOVQ AX,R14
188
+	MOVQ DX,R15
189
+	MOVQ 8(SP),AX
190
+	MULQ 8(SP)
191
+	ADDQ AX,R10
192
+	ADCQ DX,R11
193
+	MOVQ 8(SP),AX
194
+	SHLQ $1,AX
195
+	MULQ 16(SP)
196
+	ADDQ AX,R12
197
+	ADCQ DX,R13
198
+	MOVQ 8(SP),AX
199
+	SHLQ $1,AX
200
+	MULQ 24(SP)
201
+	ADDQ AX,R14
202
+	ADCQ DX,R15
203
+	MOVQ 8(SP),DX
204
+	IMUL3Q $38,DX,AX
205
+	MULQ 32(SP)
206
+	ADDQ AX,SI
207
+	ADCQ DX,CX
208
+	MOVQ 16(SP),AX
209
+	MULQ 16(SP)
210
+	ADDQ AX,R14
211
+	ADCQ DX,R15
212
+	MOVQ 16(SP),DX
213
+	IMUL3Q $38,DX,AX
214
+	MULQ 24(SP)
215
+	ADDQ AX,SI
216
+	ADCQ DX,CX
217
+	MOVQ 16(SP),DX
218
+	IMUL3Q $38,DX,AX
219
+	MULQ 32(SP)
220
+	ADDQ AX,R8
221
+	ADCQ DX,R9
222
+	MOVQ 24(SP),DX
223
+	IMUL3Q $19,DX,AX
224
+	MULQ 24(SP)
225
+	ADDQ AX,R8
226
+	ADCQ DX,R9
227
+	MOVQ 24(SP),DX
228
+	IMUL3Q $38,DX,AX
229
+	MULQ 32(SP)
230
+	ADDQ AX,R10
231
+	ADCQ DX,R11
232
+	MOVQ 32(SP),DX
233
+	IMUL3Q $19,DX,AX
234
+	MULQ 32(SP)
235
+	ADDQ AX,R12
236
+	ADCQ DX,R13
237
+	MOVQ $REDMASK51,DX
238
+	SHLQ $13,CX:SI
239
+	ANDQ DX,SI
240
+	SHLQ $13,R9:R8
241
+	ANDQ DX,R8
242
+	ADDQ CX,R8
243
+	SHLQ $13,R11:R10
244
+	ANDQ DX,R10
245
+	ADDQ R9,R10
246
+	SHLQ $13,R13:R12
247
+	ANDQ DX,R12
248
+	ADDQ R11,R12
249
+	SHLQ $13,R15:R14
250
+	ANDQ DX,R14
251
+	ADDQ R13,R14
252
+	IMUL3Q $19,R15,CX
253
+	ADDQ CX,SI
254
+	MOVQ SI,CX
255
+	SHRQ $51,CX
256
+	ADDQ R8,CX
257
+	ANDQ DX,SI
258
+	MOVQ CX,R8
259
+	SHRQ $51,CX
260
+	ADDQ R10,CX
261
+	ANDQ DX,R8
262
+	MOVQ CX,R9
263
+	SHRQ $51,CX
264
+	ADDQ R12,CX
265
+	ANDQ DX,R9
266
+	MOVQ CX,AX
267
+	SHRQ $51,CX
268
+	ADDQ R14,CX
269
+	ANDQ DX,AX
270
+	MOVQ CX,R10
271
+	SHRQ $51,CX
272
+	IMUL3Q $19,CX,CX
273
+	ADDQ CX,SI
274
+	ANDQ DX,R10
275
+	MOVQ SI,120(SP)
276
+	MOVQ R8,128(SP)
277
+	MOVQ R9,136(SP)
278
+	MOVQ AX,144(SP)
279
+	MOVQ R10,152(SP)
280
+	MOVQ SI,SI
281
+	MOVQ R8,DX
282
+	MOVQ R9,CX
283
+	MOVQ AX,R8
284
+	MOVQ R10,R9
285
+	ADDQ ·_2P0(SB),SI
286
+	ADDQ ·_2P1234(SB),DX
287
+	ADDQ ·_2P1234(SB),CX
288
+	ADDQ ·_2P1234(SB),R8
289
+	ADDQ ·_2P1234(SB),R9
290
+	SUBQ 80(SP),SI
291
+	SUBQ 88(SP),DX
292
+	SUBQ 96(SP),CX
293
+	SUBQ 104(SP),R8
294
+	SUBQ 112(SP),R9
295
+	MOVQ SI,160(SP)
296
+	MOVQ DX,168(SP)
297
+	MOVQ CX,176(SP)
298
+	MOVQ R8,184(SP)
299
+	MOVQ R9,192(SP)
300
+	MOVQ 120(DI),SI
301
+	MOVQ 128(DI),DX
302
+	MOVQ 136(DI),CX
303
+	MOVQ 144(DI),R8
304
+	MOVQ 152(DI),R9
305
+	MOVQ SI,AX
306
+	MOVQ DX,R10
307
+	MOVQ CX,R11
308
+	MOVQ R8,R12
309
+	MOVQ R9,R13
310
+	ADDQ ·_2P0(SB),AX
311
+	ADDQ ·_2P1234(SB),R10
312
+	ADDQ ·_2P1234(SB),R11
313
+	ADDQ ·_2P1234(SB),R12
314
+	ADDQ ·_2P1234(SB),R13
315
+	ADDQ 160(DI),SI
316
+	ADDQ 168(DI),DX
317
+	ADDQ 176(DI),CX
318
+	ADDQ 184(DI),R8
319
+	ADDQ 192(DI),R9
320
+	SUBQ 160(DI),AX
321
+	SUBQ 168(DI),R10
322
+	SUBQ 176(DI),R11
323
+	SUBQ 184(DI),R12
324
+	SUBQ 192(DI),R13
325
+	MOVQ SI,200(SP)
326
+	MOVQ DX,208(SP)
327
+	MOVQ CX,216(SP)
328
+	MOVQ R8,224(SP)
329
+	MOVQ R9,232(SP)
330
+	MOVQ AX,240(SP)
331
+	MOVQ R10,248(SP)
332
+	MOVQ R11,256(SP)
333
+	MOVQ R12,264(SP)
334
+	MOVQ R13,272(SP)
335
+	MOVQ 224(SP),SI
336
+	IMUL3Q $19,SI,AX
337
+	MOVQ AX,280(SP)
338
+	MULQ 56(SP)
339
+	MOVQ AX,SI
340
+	MOVQ DX,CX
341
+	MOVQ 232(SP),DX
342
+	IMUL3Q $19,DX,AX
343
+	MOVQ AX,288(SP)
344
+	MULQ 48(SP)
345
+	ADDQ AX,SI
346
+	ADCQ DX,CX
347
+	MOVQ 200(SP),AX
348
+	MULQ 40(SP)
349
+	ADDQ AX,SI
350
+	ADCQ DX,CX
351
+	MOVQ 200(SP),AX
352
+	MULQ 48(SP)
353
+	MOVQ AX,R8
354
+	MOVQ DX,R9
355
+	MOVQ 200(SP),AX
356
+	MULQ 56(SP)
357
+	MOVQ AX,R10
358
+	MOVQ DX,R11
359
+	MOVQ 200(SP),AX
360
+	MULQ 64(SP)
361
+	MOVQ AX,R12
362
+	MOVQ DX,R13
363
+	MOVQ 200(SP),AX
364
+	MULQ 72(SP)
365
+	MOVQ AX,R14
366
+	MOVQ DX,R15
367
+	MOVQ 208(SP),AX
368
+	MULQ 40(SP)
369
+	ADDQ AX,R8
370
+	ADCQ DX,R9
371
+	MOVQ 208(SP),AX
372
+	MULQ 48(SP)
373
+	ADDQ AX,R10
374
+	ADCQ DX,R11
375
+	MOVQ 208(SP),AX
376
+	MULQ 56(SP)
377
+	ADDQ AX,R12
378
+	ADCQ DX,R13
379
+	MOVQ 208(SP),AX
380
+	MULQ 64(SP)
381
+	ADDQ AX,R14
382
+	ADCQ DX,R15
383
+	MOVQ 208(SP),DX
384
+	IMUL3Q $19,DX,AX
385
+	MULQ 72(SP)
386
+	ADDQ AX,SI
387
+	ADCQ DX,CX
388
+	MOVQ 216(SP),AX
389
+	MULQ 40(SP)
390
+	ADDQ AX,R10
391
+	ADCQ DX,R11
392
+	MOVQ 216(SP),AX
393
+	MULQ 48(SP)
394
+	ADDQ AX,R12
395
+	ADCQ DX,R13
396
+	MOVQ 216(SP),AX
397
+	MULQ 56(SP)
398
+	ADDQ AX,R14
399
+	ADCQ DX,R15
400
+	MOVQ 216(SP),DX
401
+	IMUL3Q $19,DX,AX
402
+	MULQ 64(SP)
403
+	ADDQ AX,SI
404
+	ADCQ DX,CX
405
+	MOVQ 216(SP),DX
406
+	IMUL3Q $19,DX,AX
407
+	MULQ 72(SP)
408
+	ADDQ AX,R8
409
+	ADCQ DX,R9
410
+	MOVQ 224(SP),AX
411
+	MULQ 40(SP)
412
+	ADDQ AX,R12
413
+	ADCQ DX,R13
414
+	MOVQ 224(SP),AX
415
+	MULQ 48(SP)
416
+	ADDQ AX,R14
417
+	ADCQ DX,R15
418
+	MOVQ 280(SP),AX
419
+	MULQ 64(SP)
420
+	ADDQ AX,R8
421
+	ADCQ DX,R9
422
+	MOVQ 280(SP),AX
423
+	MULQ 72(SP)
424
+	ADDQ AX,R10
425
+	ADCQ DX,R11
426
+	MOVQ 232(SP),AX
427
+	MULQ 40(SP)
428
+	ADDQ AX,R14
429
+	ADCQ DX,R15
430
+	MOVQ 288(SP),AX
431
+	MULQ 56(SP)
432
+	ADDQ AX,R8
433
+	ADCQ DX,R9
434
+	MOVQ 288(SP),AX
435
+	MULQ 64(SP)
436
+	ADDQ AX,R10
437
+	ADCQ DX,R11
438
+	MOVQ 288(SP),AX
439
+	MULQ 72(SP)
440
+	ADDQ AX,R12
441
+	ADCQ DX,R13
442
+	MOVQ $REDMASK51,DX
443
+	SHLQ $13,CX:SI
444
+	ANDQ DX,SI
445
+	SHLQ $13,R9:R8
446
+	ANDQ DX,R8
447
+	ADDQ CX,R8
448
+	SHLQ $13,R11:R10
449
+	ANDQ DX,R10
450
+	ADDQ R9,R10
451
+	SHLQ $13,R13:R12
452
+	ANDQ DX,R12
453
+	ADDQ R11,R12
454
+	SHLQ $13,R15:R14
455
+	ANDQ DX,R14
456
+	ADDQ R13,R14
457
+	IMUL3Q $19,R15,CX
458
+	ADDQ CX,SI
459
+	MOVQ SI,CX
460
+	SHRQ $51,CX
461
+	ADDQ R8,CX
462
+	MOVQ CX,R8
463
+	SHRQ $51,CX
464
+	ANDQ DX,SI
465
+	ADDQ R10,CX
466
+	MOVQ CX,R9
467
+	SHRQ $51,CX
468
+	ANDQ DX,R8
469
+	ADDQ R12,CX
470
+	MOVQ CX,AX
471
+	SHRQ $51,CX
472
+	ANDQ DX,R9
473
+	ADDQ R14,CX
474
+	MOVQ CX,R10
475
+	SHRQ $51,CX
476
+	ANDQ DX,AX
477
+	IMUL3Q $19,CX,CX
478
+	ADDQ CX,SI
479
+	ANDQ DX,R10
480
+	MOVQ SI,40(SP)
481
+	MOVQ R8,48(SP)
482
+	MOVQ R9,56(SP)
483
+	MOVQ AX,64(SP)
484
+	MOVQ R10,72(SP)
485
+	MOVQ 264(SP),SI
486
+	IMUL3Q $19,SI,AX
487
+	MOVQ AX,200(SP)
488
+	MULQ 16(SP)
489
+	MOVQ AX,SI
490
+	MOVQ DX,CX
491
+	MOVQ 272(SP),DX
492
+	IMUL3Q $19,DX,AX
493
+	MOVQ AX,208(SP)
494
+	MULQ 8(SP)
495
+	ADDQ AX,SI
496
+	ADCQ DX,CX
497
+	MOVQ 240(SP),AX
498
+	MULQ 0(SP)
499
+	ADDQ AX,SI
500
+	ADCQ DX,CX
501
+	MOVQ 240(SP),AX
502
+	MULQ 8(SP)
503
+	MOVQ AX,R8
504
+	MOVQ DX,R9
505
+	MOVQ 240(SP),AX
506
+	MULQ 16(SP)
507
+	MOVQ AX,R10
508
+	MOVQ DX,R11
509
+	MOVQ 240(SP),AX
510
+	MULQ 24(SP)
511
+	MOVQ AX,R12
512
+	MOVQ DX,R13
513
+	MOVQ 240(SP),AX
514
+	MULQ 32(SP)
515
+	MOVQ AX,R14
516
+	MOVQ DX,R15
517
+	MOVQ 248(SP),AX
518
+	MULQ 0(SP)
519
+	ADDQ AX,R8
520
+	ADCQ DX,R9
521
+	MOVQ 248(SP),AX
522
+	MULQ 8(SP)
523
+	ADDQ AX,R10
524
+	ADCQ DX,R11
525
+	MOVQ 248(SP),AX
526
+	MULQ 16(SP)
527
+	ADDQ AX,R12
528
+	ADCQ DX,R13
529
+	MOVQ 248(SP),AX
530
+	MULQ 24(SP)
531
+	ADDQ AX,R14
532
+	ADCQ DX,R15
533
+	MOVQ 248(SP),DX
534
+	IMUL3Q $19,DX,AX
535
+	MULQ 32(SP)
536
+	ADDQ AX,SI
537
+	ADCQ DX,CX
538
+	MOVQ 256(SP),AX
539
+	MULQ 0(SP)
540
+	ADDQ AX,R10
541
+	ADCQ DX,R11
542
+	MOVQ 256(SP),AX
543
+	MULQ 8(SP)
544
+	ADDQ AX,R12
545
+	ADCQ DX,R13
546
+	MOVQ 256(SP),AX
547
+	MULQ 16(SP)
548
+	ADDQ AX,R14
549
+	ADCQ DX,R15
550
+	MOVQ 256(SP),DX
551
+	IMUL3Q $19,DX,AX
552
+	MULQ 24(SP)
553
+	ADDQ AX,SI
554
+	ADCQ DX,CX
555
+	MOVQ 256(SP),DX
556
+	IMUL3Q $19,DX,AX
557
+	MULQ 32(SP)
558
+	ADDQ AX,R8
559
+	ADCQ DX,R9
560
+	MOVQ 264(SP),AX
561
+	MULQ 0(SP)
562
+	ADDQ AX,R12
563
+	ADCQ DX,R13
564
+	MOVQ 264(SP),AX
565
+	MULQ 8(SP)
566
+	ADDQ AX,R14
567
+	ADCQ DX,R15
568
+	MOVQ 200(SP),AX
569
+	MULQ 24(SP)
570
+	ADDQ AX,R8
571
+	ADCQ DX,R9
572
+	MOVQ 200(SP),AX
573
+	MULQ 32(SP)
574
+	ADDQ AX,R10
575
+	ADCQ DX,R11
576
+	MOVQ 272(SP),AX
577
+	MULQ 0(SP)
578
+	ADDQ AX,R14
579
+	ADCQ DX,R15
580
+	MOVQ 208(SP),AX
581
+	MULQ 16(SP)
582
+	ADDQ AX,R8
583
+	ADCQ DX,R9
584
+	MOVQ 208(SP),AX
585
+	MULQ 24(SP)
586
+	ADDQ AX,R10
587
+	ADCQ DX,R11
588
+	MOVQ 208(SP),AX
589
+	MULQ 32(SP)
590
+	ADDQ AX,R12
591
+	ADCQ DX,R13
592
+	MOVQ $REDMASK51,DX
593
+	SHLQ $13,CX:SI
594
+	ANDQ DX,SI
595
+	SHLQ $13,R9:R8
596
+	ANDQ DX,R8
597
+	ADDQ CX,R8
598
+	SHLQ $13,R11:R10
599
+	ANDQ DX,R10
600
+	ADDQ R9,R10
601
+	SHLQ $13,R13:R12
602
+	ANDQ DX,R12
603
+	ADDQ R11,R12
604
+	SHLQ $13,R15:R14
605
+	ANDQ DX,R14
606
+	ADDQ R13,R14
607
+	IMUL3Q $19,R15,CX
608
+	ADDQ CX,SI
609
+	MOVQ SI,CX
610
+	SHRQ $51,CX
611
+	ADDQ R8,CX
612
+	MOVQ CX,R8
613
+	SHRQ $51,CX
614
+	ANDQ DX,SI
615
+	ADDQ R10,CX
616
+	MOVQ CX,R9
617
+	SHRQ $51,CX
618
+	ANDQ DX,R8
619
+	ADDQ R12,CX
620
+	MOVQ CX,AX
621
+	SHRQ $51,CX
622
+	ANDQ DX,R9
623
+	ADDQ R14,CX
624
+	MOVQ CX,R10
625
+	SHRQ $51,CX
626
+	ANDQ DX,AX
627
+	IMUL3Q $19,CX,CX
628
+	ADDQ CX,SI
629
+	ANDQ DX,R10
630
+	MOVQ SI,DX
631
+	MOVQ R8,CX
632
+	MOVQ R9,R11
633
+	MOVQ AX,R12
634
+	MOVQ R10,R13
635
+	ADDQ ·_2P0(SB),DX
636
+	ADDQ ·_2P1234(SB),CX
637
+	ADDQ ·_2P1234(SB),R11
638
+	ADDQ ·_2P1234(SB),R12
639
+	ADDQ ·_2P1234(SB),R13
640
+	ADDQ 40(SP),SI
641
+	ADDQ 48(SP),R8
642
+	ADDQ 56(SP),R9
643
+	ADDQ 64(SP),AX
644
+	ADDQ 72(SP),R10
645
+	SUBQ 40(SP),DX
646
+	SUBQ 48(SP),CX
647
+	SUBQ 56(SP),R11
648
+	SUBQ 64(SP),R12
649
+	SUBQ 72(SP),R13
650
+	MOVQ SI,120(DI)
651
+	MOVQ R8,128(DI)
652
+	MOVQ R9,136(DI)
653
+	MOVQ AX,144(DI)
654
+	MOVQ R10,152(DI)
655
+	MOVQ DX,160(DI)
656
+	MOVQ CX,168(DI)
657
+	MOVQ R11,176(DI)
658
+	MOVQ R12,184(DI)
659
+	MOVQ R13,192(DI)
660
+	MOVQ 120(DI),AX
661
+	MULQ 120(DI)
662
+	MOVQ AX,SI
663
+	MOVQ DX,CX
664
+	MOVQ 120(DI),AX
665
+	SHLQ $1,AX
666
+	MULQ 128(DI)
667
+	MOVQ AX,R8
668
+	MOVQ DX,R9
669
+	MOVQ 120(DI),AX
670
+	SHLQ $1,AX
671
+	MULQ 136(DI)
672
+	MOVQ AX,R10
673
+	MOVQ DX,R11
674
+	MOVQ 120(DI),AX
675
+	SHLQ $1,AX
676
+	MULQ 144(DI)
677
+	MOVQ AX,R12
678
+	MOVQ DX,R13
679
+	MOVQ 120(DI),AX
680
+	SHLQ $1,AX
681
+	MULQ 152(DI)
682
+	MOVQ AX,R14
683
+	MOVQ DX,R15
684
+	MOVQ 128(DI),AX
685
+	MULQ 128(DI)
686
+	ADDQ AX,R10
687
+	ADCQ DX,R11
688
+	MOVQ 128(DI),AX
689
+	SHLQ $1,AX
690
+	MULQ 136(DI)
691
+	ADDQ AX,R12
692
+	ADCQ DX,R13
693
+	MOVQ 128(DI),AX
694
+	SHLQ $1,AX
695
+	MULQ 144(DI)
696
+	ADDQ AX,R14
697
+	ADCQ DX,R15
698
+	MOVQ 128(DI),DX
699
+	IMUL3Q $38,DX,AX
700
+	MULQ 152(DI)
701
+	ADDQ AX,SI
702
+	ADCQ DX,CX
703
+	MOVQ 136(DI),AX
704
+	MULQ 136(DI)
705
+	ADDQ AX,R14
706
+	ADCQ DX,R15
707
+	MOVQ 136(DI),DX
708
+	IMUL3Q $38,DX,AX
709
+	MULQ 144(DI)
710
+	ADDQ AX,SI
711
+	ADCQ DX,CX
712
+	MOVQ 136(DI),DX
713
+	IMUL3Q $38,DX,AX
714
+	MULQ 152(DI)
715
+	ADDQ AX,R8
716
+	ADCQ DX,R9
717
+	MOVQ 144(DI),DX
718
+	IMUL3Q $19,DX,AX
719
+	MULQ 144(DI)
720
+	ADDQ AX,R8
721
+	ADCQ DX,R9
722
+	MOVQ 144(DI),DX
723
+	IMUL3Q $38,DX,AX
724
+	MULQ 152(DI)
725
+	ADDQ AX,R10
726
+	ADCQ DX,R11
727
+	MOVQ 152(DI),DX
728
+	IMUL3Q $19,DX,AX
729
+	MULQ 152(DI)
730
+	ADDQ AX,R12
731
+	ADCQ DX,R13
732
+	MOVQ $REDMASK51,DX
733
+	SHLQ $13,CX:SI
734
+	ANDQ DX,SI
735
+	SHLQ $13,R9:R8
736
+	ANDQ DX,R8
737
+	ADDQ CX,R8
738
+	SHLQ $13,R11:R10
739
+	ANDQ DX,R10
740
+	ADDQ R9,R10
741
+	SHLQ $13,R13:R12
742
+	ANDQ DX,R12
743
+	ADDQ R11,R12
744
+	SHLQ $13,R15:R14
745
+	ANDQ DX,R14
746
+	ADDQ R13,R14
747
+	IMUL3Q $19,R15,CX
748
+	ADDQ CX,SI
749
+	MOVQ SI,CX
750
+	SHRQ $51,CX
751
+	ADDQ R8,CX
752
+	ANDQ DX,SI
753
+	MOVQ CX,R8
754
+	SHRQ $51,CX
755
+	ADDQ R10,CX
756
+	ANDQ DX,R8
757
+	MOVQ CX,R9
758
+	SHRQ $51,CX
759
+	ADDQ R12,CX
760
+	ANDQ DX,R9
761
+	MOVQ CX,AX
762
+	SHRQ $51,CX
763
+	ADDQ R14,CX
764
+	ANDQ DX,AX
765
+	MOVQ CX,R10
766
+	SHRQ $51,CX
767
+	IMUL3Q $19,CX,CX
768
+	ADDQ CX,SI
769
+	ANDQ DX,R10
770
+	MOVQ SI,120(DI)
771
+	MOVQ R8,128(DI)
772
+	MOVQ R9,136(DI)
773
+	MOVQ AX,144(DI)
774
+	MOVQ R10,152(DI)
775
+	MOVQ 160(DI),AX
776
+	MULQ 160(DI)
777
+	MOVQ AX,SI
778
+	MOVQ DX,CX
779
+	MOVQ 160(DI),AX
780
+	SHLQ $1,AX
781
+	MULQ 168(DI)
782
+	MOVQ AX,R8
783
+	MOVQ DX,R9
784
+	MOVQ 160(DI),AX
785
+	SHLQ $1,AX
786
+	MULQ 176(DI)
787
+	MOVQ AX,R10
788
+	MOVQ DX,R11
789
+	MOVQ 160(DI),AX
790
+	SHLQ $1,AX
791
+	MULQ 184(DI)
792
+	MOVQ AX,R12
793
+	MOVQ DX,R13
794
+	MOVQ 160(DI),AX
795
+	SHLQ $1,AX
796
+	MULQ 192(DI)
797
+	MOVQ AX,R14
798
+	MOVQ DX,R15
799
+	MOVQ 168(DI),AX
800
+	MULQ 168(DI)
801
+	ADDQ AX,R10
802
+	ADCQ DX,R11
803
+	MOVQ 168(DI),AX
804
+	SHLQ $1,AX
805
+	MULQ 176(DI)
806
+	ADDQ AX,R12
807
+	ADCQ DX,R13
808
+	MOVQ 168(DI),AX
809
+	SHLQ $1,AX
810
+	MULQ 184(DI)
811
+	ADDQ AX,R14
812
+	ADCQ DX,R15
813
+	MOVQ 168(DI),DX
814
+	IMUL3Q $38,DX,AX
815
+	MULQ 192(DI)
816
+	ADDQ AX,SI
817
+	ADCQ DX,CX
818
+	MOVQ 176(DI),AX
819
+	MULQ 176(DI)
820
+	ADDQ AX,R14
821
+	ADCQ DX,R15
822
+	MOVQ 176(DI),DX
823
+	IMUL3Q $38,DX,AX
824
+	MULQ 184(DI)
825
+	ADDQ AX,SI
826
+	ADCQ DX,CX
827
+	MOVQ 176(DI),DX
828
+	IMUL3Q $38,DX,AX
829
+	MULQ 192(DI)
830
+	ADDQ AX,R8
831
+	ADCQ DX,R9
832
+	MOVQ 184(DI),DX
833
+	IMUL3Q $19,DX,AX
834
+	MULQ 184(DI)
835
+	ADDQ AX,R8
836
+	ADCQ DX,R9
837
+	MOVQ 184(DI),DX
838
+	IMUL3Q $38,DX,AX
839
+	MULQ 192(DI)
840
+	ADDQ AX,R10
841
+	ADCQ DX,R11
842
+	MOVQ 192(DI),DX
843
+	IMUL3Q $19,DX,AX
844
+	MULQ 192(DI)
845
+	ADDQ AX,R12
846
+	ADCQ DX,R13
847
+	MOVQ $REDMASK51,DX
848
+	SHLQ $13,CX:SI
849
+	ANDQ DX,SI
850
+	SHLQ $13,R9:R8
851
+	ANDQ DX,R8
852
+	ADDQ CX,R8
853
+	SHLQ $13,R11:R10
854
+	ANDQ DX,R10
855
+	ADDQ R9,R10
856
+	SHLQ $13,R13:R12
857
+	ANDQ DX,R12
858
+	ADDQ R11,R12
859
+	SHLQ $13,R15:R14
860
+	ANDQ DX,R14
861
+	ADDQ R13,R14
862
+	IMUL3Q $19,R15,CX
863
+	ADDQ CX,SI
864
+	MOVQ SI,CX
865
+	SHRQ $51,CX
866
+	ADDQ R8,CX
867
+	ANDQ DX,SI
868
+	MOVQ CX,R8
869
+	SHRQ $51,CX
870
+	ADDQ R10,CX
871
+	ANDQ DX,R8
872
+	MOVQ CX,R9
873
+	SHRQ $51,CX
874
+	ADDQ R12,CX
875
+	ANDQ DX,R9
876
+	MOVQ CX,AX
877
+	SHRQ $51,CX
878
+	ADDQ R14,CX
879
+	ANDQ DX,AX
880
+	MOVQ CX,R10
881
+	SHRQ $51,CX
882
+	IMUL3Q $19,CX,CX
883
+	ADDQ CX,SI
884
+	ANDQ DX,R10
885
+	MOVQ SI,160(DI)
886
+	MOVQ R8,168(DI)
887
+	MOVQ R9,176(DI)
888
+	MOVQ AX,184(DI)
889
+	MOVQ R10,192(DI)
890
+	MOVQ 184(DI),SI
891
+	IMUL3Q $19,SI,AX
892
+	MOVQ AX,0(SP)
893
+	MULQ 16(DI)
894
+	MOVQ AX,SI
895
+	MOVQ DX,CX
896
+	MOVQ 192(DI),DX
897
+	IMUL3Q $19,DX,AX
898
+	MOVQ AX,8(SP)
899
+	MULQ 8(DI)
900
+	ADDQ AX,SI
901
+	ADCQ DX,CX
902
+	MOVQ 160(DI),AX
903
+	MULQ 0(DI)
904
+	ADDQ AX,SI
905
+	ADCQ DX,CX
906
+	MOVQ 160(DI),AX
907
+	MULQ 8(DI)
908
+	MOVQ AX,R8
909
+	MOVQ DX,R9
910
+	MOVQ 160(DI),AX
911
+	MULQ 16(DI)
912
+	MOVQ AX,R10
913
+	MOVQ DX,R11
914
+	MOVQ 160(DI),AX
915
+	MULQ 24(DI)
916
+	MOVQ AX,R12
917
+	MOVQ DX,R13
918
+	MOVQ 160(DI),AX
919
+	MULQ 32(DI)
920
+	MOVQ AX,R14
921
+	MOVQ DX,R15
922
+	MOVQ 168(DI),AX
923
+	MULQ 0(DI)
924
+	ADDQ AX,R8
925
+	ADCQ DX,R9
926
+	MOVQ 168(DI),AX
927
+	MULQ 8(DI)
928
+	ADDQ AX,R10
929
+	ADCQ DX,R11
930
+	MOVQ 168(DI),AX
931
+	MULQ 16(DI)
932
+	ADDQ AX,R12
933
+	ADCQ DX,R13
934
+	MOVQ 168(DI),AX
935
+	MULQ 24(DI)
936
+	ADDQ AX,R14
937
+	ADCQ DX,R15
938
+	MOVQ 168(DI),DX
939
+	IMUL3Q $19,DX,AX
940
+	MULQ 32(DI)
941
+	ADDQ AX,SI
942
+	ADCQ DX,CX
943
+	MOVQ 176(DI),AX
944
+	MULQ 0(DI)
945
+	ADDQ AX,R10
946
+	ADCQ DX,R11
947
+	MOVQ 176(DI),AX
948
+	MULQ 8(DI)
949
+	ADDQ AX,R12
950
+	ADCQ DX,R13
951
+	MOVQ 176(DI),AX
952
+	MULQ 16(DI)
953
+	ADDQ AX,R14
954
+	ADCQ DX,R15
955
+	MOVQ 176(DI),DX
956
+	IMUL3Q $19,DX,AX
957
+	MULQ 24(DI)
958
+	ADDQ AX,SI
959
+	ADCQ DX,CX
960
+	MOVQ 176(DI),DX
961
+	IMUL3Q $19,DX,AX
962
+	MULQ 32(DI)
963
+	ADDQ AX,R8
964
+	ADCQ DX,R9
965
+	MOVQ 184(DI),AX
966
+	MULQ 0(DI)
967
+	ADDQ AX,R12
968
+	ADCQ DX,R13
969
+	MOVQ 184(DI),AX
970
+	MULQ 8(DI)
971
+	ADDQ AX,R14
972
+	ADCQ DX,R15
973
+	MOVQ 0(SP),AX
974
+	MULQ 24(DI)
975
+	ADDQ AX,R8
976
+	ADCQ DX,R9
977
+	MOVQ 0(SP),AX
978
+	MULQ 32(DI)
979
+	ADDQ AX,R10
980
+	ADCQ DX,R11
981
+	MOVQ 192(DI),AX
982
+	MULQ 0(DI)
983
+	ADDQ AX,R14
984
+	ADCQ DX,R15
985
+	MOVQ 8(SP),AX
986
+	MULQ 16(DI)
987
+	ADDQ AX,R8
988
+	ADCQ DX,R9
989
+	MOVQ 8(SP),AX
990
+	MULQ 24(DI)
991
+	ADDQ AX,R10
992
+	ADCQ DX,R11
993
+	MOVQ 8(SP),AX
994
+	MULQ 32(DI)
995
+	ADDQ AX,R12
996
+	ADCQ DX,R13
997
+	MOVQ $REDMASK51,DX
998
+	SHLQ $13,CX:SI
999
+	ANDQ DX,SI
1000
+	SHLQ $13,R9:R8
1001
+	ANDQ DX,R8
1002
+	ADDQ CX,R8
1003
+	SHLQ $13,R11:R10
1004
+	ANDQ DX,R10
1005
+	ADDQ R9,R10
1006
+	SHLQ $13,R13:R12
1007
+	ANDQ DX,R12
1008
+	ADDQ R11,R12
1009
+	SHLQ $13,R15:R14
1010
+	ANDQ DX,R14
1011
+	ADDQ R13,R14
1012
+	IMUL3Q $19,R15,CX
1013
+	ADDQ CX,SI
1014
+	MOVQ SI,CX
1015
+	SHRQ $51,CX
1016
+	ADDQ R8,CX
1017
+	MOVQ CX,R8
1018
+	SHRQ $51,CX
1019
+	ANDQ DX,SI
1020
+	ADDQ R10,CX
1021
+	MOVQ CX,R9
1022
+	SHRQ $51,CX
1023
+	ANDQ DX,R8
1024
+	ADDQ R12,CX
1025
+	MOVQ CX,AX
1026
+	SHRQ $51,CX
1027
+	ANDQ DX,R9
1028
+	ADDQ R14,CX
1029
+	MOVQ CX,R10
1030
+	SHRQ $51,CX
1031
+	ANDQ DX,AX
1032
+	IMUL3Q $19,CX,CX
1033
+	ADDQ CX,SI
1034
+	ANDQ DX,R10
1035
+	MOVQ SI,160(DI)
1036
+	MOVQ R8,168(DI)
1037
+	MOVQ R9,176(DI)
1038
+	MOVQ AX,184(DI)
1039
+	MOVQ R10,192(DI)
1040
+	MOVQ 144(SP),SI
1041
+	IMUL3Q $19,SI,AX
1042
+	MOVQ AX,0(SP)
1043
+	MULQ 96(SP)
1044
+	MOVQ AX,SI
1045
+	MOVQ DX,CX
1046
+	MOVQ 152(SP),DX
1047
+	IMUL3Q $19,DX,AX
1048
+	MOVQ AX,8(SP)
1049
+	MULQ 88(SP)
1050
+	ADDQ AX,SI
1051
+	ADCQ DX,CX
1052
+	MOVQ 120(SP),AX
1053
+	MULQ 80(SP)
1054
+	ADDQ AX,SI
1055
+	ADCQ DX,CX
1056
+	MOVQ 120(SP),AX
1057
+	MULQ 88(SP)
1058
+	MOVQ AX,R8
1059
+	MOVQ DX,R9
1060
+	MOVQ 120(SP),AX
1061
+	MULQ 96(SP)
1062
+	MOVQ AX,R10
1063
+	MOVQ DX,R11
1064
+	MOVQ 120(SP),AX
1065
+	MULQ 104(SP)
1066
+	MOVQ AX,R12
1067
+	MOVQ DX,R13
1068
+	MOVQ 120(SP),AX
1069
+	MULQ 112(SP)
1070
+	MOVQ AX,R14
1071
+	MOVQ DX,R15
1072
+	MOVQ 128(SP),AX
1073
+	MULQ 80(SP)
1074
+	ADDQ AX,R8
1075
+	ADCQ DX,R9
1076
+	MOVQ 128(SP),AX
1077
+	MULQ 88(SP)
1078
+	ADDQ AX,R10
1079
+	ADCQ DX,R11
1080
+	MOVQ 128(SP),AX
1081
+	MULQ 96(SP)
1082
+	ADDQ AX,R12
1083
+	ADCQ DX,R13
1084
+	MOVQ 128(SP),AX
1085
+	MULQ 104(SP)
1086
+	ADDQ AX,R14
1087
+	ADCQ DX,R15
1088
+	MOVQ 128(SP),DX
1089
+	IMUL3Q $19,DX,AX
1090
+	MULQ 112(SP)
1091
+	ADDQ AX,SI
1092
+	ADCQ DX,CX
1093
+	MOVQ 136(SP),AX
1094
+	MULQ 80(SP)
1095
+	ADDQ AX,R10
1096
+	ADCQ DX,R11
1097
+	MOVQ 136(SP),AX
1098
+	MULQ 88(SP)
1099
+	ADDQ AX,R12
1100
+	ADCQ DX,R13
1101
+	MOVQ 136(SP),AX
1102
+	MULQ 96(SP)
1103
+	ADDQ AX,R14
1104
+	ADCQ DX,R15
1105
+	MOVQ 136(SP),DX
1106
+	IMUL3Q $19,DX,AX
1107
+	MULQ 104(SP)
1108
+	ADDQ AX,SI
1109
+	ADCQ DX,CX
1110
+	MOVQ 136(SP),DX
1111
+	IMUL3Q $19,DX,AX
1112
+	MULQ 112(SP)
1113
+	ADDQ AX,R8
1114
+	ADCQ DX,R9
1115
+	MOVQ 144(SP),AX
1116
+	MULQ 80(SP)
1117
+	ADDQ AX,R12
1118
+	ADCQ DX,R13
1119
+	MOVQ 144(SP),AX
1120
+	MULQ 88(SP)
1121
+	ADDQ AX,R14
1122
+	ADCQ DX,R15
1123
+	MOVQ 0(SP),AX
1124
+	MULQ 104(SP)
1125
+	ADDQ AX,R8
1126
+	ADCQ DX,R9
1127
+	MOVQ 0(SP),AX
1128
+	MULQ 112(SP)
1129
+	ADDQ AX,R10
1130
+	ADCQ DX,R11
1131
+	MOVQ 152(SP),AX
1132
+	MULQ 80(SP)
1133
+	ADDQ AX,R14
1134
+	ADCQ DX,R15
1135
+	MOVQ 8(SP),AX
1136
+	MULQ 96(SP)
1137
+	ADDQ AX,R8
1138
+	ADCQ DX,R9
1139
+	MOVQ 8(SP),AX
1140
+	MULQ 104(SP)
1141
+	ADDQ AX,R10
1142
+	ADCQ DX,R11
1143
+	MOVQ 8(SP),AX
1144
+	MULQ 112(SP)
1145
+	ADDQ AX,R12
1146
+	ADCQ DX,R13
1147
+	MOVQ $REDMASK51,DX
1148
+	SHLQ $13,CX:SI
1149
+	ANDQ DX,SI
1150
+	SHLQ $13,R9:R8
1151
+	ANDQ DX,R8
1152
+	ADDQ CX,R8
1153
+	SHLQ $13,R11:R10
1154
+	ANDQ DX,R10
1155
+	ADDQ R9,R10
1156
+	SHLQ $13,R13:R12
1157
+	ANDQ DX,R12
1158
+	ADDQ R11,R12
1159
+	SHLQ $13,R15:R14
1160
+	ANDQ DX,R14
1161
+	ADDQ R13,R14
1162
+	IMUL3Q $19,R15,CX
1163
+	ADDQ CX,SI
1164
+	MOVQ SI,CX
1165
+	SHRQ $51,CX
1166
+	ADDQ R8,CX
1167
+	MOVQ CX,R8
1168
+	SHRQ $51,CX
1169
+	ANDQ DX,SI
1170
+	ADDQ R10,CX
1171
+	MOVQ CX,R9
1172
+	SHRQ $51,CX
1173
+	ANDQ DX,R8
1174
+	ADDQ R12,CX
1175
+	MOVQ CX,AX
1176
+	SHRQ $51,CX
1177
+	ANDQ DX,R9
1178
+	ADDQ R14,CX
1179
+	MOVQ CX,R10
1180
+	SHRQ $51,CX
1181
+	ANDQ DX,AX
1182
+	IMUL3Q $19,CX,CX
1183
+	ADDQ CX,SI
1184
+	ANDQ DX,R10
1185
+	MOVQ SI,40(DI)
1186
+	MOVQ R8,48(DI)
1187
+	MOVQ R9,56(DI)
1188
+	MOVQ AX,64(DI)
1189
+	MOVQ R10,72(DI)
1190
+	MOVQ 160(SP),AX
1191
+	MULQ ·_121666_213(SB)
1192
+	SHRQ $13,AX
1193
+	MOVQ AX,SI
1194
+	MOVQ DX,CX
1195
+	MOVQ 168(SP),AX
1196
+	MULQ ·_121666_213(SB)
1197
+	SHRQ $13,AX
1198
+	ADDQ AX,CX
1199
+	MOVQ DX,R8
1200
+	MOVQ 176(SP),AX
1201
+	MULQ ·_121666_213(SB)
1202
+	SHRQ $13,AX
1203
+	ADDQ AX,R8
1204
+	MOVQ DX,R9
1205
+	MOVQ 184(SP),AX
1206
+	MULQ ·_121666_213(SB)
1207
+	SHRQ $13,AX
1208
+	ADDQ AX,R9
1209
+	MOVQ DX,R10
1210
+	MOVQ 192(SP),AX
1211
+	MULQ ·_121666_213(SB)
1212
+	SHRQ $13,AX
1213
+	ADDQ AX,R10
1214
+	IMUL3Q $19,DX,DX
1215
+	ADDQ DX,SI
1216
+	ADDQ 80(SP),SI
1217
+	ADDQ 88(SP),CX
1218
+	ADDQ 96(SP),R8
1219
+	ADDQ 104(SP),R9
1220
+	ADDQ 112(SP),R10
1221
+	MOVQ SI,80(DI)
1222
+	MOVQ CX,88(DI)
1223
+	MOVQ R8,96(DI)
1224
+	MOVQ R9,104(DI)
1225
+	MOVQ R10,112(DI)
1226
+	MOVQ 104(DI),SI
1227
+	IMUL3Q $19,SI,AX
1228
+	MOVQ AX,0(SP)
1229
+	MULQ 176(SP)
1230
+	MOVQ AX,SI
1231
+	MOVQ DX,CX
1232
+	MOVQ 112(DI),DX
1233
+	IMUL3Q $19,DX,AX
1234
+	MOVQ AX,8(SP)
1235
+	MULQ 168(SP)
1236
+	ADDQ AX,SI
1237
+	ADCQ DX,CX
1238
+	MOVQ 80(DI),AX
1239
+	MULQ 160(SP)
1240
+	ADDQ AX,SI
1241
+	ADCQ DX,CX
1242
+	MOVQ 80(DI),AX
1243
+	MULQ 168(SP)
1244
+	MOVQ AX,R8
1245
+	MOVQ DX,R9
1246
+	MOVQ 80(DI),AX
1247
+	MULQ 176(SP)
1248
+	MOVQ AX,R10
1249
+	MOVQ DX,R11
1250
+	MOVQ 80(DI),AX
1251
+	MULQ 184(SP)
1252
+	MOVQ AX,R12
1253
+	MOVQ DX,R13
1254
+	MOVQ 80(DI),AX
1255
+	MULQ 192(SP)
1256
+	MOVQ AX,R14
1257
+	MOVQ DX,R15
1258
+	MOVQ 88(DI),AX
1259
+	MULQ 160(SP)
1260
+	ADDQ AX,R8
1261
+	ADCQ DX,R9
1262
+	MOVQ 88(DI),AX
1263
+	MULQ 168(SP)
1264
+	ADDQ AX,R10
1265
+	ADCQ DX,R11
1266
+	MOVQ 88(DI),AX
1267
+	MULQ 176(SP)
1268
+	ADDQ AX,R12
1269
+	ADCQ DX,R13
1270
+	MOVQ 88(DI),AX
1271
+	MULQ 184(SP)
1272
+	ADDQ AX,R14
1273
+	ADCQ DX,R15
1274
+	MOVQ 88(DI),DX
1275
+	IMUL3Q $19,DX,AX
1276
+	MULQ 192(SP)
1277
+	ADDQ AX,SI
1278
+	ADCQ DX,CX
1279
+	MOVQ 96(DI),AX
1280
+	MULQ 160(SP)
1281
+	ADDQ AX,R10
1282
+	ADCQ DX,R11
1283
+	MOVQ 96(DI),AX
1284
+	MULQ 168(SP)
1285
+	ADDQ AX,R12
1286
+	ADCQ DX,R13
1287
+	MOVQ 96(DI),AX
1288
+	MULQ 176(SP)
1289
+	ADDQ AX,R14
1290
+	ADCQ DX,R15
1291
+	MOVQ 96(DI),DX
1292
+	IMUL3Q $19,DX,AX
1293
+	MULQ 184(SP)
1294
+	ADDQ AX,SI
1295
+	ADCQ DX,CX
1296
+	MOVQ 96(DI),DX
1297
+	IMUL3Q $19,DX,AX
1298
+	MULQ 192(SP)
1299
+	ADDQ AX,R8
1300
+	ADCQ DX,R9
1301
+	MOVQ 104(DI),AX
1302
+	MULQ 160(SP)
1303
+	ADDQ AX,R12
1304
+	ADCQ DX,R13
1305
+	MOVQ 104(DI),AX
1306
+	MULQ 168(SP)
1307
+	ADDQ AX,R14
1308
+	ADCQ DX,R15
1309
+	MOVQ 0(SP),AX
1310
+	MULQ 184(SP)
1311
+	ADDQ AX,R8
1312
+	ADCQ DX,R9
1313
+	MOVQ 0(SP),AX
1314
+	MULQ 192(SP)
1315
+	ADDQ AX,R10
1316
+	ADCQ DX,R11
1317
+	MOVQ 112(DI),AX
1318
+	MULQ 160(SP)
1319
+	ADDQ AX,R14
1320
+	ADCQ DX,R15
1321
+	MOVQ 8(SP),AX
1322
+	MULQ 176(SP)
1323
+	ADDQ AX,R8
1324
+	ADCQ DX,R9
1325
+	MOVQ 8(SP),AX
1326
+	MULQ 184(SP)
1327
+	ADDQ AX,R10
1328
+	ADCQ DX,R11
1329
+	MOVQ 8(SP),AX
1330
+	MULQ 192(SP)
1331
+	ADDQ AX,R12
1332
+	ADCQ DX,R13
1333
+	MOVQ $REDMASK51,DX
1334
+	SHLQ $13,CX:SI
1335
+	ANDQ DX,SI
1336
+	SHLQ $13,R9:R8
1337
+	ANDQ DX,R8
1338
+	ADDQ CX,R8
1339
+	SHLQ $13,R11:R10
1340
+	ANDQ DX,R10
1341
+	ADDQ R9,R10
1342
+	SHLQ $13,R13:R12
1343
+	ANDQ DX,R12
1344
+	ADDQ R11,R12
1345
+	SHLQ $13,R15:R14
1346
+	ANDQ DX,R14
1347
+	ADDQ R13,R14
1348
+	IMUL3Q $19,R15,CX
1349
+	ADDQ CX,SI
1350
+	MOVQ SI,CX
1351
+	SHRQ $51,CX
1352
+	ADDQ R8,CX
1353
+	MOVQ CX,R8
1354
+	SHRQ $51,CX
1355
+	ANDQ DX,SI
1356
+	ADDQ R10,CX
1357
+	MOVQ CX,R9
1358
+	SHRQ $51,CX
1359
+	ANDQ DX,R8
1360
+	ADDQ R12,CX
1361
+	MOVQ CX,AX
1362
+	SHRQ $51,CX
1363
+	ANDQ DX,R9
1364
+	ADDQ R14,CX
1365
+	MOVQ CX,R10
1366
+	SHRQ $51,CX
1367
+	ANDQ DX,AX
1368
+	IMUL3Q $19,CX,CX
1369
+	ADDQ CX,SI
1370
+	ANDQ DX,R10
1371
+	MOVQ SI,80(DI)
1372
+	MOVQ R8,88(DI)
1373
+	MOVQ R9,96(DI)
1374
+	MOVQ AX,104(DI)
1375
+	MOVQ R10,112(DI)
1376
+	RET
0 1377
new file mode 100644
... ...
@@ -0,0 +1,240 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build amd64,!gccgo,!appengine
5
+
6
+package curve25519
7
+
8
+// These functions are implemented in the .s files. The names of the functions
9
+// in the rest of the file are also taken from the SUPERCOP sources to help
10
+// people following along.
11
+
12
+//go:noescape
13
+
14
+func cswap(inout *[5]uint64, v uint64)
15
+
16
+//go:noescape
17
+
18
+func ladderstep(inout *[5][5]uint64)
19
+
20
+//go:noescape
21
+
22
+func freeze(inout *[5]uint64)
23
+
24
+//go:noescape
25
+
26
+func mul(dest, a, b *[5]uint64)
27
+
28
+//go:noescape
29
+
30
+func square(out, in *[5]uint64)
31
+
32
+// mladder uses a Montgomery ladder to calculate (xr/zr) *= s.
33
+func mladder(xr, zr *[5]uint64, s *[32]byte) {
34
+	var work [5][5]uint64
35
+
36
+	work[0] = *xr
37
+	setint(&work[1], 1)
38
+	setint(&work[2], 0)
39
+	work[3] = *xr
40
+	setint(&work[4], 1)
41
+
42
+	j := uint(6)
43
+	var prevbit byte
44
+
45
+	for i := 31; i >= 0; i-- {
46
+		for j < 8 {
47
+			bit := ((*s)[i] >> j) & 1
48
+			swap := bit ^ prevbit
49
+			prevbit = bit
50
+			cswap(&work[1], uint64(swap))
51
+			ladderstep(&work)
52
+			j--
53
+		}
54
+		j = 7
55
+	}
56
+
57
+	*xr = work[1]
58
+	*zr = work[2]
59
+}
60
+
61
+func scalarMult(out, in, base *[32]byte) {
62
+	var e [32]byte
63
+	copy(e[:], (*in)[:])
64
+	e[0] &= 248
65
+	e[31] &= 127
66
+	e[31] |= 64
67
+
68
+	var t, z [5]uint64
69
+	unpack(&t, base)
70
+	mladder(&t, &z, &e)
71
+	invert(&z, &z)
72
+	mul(&t, &t, &z)
73
+	pack(out, &t)
74
+}
75
+
76
// setint stores the small integer v in r: limb 0 becomes v and limbs 1
// through 4 are cleared.
func setint(r *[5]uint64, v uint64) {
	*r = [5]uint64{v}
}
83
+
84
// unpack sets r = x where r consists of 5, 51-bit limbs in little-endian
// order. Limb n receives bits 51n through 51n+50 of the little-endian value
// in x; bit 255 (the top bit of x[31]) is dropped.
func unpack(r *[5]uint64, x *[32]byte) {
	const limbMask = 1<<51 - 1

	// window reads the eight bytes starting at x[off] as one little-endian
	// 64-bit word.
	window := func(off int) uint64 {
		var w uint64
		for k := 7; k >= 0; k-- {
			w = w<<8 | uint64(x[off+k])
		}
		return w
	}

	// Each limb is cut out of a 64-bit window that contains it: shift away
	// the bits below the limb, then mask off everything above bit 50. The
	// last limb uses the window at byte 24 (not 25) so the read stays inside
	// the 32-byte array.
	r[0] = window(0) & limbMask
	r[1] = (window(6) >> 3) & limbMask
	r[2] = (window(12) >> 6) & limbMask
	r[3] = (window(19) >> 1) & limbMask
	r[4] = (window(24) >> 12) & limbMask
}
128
+
129
+// pack sets out = x where out is the usual, little-endian form of the 5,
130
+// 51-bit limbs in x.
131
+func pack(out *[32]byte, x *[5]uint64) {
132
+	t := *x
133
+	freeze(&t)
134
+
135
+	out[0] = byte(t[0])
136
+	out[1] = byte(t[0] >> 8)
137
+	out[2] = byte(t[0] >> 16)
138
+	out[3] = byte(t[0] >> 24)
139
+	out[4] = byte(t[0] >> 32)
140
+	out[5] = byte(t[0] >> 40)
141
+	out[6] = byte(t[0] >> 48)
142
+
143
+	out[6] ^= byte(t[1]<<3) & 0xf8
144
+	out[7] = byte(t[1] >> 5)
145
+	out[8] = byte(t[1] >> 13)
146
+	out[9] = byte(t[1] >> 21)
147
+	out[10] = byte(t[1] >> 29)
148
+	out[11] = byte(t[1] >> 37)
149
+	out[12] = byte(t[1] >> 45)
150
+
151
+	out[12] ^= byte(t[2]<<6) & 0xc0
152
+	out[13] = byte(t[2] >> 2)
153
+	out[14] = byte(t[2] >> 10)
154
+	out[15] = byte(t[2] >> 18)
155
+	out[16] = byte(t[2] >> 26)
156
+	out[17] = byte(t[2] >> 34)
157
+	out[18] = byte(t[2] >> 42)
158
+	out[19] = byte(t[2] >> 50)
159
+
160
+	out[19] ^= byte(t[3]<<1) & 0xfe
161
+	out[20] = byte(t[3] >> 7)
162
+	out[21] = byte(t[3] >> 15)
163
+	out[22] = byte(t[3] >> 23)
164
+	out[23] = byte(t[3] >> 31)
165
+	out[24] = byte(t[3] >> 39)
166
+	out[25] = byte(t[3] >> 47)
167
+
168
+	out[25] ^= byte(t[4]<<4) & 0xf0
169
+	out[26] = byte(t[4] >> 4)
170
+	out[27] = byte(t[4] >> 12)
171
+	out[28] = byte(t[4] >> 20)
172
+	out[29] = byte(t[4] >> 28)
173
+	out[30] = byte(t[4] >> 36)
174
+	out[31] = byte(t[4] >> 44)
175
+}
176
+
177
+// invert calculates r = x^-1 mod p using Fermat's little theorem.
178
+func invert(r *[5]uint64, x *[5]uint64) {
179
+	var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64
180
+
181
+	square(&z2, x)        /* 2 */
182
+	square(&t, &z2)       /* 4 */
183
+	square(&t, &t)        /* 8 */
184
+	mul(&z9, &t, x)       /* 9 */
185
+	mul(&z11, &z9, &z2)   /* 11 */
186
+	square(&t, &z11)      /* 22 */
187
+	mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */
188
+
189
+	square(&t, &z2_5_0)      /* 2^6 - 2^1 */
190
+	for i := 1; i < 5; i++ { /* 2^20 - 2^10 */
191
+		square(&t, &t)
192
+	}
193
+	mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */
194
+
195
+	square(&t, &z2_10_0)      /* 2^11 - 2^1 */
196
+	for i := 1; i < 10; i++ { /* 2^20 - 2^10 */
197
+		square(&t, &t)
198
+	}
199
+	mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */
200
+
201
+	square(&t, &z2_20_0)      /* 2^21 - 2^1 */
202
+	for i := 1; i < 20; i++ { /* 2^40 - 2^20 */
203
+		square(&t, &t)
204
+	}
205
+	mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */
206
+
207
+	square(&t, &t)            /* 2^41 - 2^1 */
208
+	for i := 1; i < 10; i++ { /* 2^50 - 2^10 */
209
+		square(&t, &t)
210
+	}
211
+	mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */
212
+
213
+	square(&t, &z2_50_0)      /* 2^51 - 2^1 */
214
+	for i := 1; i < 50; i++ { /* 2^100 - 2^50 */
215
+		square(&t, &t)
216
+	}
217
+	mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */
218
+
219
+	square(&t, &z2_100_0)      /* 2^101 - 2^1 */
220
+	for i := 1; i < 100; i++ { /* 2^200 - 2^100 */
221
+		square(&t, &t)
222
+	}
223
+	mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */
224
+
225
+	square(&t, &t)            /* 2^201 - 2^1 */
226
+	for i := 1; i < 50; i++ { /* 2^250 - 2^50 */
227
+		square(&t, &t)
228
+	}
229
+	mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */
230
+
231
+	square(&t, &t) /* 2^251 - 2^1 */
232
+	square(&t, &t) /* 2^252 - 2^2 */
233
+	square(&t, &t) /* 2^253 - 2^3 */
234
+
235
+	square(&t, &t) /* 2^254 - 2^4 */
236
+
237
+	square(&t, &t)   /* 2^255 - 2^5 */
238
+	mul(r, &t, &z11) /* 2^255 - 21 */
239
+}
0 240
new file mode 100644
... ...
@@ -0,0 +1,169 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// This code was translated into a form compatible with 6a from the public
5
+// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
6
+
7
+// +build amd64,!gccgo,!appengine
8
+
9
+#include "const_amd64.h"
10
+
11
+// func mul(dest, a, b *[5]uint64) -- dest = a*b on five-limb field elements; products that land at limb 5 or above are folded back with a factor of 19, and the result is carried down to 51-bit limbs. All loads from a and b finish before the first store, so dest may alias an input.
12
+TEXT ·mul(SB),0,$16-24 // 16-byte frame (two 8-byte spill slots), 24 bytes of arguments
13
+	MOVQ dest+0(FP), DI
14
+	MOVQ a+8(FP), SI
15
+	MOVQ b+16(FP), DX
16
+
17
+	MOVQ DX,CX // keep b in CX: every MULQ below overwrites DX:AX
18
+	MOVQ 24(SI),DX
19
+	IMUL3Q $19,DX,AX // AX = 19*a[3]
20
+	MOVQ AX,0(SP) // spill 19*a[3]; reused further down
21
+	MULQ 16(CX) // DX:AX = 19*a[3]*b[2]
22
+	MOVQ AX,R8 // R9:R8 is the 128-bit accumulator for result limb 0
23
+	MOVQ DX,R9
24
+	MOVQ 32(SI),DX
25
+	IMUL3Q $19,DX,AX // AX = 19*a[4]
26
+	MOVQ AX,8(SP) // spill 19*a[4]; reused further down
27
+	MULQ 8(CX)
28
+	ADDQ AX,R8
29
+	ADCQ DX,R9
30
+	MOVQ 0(SI),AX
31
+	MULQ 0(CX)
32
+	ADDQ AX,R8
33
+	ADCQ DX,R9
34
+	MOVQ 0(SI),AX
35
+	MULQ 8(CX)
36
+	MOVQ AX,R10 // R11:R10 accumulates result limb 1
37
+	MOVQ DX,R11
38
+	MOVQ 0(SI),AX
39
+	MULQ 16(CX)
40
+	MOVQ AX,R12 // R13:R12 accumulates result limb 2
41
+	MOVQ DX,R13
42
+	MOVQ 0(SI),AX
43
+	MULQ 24(CX)
44
+	MOVQ AX,R14 // R15:R14 accumulates result limb 3
45
+	MOVQ DX,R15
46
+	MOVQ 0(SI),AX
47
+	MULQ 32(CX)
48
+	MOVQ AX,BX // BP:BX accumulates result limb 4
49
+	MOVQ DX,BP
50
+	MOVQ 8(SI),AX
51
+	MULQ 0(CX)
52
+	ADDQ AX,R10
53
+	ADCQ DX,R11
54
+	MOVQ 8(SI),AX
55
+	MULQ 8(CX)
56
+	ADDQ AX,R12
57
+	ADCQ DX,R13
58
+	MOVQ 8(SI),AX
59
+	MULQ 16(CX)
60
+	ADDQ AX,R14
61
+	ADCQ DX,R15
62
+	MOVQ 8(SI),AX
63
+	MULQ 24(CX)
64
+	ADDQ AX,BX
65
+	ADCQ DX,BP
66
+	MOVQ 8(SI),DX
67
+	IMUL3Q $19,DX,AX // a[1]*b[4] lands at limb 5, so it is folded into limb 0 times 19
68
+	MULQ 32(CX)
69
+	ADDQ AX,R8
70
+	ADCQ DX,R9
71
+	MOVQ 16(SI),AX
72
+	MULQ 0(CX)
73
+	ADDQ AX,R12
74
+	ADCQ DX,R13
75
+	MOVQ 16(SI),AX
76
+	MULQ 8(CX)
77
+	ADDQ AX,R14
78
+	ADCQ DX,R15
79
+	MOVQ 16(SI),AX
80
+	MULQ 16(CX)
81
+	ADDQ AX,BX
82
+	ADCQ DX,BP
83
+	MOVQ 16(SI),DX
84
+	IMUL3Q $19,DX,AX
85
+	MULQ 24(CX)
86
+	ADDQ AX,R8
87
+	ADCQ DX,R9
88
+	MOVQ 16(SI),DX
89
+	IMUL3Q $19,DX,AX
90
+	MULQ 32(CX)
91
+	ADDQ AX,R10
92
+	ADCQ DX,R11
93
+	MOVQ 24(SI),AX
94
+	MULQ 0(CX)
95
+	ADDQ AX,R14
96
+	ADCQ DX,R15
97
+	MOVQ 24(SI),AX
98
+	MULQ 8(CX)
99
+	ADDQ AX,BX
100
+	ADCQ DX,BP
101
+	MOVQ 0(SP),AX // reload 19*a[3]
102
+	MULQ 24(CX)
103
+	ADDQ AX,R10
104
+	ADCQ DX,R11
105
+	MOVQ 0(SP),AX
106
+	MULQ 32(CX)
107
+	ADDQ AX,R12
108
+	ADCQ DX,R13
109
+	MOVQ 32(SI),AX
110
+	MULQ 0(CX)
111
+	ADDQ AX,BX
112
+	ADCQ DX,BP
113
+	MOVQ 8(SP),AX // reload 19*a[4]
114
+	MULQ 16(CX)
115
+	ADDQ AX,R10
116
+	ADCQ DX,R11
117
+	MOVQ 8(SP),AX
118
+	MULQ 24(CX)
119
+	ADDQ AX,R12
120
+	ADCQ DX,R13
121
+	MOVQ 8(SP),AX
122
+	MULQ 32(CX)
123
+	ADDQ AX,R14
124
+	ADCQ DX,R15
125
+	MOVQ $REDMASK51,SI // a is no longer needed; SI = limb mask from const_amd64.h (presumably 2^51-1 -- confirm there)
126
+	SHLQ $13,R9:R8 // double-register shift: R9 = R9<<13 | R8>>51, i.e. the part of the 128-bit sum above bit 50
127
+	ANDQ SI,R8 // R8 keeps the masked low part of limb 0
128
+	SHLQ $13,R11:R10
129
+	ANDQ SI,R10
130
+	ADDQ R9,R10 // the high part of limb 0 carries into limb 1
131
+	SHLQ $13,R13:R12
132
+	ANDQ SI,R12
133
+	ADDQ R11,R12
134
+	SHLQ $13,R15:R14
135
+	ANDQ SI,R14
136
+	ADDQ R13,R14
137
+	SHLQ $13,BP:BX
138
+	ANDQ SI,BX
139
+	ADDQ R15,BX
140
+	IMUL3Q $19,BP,DX // the high part of limb 4 wraps around to limb 0 with a factor of 19
141
+	ADDQ DX,R8
142
+	MOVQ R8,DX // second pass: ripple the bits above bit 50 of each limb into the next one
143
+	SHRQ $51,DX
144
+	ADDQ R10,DX
145
+	MOVQ DX,CX // CX = limb 1 before masking (b is no longer needed)
146
+	SHRQ $51,DX
147
+	ANDQ SI,R8
148
+	ADDQ R12,DX
149
+	MOVQ DX,R9 // R9 = limb 2 before masking
150
+	SHRQ $51,DX
151
+	ANDQ SI,CX
152
+	ADDQ R14,DX
153
+	MOVQ DX,AX // AX = limb 3 before masking
154
+	SHRQ $51,DX
155
+	ANDQ SI,R9
156
+	ADDQ BX,DX
157
+	MOVQ DX,R10 // R10 = limb 4 before masking
158
+	SHRQ $51,DX
159
+	ANDQ SI,AX
160
+	IMUL3Q $19,DX,DX // the final carry out of limb 4 wraps to limb 0 times 19
161
+	ADDQ DX,R8 // limb 0 is not masked again after this add
162
+	ANDQ SI,R10
163
+	MOVQ R8,0(DI) // store the five result limbs to dest
164
+	MOVQ CX,8(DI)
165
+	MOVQ R9,16(DI)
166
+	MOVQ AX,24(DI)
167
+	MOVQ R10,32(DI)
168
+	RET
0 169
new file mode 100644
... ...
@@ -0,0 +1,132 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// This code was translated into a form compatible with 6a from the public
5
+// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
6
+
7
+// +build amd64,!gccgo,!appengine
8
+
9
+#include "const_amd64.h"
10
+
11
+// func square(out, in *[5]uint64) -- out = in*in on five-limb field elements; cross terms are doubled, terms that land at limb 5 or above are folded back with a factor of 19, and the result is carried down to 51-bit limbs. All loads from in finish before the first store, so out may alias in.
12
+TEXT ·square(SB),7,$0-16 // no local frame, 16 bytes of arguments
13
+	MOVQ out+0(FP), DI
14
+	MOVQ in+8(FP), SI
15
+
16
+	MOVQ 0(SI),AX
17
+	MULQ 0(SI) // DX:AX = in[0]^2
18
+	MOVQ AX,CX // R8:CX is the 128-bit accumulator for result limb 0
19
+	MOVQ DX,R8
20
+	MOVQ 0(SI),AX
21
+	SHLQ $1,AX // AX = 2*in[0]: each cross term occurs twice in a square
22
+	MULQ 8(SI)
23
+	MOVQ AX,R9 // R10:R9 accumulates result limb 1
24
+	MOVQ DX,R10
25
+	MOVQ 0(SI),AX
26
+	SHLQ $1,AX
27
+	MULQ 16(SI)
28
+	MOVQ AX,R11 // R12:R11 accumulates result limb 2
29
+	MOVQ DX,R12
30
+	MOVQ 0(SI),AX
31
+	SHLQ $1,AX
32
+	MULQ 24(SI)
33
+	MOVQ AX,R13 // R14:R13 accumulates result limb 3
34
+	MOVQ DX,R14
35
+	MOVQ 0(SI),AX
36
+	SHLQ $1,AX
37
+	MULQ 32(SI)
38
+	MOVQ AX,R15 // BX:R15 accumulates result limb 4
39
+	MOVQ DX,BX
40
+	MOVQ 8(SI),AX
41
+	MULQ 8(SI)
42
+	ADDQ AX,R11
43
+	ADCQ DX,R12
44
+	MOVQ 8(SI),AX
45
+	SHLQ $1,AX
46
+	MULQ 16(SI)
47
+	ADDQ AX,R13
48
+	ADCQ DX,R14
49
+	MOVQ 8(SI),AX
50
+	SHLQ $1,AX
51
+	MULQ 24(SI)
52
+	ADDQ AX,R15
53
+	ADCQ DX,BX
54
+	MOVQ 8(SI),DX
55
+	IMUL3Q $38,DX,AX // 38 = 2*19: in[1]*in[4] is a doubled cross term that lands at limb 5 and folds into limb 0
56
+	MULQ 32(SI)
57
+	ADDQ AX,CX
58
+	ADCQ DX,R8
59
+	MOVQ 16(SI),AX
60
+	MULQ 16(SI)
61
+	ADDQ AX,R15
62
+	ADCQ DX,BX
63
+	MOVQ 16(SI),DX
64
+	IMUL3Q $38,DX,AX
65
+	MULQ 24(SI)
66
+	ADDQ AX,CX
67
+	ADCQ DX,R8
68
+	MOVQ 16(SI),DX
69
+	IMUL3Q $38,DX,AX
70
+	MULQ 32(SI)
71
+	ADDQ AX,R9
72
+	ADCQ DX,R10
73
+	MOVQ 24(SI),DX
74
+	IMUL3Q $19,DX,AX // in[3]^2 is not doubled; it lands at limb 6 and folds into limb 1 times 19
75
+	MULQ 24(SI)
76
+	ADDQ AX,R9
77
+	ADCQ DX,R10
78
+	MOVQ 24(SI),DX
79
+	IMUL3Q $38,DX,AX
80
+	MULQ 32(SI)
81
+	ADDQ AX,R11
82
+	ADCQ DX,R12
83
+	MOVQ 32(SI),DX
84
+	IMUL3Q $19,DX,AX
85
+	MULQ 32(SI)
86
+	ADDQ AX,R13
87
+	ADCQ DX,R14
88
+	MOVQ $REDMASK51,SI // in is no longer needed; SI = limb mask from const_amd64.h (presumably 2^51-1 -- confirm there)
89
+	SHLQ $13,R8:CX // double-register shift: R8 = R8<<13 | CX>>51, i.e. the part of the 128-bit sum above bit 50
90
+	ANDQ SI,CX // CX keeps the masked low part of limb 0
91
+	SHLQ $13,R10:R9
92
+	ANDQ SI,R9
93
+	ADDQ R8,R9 // the high part of limb 0 carries into limb 1
94
+	SHLQ $13,R12:R11
95
+	ANDQ SI,R11
96
+	ADDQ R10,R11
97
+	SHLQ $13,R14:R13
98
+	ANDQ SI,R13
99
+	ADDQ R12,R13
100
+	SHLQ $13,BX:R15
101
+	ANDQ SI,R15
102
+	ADDQ R14,R15
103
+	IMUL3Q $19,BX,DX // the high part of limb 4 wraps around to limb 0 with a factor of 19
104
+	ADDQ DX,CX
105
+	MOVQ CX,DX // second pass: ripple the bits above bit 50 of each limb into the next one
106
+	SHRQ $51,DX
107
+	ADDQ R9,DX
108
+	ANDQ SI,CX
109
+	MOVQ DX,R8 // R8 = limb 1 before masking
110
+	SHRQ $51,DX
111
+	ADDQ R11,DX
112
+	ANDQ SI,R8
113
+	MOVQ DX,R9 // R9 = limb 2 before masking
114
+	SHRQ $51,DX
115
+	ADDQ R13,DX
116
+	ANDQ SI,R9
117
+	MOVQ DX,AX // AX = limb 3 before masking
118
+	SHRQ $51,DX
119
+	ADDQ R15,DX
120
+	ANDQ SI,AX
121
+	MOVQ DX,R10 // R10 = limb 4 before masking
122
+	SHRQ $51,DX
123
+	IMUL3Q $19,DX,DX // the final carry out of limb 4 wraps to limb 0 times 19
124
+	ADDQ DX,CX // limb 0 is not masked again after this add
125
+	ANDQ SI,R10
126
+	MOVQ CX,0(DI) // store the five result limbs to out
127
+	MOVQ R8,8(DI)
128
+	MOVQ R9,16(DI)
129
+	MOVQ AX,24(DI)
130
+	MOVQ R10,32(DI)
131
+	RET
... ...
@@ -13,7 +13,7 @@ example, by using nonce 1 for the first message, nonce 2 for the second
13 13
 message, etc. Nonces are long enough that randomly generated nonces have
14 14
 negligible risk of collision.
15 15
 
16
-This package is interoperable with NaCl: http://nacl.cr.yp.to/secretbox.html.
16
+This package is interoperable with NaCl: https://nacl.cr.yp.to/secretbox.html.
17 17
 */
18 18
 package secretbox // import "golang.org/x/crypto/nacl/secretbox"
19 19
 
... ...
@@ -13,7 +13,7 @@ import (
13 13
 func bmpString(s string) ([]byte, error) {
14 14
 	// References:
15 15
 	// https://tools.ietf.org/html/rfc7292#appendix-B.1
16
-	// http://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
16
+	// https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
17 17
 	//  - non-BMP characters are encoded in UTF 16 by using a surrogate pair of 16-bit codes
18 18
 	//	  EncodeRune returns 0xfffd if the rune does not need special encoding
19 19
 	//  - the above RFC provides the info that BMPStrings are NULL terminated.
... ...
@@ -109,6 +109,10 @@ func ToPEM(pfxData []byte, password string) ([]*pem.Block, error) {
109 109
 
110 110
 	bags, encodedPassword, err := getSafeContents(pfxData, encodedPassword)
111 111
 
112
+	if err != nil {
113
+		return nil, err
114
+	}
115
+
112 116
 	blocks := make([]*pem.Block, 0, len(bags))
113 117
 	for _, bag := range bags {
114 118
 		block, err := convertBag(&bag, encodedPassword)
115 119
deleted file mode 100644
... ...
@@ -1,45 +0,0 @@
1
-// Copyright 2012 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// This code was translated into a form compatible with 6a from the public
6
-// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
7
-
8
-// +build amd64,!gccgo,!appengine
9
-
10
-DATA ·SCALE(SB)/8, $0x37F4000000000000
11
-GLOBL ·SCALE(SB), 8, $8
12
-DATA ·TWO32(SB)/8, $0x41F0000000000000
13
-GLOBL ·TWO32(SB), 8, $8
14
-DATA ·TWO64(SB)/8, $0x43F0000000000000
15
-GLOBL ·TWO64(SB), 8, $8
16
-DATA ·TWO96(SB)/8, $0x45F0000000000000
17
-GLOBL ·TWO96(SB), 8, $8
18
-DATA ·ALPHA32(SB)/8, $0x45E8000000000000
19
-GLOBL ·ALPHA32(SB), 8, $8
20
-DATA ·ALPHA64(SB)/8, $0x47E8000000000000
21
-GLOBL ·ALPHA64(SB), 8, $8
22
-DATA ·ALPHA96(SB)/8, $0x49E8000000000000
23
-GLOBL ·ALPHA96(SB), 8, $8
24
-DATA ·ALPHA130(SB)/8, $0x4C08000000000000
25
-GLOBL ·ALPHA130(SB), 8, $8
26
-DATA ·DOFFSET0(SB)/8, $0x4330000000000000
27
-GLOBL ·DOFFSET0(SB), 8, $8
28
-DATA ·DOFFSET1(SB)/8, $0x4530000000000000
29
-GLOBL ·DOFFSET1(SB), 8, $8
30
-DATA ·DOFFSET2(SB)/8, $0x4730000000000000
31
-GLOBL ·DOFFSET2(SB), 8, $8
32
-DATA ·DOFFSET3(SB)/8, $0x4930000000000000
33
-GLOBL ·DOFFSET3(SB), 8, $8
34
-DATA ·DOFFSET3MINUSTWO128(SB)/8, $0x492FFFFE00000000
35
-GLOBL ·DOFFSET3MINUSTWO128(SB), 8, $8
36
-DATA ·HOFFSET0(SB)/8, $0x43300001FFFFFFFB
37
-GLOBL ·HOFFSET0(SB), 8, $8
38
-DATA ·HOFFSET1(SB)/8, $0x45300001FFFFFFFE
39
-GLOBL ·HOFFSET1(SB), 8, $8
40
-DATA ·HOFFSET2(SB)/8, $0x47300001FFFFFFFE
41
-GLOBL ·HOFFSET2(SB), 8, $8
42
-DATA ·HOFFSET3(SB)/8, $0x49300003FFFFFFFE
43
-GLOBL ·HOFFSET3(SB), 8, $8
44
-DATA ·ROUNDING(SB)/2, $0x137f
45
-GLOBL ·ROUNDING(SB), 8, $2
... ...
@@ -3,7 +3,8 @@
3 3
 // license that can be found in the LICENSE file.
4 4
 
5 5
 /*
6
-Package poly1305 implements Poly1305 one-time message authentication code as specified in http://cr.yp.to/mac/poly1305-20050329.pdf.
6
+Package poly1305 implements Poly1305 one-time message authentication code as
7
+specified in https://cr.yp.to/mac/poly1305-20050329.pdf.
7 8
 
8 9
 Poly1305 is a fast, one-time authentication function. It is infeasible for an
9 10
 attacker to generate an authenticator for a message without the key. However, a
10 11
deleted file mode 100644
... ...
@@ -1,497 +0,0 @@
1
-// Copyright 2012 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// This code was translated into a form compatible with 6a from the public
6
-// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
7
-
8
-// +build amd64,!gccgo,!appengine
9
-
10
-// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]key)
11
-TEXT ·poly1305(SB),0,$224-32
12
-	MOVQ out+0(FP),DI
13
-	MOVQ m+8(FP),SI
14
-	MOVQ mlen+16(FP),DX
15
-	MOVQ key+24(FP),CX
16
-
17
-	MOVQ SP,R11
18
-	MOVQ $31,R9
19
-	NOTQ R9
20
-	ANDQ R9,SP
21
-	ADDQ $32,SP
22
-
23
-	MOVQ R11,32(SP)
24
-	MOVQ R12,40(SP)
25
-	MOVQ R13,48(SP)
26
-	MOVQ R14,56(SP)
27
-	MOVQ R15,64(SP)
28
-	MOVQ BX,72(SP)
29
-	MOVQ BP,80(SP)
30
-	FLDCW ·ROUNDING(SB)
31
-	MOVL 0(CX),R8
32
-	MOVL 4(CX),R9
33
-	MOVL 8(CX),AX
34
-	MOVL 12(CX),R10
35
-	MOVQ DI,88(SP)
36
-	MOVQ CX,96(SP)
37
-	MOVL $0X43300000,108(SP)
38
-	MOVL $0X45300000,116(SP)
39
-	MOVL $0X47300000,124(SP)
40
-	MOVL $0X49300000,132(SP)
41
-	ANDL $0X0FFFFFFF,R8
42
-	ANDL $0X0FFFFFFC,R9
43
-	ANDL $0X0FFFFFFC,AX
44
-	ANDL $0X0FFFFFFC,R10
45
-	MOVL R8,104(SP)
46
-	MOVL R9,112(SP)
47
-	MOVL AX,120(SP)
48
-	MOVL R10,128(SP)
49
-	FMOVD 104(SP), F0
50
-	FSUBD ·DOFFSET0(SB), F0
51
-	FMOVD 112(SP), F0
52
-	FSUBD ·DOFFSET1(SB), F0
53
-	FMOVD 120(SP), F0
54
-	FSUBD ·DOFFSET2(SB), F0
55
-	FMOVD 128(SP), F0
56
-	FSUBD ·DOFFSET3(SB), F0
57
-	FXCHD F0, F3
58
-	FMOVDP F0, 136(SP)
59
-	FXCHD F0, F1
60
-	FMOVD F0, 144(SP)
61
-	FMULD ·SCALE(SB), F0
62
-	FMOVDP F0, 152(SP)
63
-	FMOVD F0, 160(SP)
64
-	FMULD ·SCALE(SB), F0
65
-	FMOVDP F0, 168(SP)
66
-	FMOVD F0, 176(SP)
67
-	FMULD ·SCALE(SB), F0
68
-	FMOVDP F0, 184(SP)
69
-	FLDZ
70
-	FLDZ
71
-	FLDZ
72
-	FLDZ
73
-	CMPQ DX,$16
74
-	JB ADDATMOST15BYTES
75
-	INITIALATLEAST16BYTES:
76
-	MOVL 12(SI),DI
77
-	MOVL 8(SI),CX
78
-	MOVL 4(SI),R8
79
-	MOVL 0(SI),R9
80
-	MOVL DI,128(SP)
81
-	MOVL CX,120(SP)
82
-	MOVL R8,112(SP)
83
-	MOVL R9,104(SP)
84
-	ADDQ $16,SI
85
-	SUBQ $16,DX
86
-	FXCHD F0, F3
87
-	FADDD 128(SP), F0
88
-	FSUBD ·DOFFSET3MINUSTWO128(SB), F0
89
-	FXCHD F0, F1
90
-	FADDD 112(SP), F0
91
-	FSUBD ·DOFFSET1(SB), F0
92
-	FXCHD F0, F2
93
-	FADDD 120(SP), F0
94
-	FSUBD ·DOFFSET2(SB), F0
95
-	FXCHD F0, F3
96
-	FADDD 104(SP), F0
97
-	FSUBD ·DOFFSET0(SB), F0
98
-	CMPQ DX,$16
99
-	JB MULTIPLYADDATMOST15BYTES
100
-	MULTIPLYADDATLEAST16BYTES:
101
-	MOVL 12(SI),DI
102
-	MOVL 8(SI),CX
103
-	MOVL 4(SI),R8
104
-	MOVL 0(SI),R9
105
-	MOVL DI,128(SP)
106
-	MOVL CX,120(SP)
107
-	MOVL R8,112(SP)
108
-	MOVL R9,104(SP)
109
-	ADDQ $16,SI
110
-	SUBQ $16,DX
111
-	FMOVD ·ALPHA130(SB), F0
112
-	FADDD F2,F0
113
-	FSUBD ·ALPHA130(SB), F0
114
-	FSUBD F0,F2
115
-	FMULD ·SCALE(SB), F0
116
-	FMOVD ·ALPHA32(SB), F0
117
-	FADDD F2,F0
118
-	FSUBD ·ALPHA32(SB), F0
119
-	FSUBD F0,F2
120
-	FXCHD F0, F2
121
-	FADDDP F0,F1
122
-	FMOVD ·ALPHA64(SB), F0
123
-	FADDD F4,F0
124
-	FSUBD ·ALPHA64(SB), F0
125
-	FSUBD F0,F4
126
-	FMOVD ·ALPHA96(SB), F0
127
-	FADDD F6,F0
128
-	FSUBD ·ALPHA96(SB), F0
129
-	FSUBD F0,F6
130
-	FXCHD F0, F6
131
-	FADDDP F0,F1
132
-	FXCHD F0, F3
133
-	FADDDP F0,F5
134
-	FXCHD F0, F3
135
-	FADDDP F0,F1
136
-	FMOVD 176(SP), F0
137
-	FMULD F3,F0
138
-	FMOVD 160(SP), F0
139
-	FMULD F4,F0
140
-	FMOVD 144(SP), F0
141
-	FMULD F5,F0
142
-	FMOVD 136(SP), F0
143
-	FMULDP F0,F6
144
-	FMOVD 160(SP), F0
145
-	FMULD F4,F0
146
-	FADDDP F0,F3
147
-	FMOVD 144(SP), F0
148
-	FMULD F4,F0
149
-	FADDDP F0,F2
150
-	FMOVD 136(SP), F0
151
-	FMULD F4,F0
152
-	FADDDP F0,F1
153
-	FMOVD 184(SP), F0
154
-	FMULDP F0,F4
155
-	FXCHD F0, F3
156
-	FADDDP F0,F5
157
-	FMOVD 144(SP), F0
158
-	FMULD F4,F0
159
-	FADDDP F0,F2
160
-	FMOVD 136(SP), F0
161
-	FMULD F4,F0
162
-	FADDDP F0,F1
163
-	FMOVD 184(SP), F0
164
-	FMULD F4,F0
165
-	FADDDP F0,F3
166
-	FMOVD 168(SP), F0
167
-	FMULDP F0,F4
168
-	FXCHD F0, F3
169
-	FADDDP F0,F4
170
-	FMOVD 136(SP), F0
171
-	FMULD F5,F0
172
-	FADDDP F0,F1
173
-	FXCHD F0, F3
174
-	FMOVD 184(SP), F0
175
-	FMULD F5,F0
176
-	FADDDP F0,F3
177
-	FXCHD F0, F1
178
-	FMOVD 168(SP), F0
179
-	FMULD F5,F0
180
-	FADDDP F0,F1
181
-	FMOVD 152(SP), F0
182
-	FMULDP F0,F5
183
-	FXCHD F0, F4
184
-	FADDDP F0,F1
185
-	CMPQ DX,$16
186
-	FXCHD F0, F2
187
-	FMOVD 128(SP), F0
188
-	FSUBD ·DOFFSET3MINUSTWO128(SB), F0
189
-	FADDDP F0,F1
190
-	FXCHD F0, F1
191
-	FMOVD 120(SP), F0
192
-	FSUBD ·DOFFSET2(SB), F0
193
-	FADDDP F0,F1
194
-	FXCHD F0, F3
195
-	FMOVD 112(SP), F0
196
-	FSUBD ·DOFFSET1(SB), F0
197
-	FADDDP F0,F1
198
-	FXCHD F0, F2
199
-	FMOVD 104(SP), F0
200
-	FSUBD ·DOFFSET0(SB), F0
201
-	FADDDP F0,F1
202
-	JAE MULTIPLYADDATLEAST16BYTES
203
-	MULTIPLYADDATMOST15BYTES:
204
-	FMOVD ·ALPHA130(SB), F0
205
-	FADDD F2,F0
206
-	FSUBD ·ALPHA130(SB), F0
207
-	FSUBD F0,F2
208
-	FMULD ·SCALE(SB), F0
209
-	FMOVD ·ALPHA32(SB), F0
210
-	FADDD F2,F0
211
-	FSUBD ·ALPHA32(SB), F0
212
-	FSUBD F0,F2
213
-	FMOVD ·ALPHA64(SB), F0
214
-	FADDD F5,F0
215
-	FSUBD ·ALPHA64(SB), F0
216
-	FSUBD F0,F5
217
-	FMOVD ·ALPHA96(SB), F0
218
-	FADDD F7,F0
219
-	FSUBD ·ALPHA96(SB), F0
220
-	FSUBD F0,F7
221
-	FXCHD F0, F7
222
-	FADDDP F0,F1
223
-	FXCHD F0, F5
224
-	FADDDP F0,F1
225
-	FXCHD F0, F3
226
-	FADDDP F0,F5
227
-	FADDDP F0,F1
228
-	FMOVD 176(SP), F0
229
-	FMULD F1,F0
230
-	FMOVD 160(SP), F0
231
-	FMULD F2,F0
232
-	FMOVD 144(SP), F0
233
-	FMULD F3,F0
234
-	FMOVD 136(SP), F0
235
-	FMULDP F0,F4
236
-	FMOVD 160(SP), F0
237
-	FMULD F5,F0
238
-	FADDDP F0,F3
239
-	FMOVD 144(SP), F0
240
-	FMULD F5,F0
241
-	FADDDP F0,F2
242
-	FMOVD 136(SP), F0
243
-	FMULD F5,F0
244
-	FADDDP F0,F1
245
-	FMOVD 184(SP), F0
246
-	FMULDP F0,F5
247
-	FXCHD F0, F4
248
-	FADDDP F0,F3
249
-	FMOVD 144(SP), F0
250
-	FMULD F5,F0
251
-	FADDDP F0,F2
252
-	FMOVD 136(SP), F0
253
-	FMULD F5,F0
254
-	FADDDP F0,F1
255
-	FMOVD 184(SP), F0
256
-	FMULD F5,F0
257
-	FADDDP F0,F4
258
-	FMOVD 168(SP), F0
259
-	FMULDP F0,F5
260
-	FXCHD F0, F4
261
-	FADDDP F0,F2
262
-	FMOVD 136(SP), F0
263
-	FMULD F5,F0
264
-	FADDDP F0,F1
265
-	FMOVD 184(SP), F0
266
-	FMULD F5,F0
267
-	FADDDP F0,F4
268
-	FMOVD 168(SP), F0
269
-	FMULD F5,F0
270
-	FADDDP F0,F3
271
-	FMOVD 152(SP), F0
272
-	FMULDP F0,F5
273
-	FXCHD F0, F4
274
-	FADDDP F0,F1
275
-	ADDATMOST15BYTES:
276
-	CMPQ DX,$0
277
-	JE NOMOREBYTES
278
-	MOVL $0,0(SP)
279
-	MOVL $0, 4 (SP)
280
-	MOVL $0, 8 (SP)
281
-	MOVL $0, 12 (SP)
282
-	LEAQ 0(SP),DI
283
-	MOVQ DX,CX
284
-	REP; MOVSB
285
-	MOVB $1,0(DI)
286
-	MOVL  12 (SP),DI
287
-	MOVL  8 (SP),SI
288
-	MOVL  4 (SP),DX
289
-	MOVL 0(SP),CX
290
-	MOVL DI,128(SP)
291
-	MOVL SI,120(SP)
292
-	MOVL DX,112(SP)
293
-	MOVL CX,104(SP)
294
-	FXCHD F0, F3
295
-	FADDD 128(SP), F0
296
-	FSUBD ·DOFFSET3(SB), F0
297
-	FXCHD F0, F2
298
-	FADDD 120(SP), F0
299
-	FSUBD ·DOFFSET2(SB), F0
300
-	FXCHD F0, F1
301
-	FADDD 112(SP), F0
302
-	FSUBD ·DOFFSET1(SB), F0
303
-	FXCHD F0, F3
304
-	FADDD 104(SP), F0
305
-	FSUBD ·DOFFSET0(SB), F0
306
-	FMOVD ·ALPHA130(SB), F0
307
-	FADDD F3,F0
308
-	FSUBD ·ALPHA130(SB), F0
309
-	FSUBD F0,F3
310
-	FMULD ·SCALE(SB), F0
311
-	FMOVD ·ALPHA32(SB), F0
312
-	FADDD F2,F0
313
-	FSUBD ·ALPHA32(SB), F0
314
-	FSUBD F0,F2
315
-	FMOVD ·ALPHA64(SB), F0
316
-	FADDD F6,F0
317
-	FSUBD ·ALPHA64(SB), F0
318
-	FSUBD F0,F6
319
-	FMOVD ·ALPHA96(SB), F0
320
-	FADDD F5,F0
321
-	FSUBD ·ALPHA96(SB), F0
322
-	FSUBD F0,F5
323
-	FXCHD F0, F4
324
-	FADDDP F0,F3
325
-	FXCHD F0, F6
326
-	FADDDP F0,F1
327
-	FXCHD F0, F3
328
-	FADDDP F0,F5
329
-	FXCHD F0, F3
330
-	FADDDP F0,F1
331
-	FMOVD 176(SP), F0
332
-	FMULD F3,F0
333
-	FMOVD 160(SP), F0
334
-	FMULD F4,F0
335
-	FMOVD 144(SP), F0
336
-	FMULD F5,F0
337
-	FMOVD 136(SP), F0
338
-	FMULDP F0,F6
339
-	FMOVD 160(SP), F0
340
-	FMULD F5,F0
341
-	FADDDP F0,F3
342
-	FMOVD 144(SP), F0
343
-	FMULD F5,F0
344
-	FADDDP F0,F2
345
-	FMOVD 136(SP), F0
346
-	FMULD F5,F0
347
-	FADDDP F0,F1
348
-	FMOVD 184(SP), F0
349
-	FMULDP F0,F5
350
-	FXCHD F0, F4
351
-	FADDDP F0,F5
352
-	FMOVD 144(SP), F0
353
-	FMULD F6,F0
354
-	FADDDP F0,F2
355
-	FMOVD 136(SP), F0
356
-	FMULD F6,F0
357
-	FADDDP F0,F1
358
-	FMOVD 184(SP), F0
359
-	FMULD F6,F0
360
-	FADDDP F0,F4
361
-	FMOVD 168(SP), F0
362
-	FMULDP F0,F6
363
-	FXCHD F0, F5
364
-	FADDDP F0,F4
365
-	FMOVD 136(SP), F0
366
-	FMULD F2,F0
367
-	FADDDP F0,F1
368
-	FMOVD 184(SP), F0
369
-	FMULD F2,F0
370
-	FADDDP F0,F5
371
-	FMOVD 168(SP), F0
372
-	FMULD F2,F0
373
-	FADDDP F0,F3
374
-	FMOVD 152(SP), F0
375
-	FMULDP F0,F2
376
-	FXCHD F0, F1
377
-	FADDDP F0,F3
378
-	FXCHD F0, F3
379
-	FXCHD F0, F2
380
-	NOMOREBYTES:
381
-	MOVL $0,R10
382
-	FMOVD ·ALPHA130(SB), F0
383
-	FADDD F4,F0
384
-	FSUBD ·ALPHA130(SB), F0
385
-	FSUBD F0,F4
386
-	FMULD ·SCALE(SB), F0
387
-	FMOVD ·ALPHA32(SB), F0
388
-	FADDD F2,F0
389
-	FSUBD ·ALPHA32(SB), F0
390
-	FSUBD F0,F2
391
-	FMOVD ·ALPHA64(SB), F0
392
-	FADDD F4,F0
393
-	FSUBD ·ALPHA64(SB), F0
394
-	FSUBD F0,F4
395
-	FMOVD ·ALPHA96(SB), F0
396
-	FADDD F6,F0
397
-	FSUBD ·ALPHA96(SB), F0
398
-	FXCHD F0, F6
399
-	FSUBD F6,F0
400
-	FXCHD F0, F4
401
-	FADDDP F0,F3
402
-	FXCHD F0, F4
403
-	FADDDP F0,F1
404
-	FXCHD F0, F2
405
-	FADDDP F0,F3
406
-	FXCHD F0, F4
407
-	FADDDP F0,F3
408
-	FXCHD F0, F3
409
-	FADDD ·HOFFSET0(SB), F0
410
-	FXCHD F0, F3
411
-	FADDD ·HOFFSET1(SB), F0
412
-	FXCHD F0, F1
413
-	FADDD ·HOFFSET2(SB), F0
414
-	FXCHD F0, F2
415
-	FADDD ·HOFFSET3(SB), F0
416
-	FXCHD F0, F3
417
-	FMOVDP F0, 104(SP)
418
-	FMOVDP F0, 112(SP)
419
-	FMOVDP F0, 120(SP)
420
-	FMOVDP F0, 128(SP)
421
-	MOVL 108(SP),DI
422
-	ANDL $63,DI
423
-	MOVL 116(SP),SI
424
-	ANDL $63,SI
425
-	MOVL 124(SP),DX
426
-	ANDL $63,DX
427
-	MOVL 132(SP),CX
428
-	ANDL $63,CX
429
-	MOVL 112(SP),R8
430
-	ADDL DI,R8
431
-	MOVQ R8,112(SP)
432
-	MOVL 120(SP),DI
433
-	ADCL SI,DI
434
-	MOVQ DI,120(SP)
435
-	MOVL 128(SP),DI
436
-	ADCL DX,DI
437
-	MOVQ DI,128(SP)
438
-	MOVL R10,DI
439
-	ADCL CX,DI
440
-	MOVQ DI,136(SP)
441
-	MOVQ $5,DI
442
-	MOVL 104(SP),SI
443
-	ADDL SI,DI
444
-	MOVQ DI,104(SP)
445
-	MOVL R10,DI
446
-	MOVQ 112(SP),DX
447
-	ADCL DX,DI
448
-	MOVQ DI,112(SP)
449
-	MOVL R10,DI
450
-	MOVQ 120(SP),CX
451
-	ADCL CX,DI
452
-	MOVQ DI,120(SP)
453
-	MOVL R10,DI
454
-	MOVQ 128(SP),R8
455
-	ADCL R8,DI
456
-	MOVQ DI,128(SP)
457
-	MOVQ $0XFFFFFFFC,DI
458
-	MOVQ 136(SP),R9
459
-	ADCL R9,DI
460
-	SARL $16,DI
461
-	MOVQ DI,R9
462
-	XORL $0XFFFFFFFF,R9
463
-	ANDQ DI,SI
464
-	MOVQ 104(SP),AX
465
-	ANDQ R9,AX
466
-	ORQ AX,SI
467
-	ANDQ DI,DX
468
-	MOVQ 112(SP),AX
469
-	ANDQ R9,AX
470
-	ORQ AX,DX
471
-	ANDQ DI,CX
472
-	MOVQ 120(SP),AX
473
-	ANDQ R9,AX
474
-	ORQ AX,CX
475
-	ANDQ DI,R8
476
-	MOVQ 128(SP),DI
477
-	ANDQ R9,DI
478
-	ORQ DI,R8
479
-	MOVQ 88(SP),DI
480
-	MOVQ 96(SP),R9
481
-	ADDL 16(R9),SI
482
-	ADCL 20(R9),DX
483
-	ADCL 24(R9),CX
484
-	ADCL 28(R9),R8
485
-	MOVL SI,0(DI)
486
-	MOVL DX,4(DI)
487
-	MOVL CX,8(DI)
488
-	MOVL R8,12(DI)
489
-	MOVQ 32(SP),R11
490
-	MOVQ 40(SP),R12
491
-	MOVQ 48(SP),R13
492
-	MOVQ 56(SP),R14
493
-	MOVQ 64(SP),R15
494
-	MOVQ 72(SP),BX
495
-	MOVQ 80(SP),BP
496
-	MOVQ R11,SP
497
-	RET
498 1
deleted file mode 100644
... ...
@@ -1,379 +0,0 @@
1
-// Copyright 2015 The Go Authors. All rights reserved.
2
-// Use of this source code is governed by a BSD-style
3
-// license that can be found in the LICENSE file.
4
-
5
-// This code was translated into a form compatible with 5a from the public
6
-// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
7
-
8
-// +build arm,!gccgo,!appengine
9
-
10
-DATA poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
11
-DATA poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
12
-DATA poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
13
-DATA poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
14
-DATA poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
15
-GLOBL poly1305_init_constants_armv6<>(SB), 8, $20
16
-
17
-// Warning: the linker may use R11 to synthesize certain instructions. Please
18
-// take care and verify that no synthetic instructions use it.
19
-
20
-TEXT poly1305_init_ext_armv6<>(SB),4,$-4
21
-  MOVM.DB.W [R4-R11], (R13)
22
-  MOVM.IA.W (R1), [R2-R5]
23
-  MOVW $poly1305_init_constants_armv6<>(SB), R7
24
-  MOVW R2, R8
25
-  MOVW R2>>26, R9
26
-  MOVW R3>>20, g
27
-  MOVW R4>>14, R11
28
-  MOVW R5>>8, R12
29
-  ORR R3<<6, R9, R9
30
-  ORR R4<<12, g, g
31
-  ORR R5<<18, R11, R11
32
-  MOVM.IA (R7), [R2-R6]
33
-  AND R8, R2, R2
34
-  AND R9, R3, R3
35
-  AND g, R4, R4
36
-  AND R11, R5, R5
37
-  AND R12, R6, R6
38
-  MOVM.IA.W [R2-R6], (R0)
39
-  EOR R2, R2, R2
40
-  EOR R3, R3, R3
41
-  EOR R4, R4, R4
42
-  EOR R5, R5, R5
43
-  EOR R6, R6, R6
44
-  MOVM.IA.W [R2-R6], (R0)
45
-  MOVM.IA.W (R1), [R2-R5]
46
-  MOVM.IA [R2-R6], (R0)
47
-  MOVM.IA.W (R13), [R4-R11]
48
-  RET
49
-
50
-#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
51
-  MOVBU (offset+0)(Rsrc), Rtmp; \
52
-  MOVBU Rtmp, (offset+0)(Rdst); \
53
-  MOVBU (offset+1)(Rsrc), Rtmp; \
54
-  MOVBU Rtmp, (offset+1)(Rdst); \
55
-  MOVBU (offset+2)(Rsrc), Rtmp; \
56
-  MOVBU Rtmp, (offset+2)(Rdst); \
57
-  MOVBU (offset+3)(Rsrc), Rtmp; \
58
-  MOVBU Rtmp, (offset+3)(Rdst)
59
-
60
-TEXT poly1305_blocks_armv6<>(SB),4,$-4
61
-  MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
62
-  SUB $128, R13
63
-  MOVW R0, 36(R13)
64
-  MOVW R1, 40(R13)
65
-  MOVW R2, 44(R13)
66
-  MOVW R1, R14
67
-  MOVW R2, R12
68
-  MOVW 56(R0), R8
69
-  WORD $0xe1180008 // TST R8, R8 not working see issue 5921
70
-  EOR R6, R6, R6
71
-  MOVW.EQ $(1<<24), R6
72
-  MOVW R6, 32(R13)
73
-  ADD $64, R13, g
74
-  MOVM.IA (R0), [R0-R9]
75
-  MOVM.IA [R0-R4], (g)
76
-  CMP $16, R12
77
-  BLO poly1305_blocks_armv6_done
78
-poly1305_blocks_armv6_mainloop:
79
-  WORD $0xe31e0003 // TST R14, #3 not working see issue 5921
80
-  BEQ poly1305_blocks_armv6_mainloop_aligned
81
-  ADD $48, R13, g
82
-  MOVW_UNALIGNED(R14, g, R0, 0)
83
-  MOVW_UNALIGNED(R14, g, R0, 4)
84
-  MOVW_UNALIGNED(R14, g, R0, 8)
85
-  MOVW_UNALIGNED(R14, g, R0, 12)
86
-  MOVM.IA (g), [R0-R3]
87
-  ADD $16, R14
88
-  B poly1305_blocks_armv6_mainloop_loaded
89
-poly1305_blocks_armv6_mainloop_aligned:
90
-  MOVM.IA.W (R14), [R0-R3]
91
-poly1305_blocks_armv6_mainloop_loaded:
92
-  MOVW R0>>26, g
93
-  MOVW R1>>20, R11
94
-  MOVW R2>>14, R12
95
-  MOVW R14, 40(R13)
96
-  MOVW R3>>8, R4
97
-  ORR R1<<6, g, g
98
-  ORR R2<<12, R11, R11
99
-  ORR R3<<18, R12, R12
100
-  BIC $0xfc000000, R0, R0
101
-  BIC $0xfc000000, g, g
102
-  MOVW 32(R13), R3
103
-  BIC $0xfc000000, R11, R11
104
-  BIC $0xfc000000, R12, R12
105
-  ADD R0, R5, R5
106
-  ADD g, R6, R6
107
-  ORR R3, R4, R4
108
-  ADD R11, R7, R7
109
-  ADD $64, R13, R14
110
-  ADD R12, R8, R8
111
-  ADD R4, R9, R9
112
-  MOVM.IA (R14), [R0-R4]
113
-  MULLU R4, R5, (R11, g)
114
-  MULLU R3, R5, (R14, R12)
115
-  MULALU R3, R6, (R11, g)
116
-  MULALU R2, R6, (R14, R12)
117
-  MULALU R2, R7, (R11, g)
118
-  MULALU R1, R7, (R14, R12)
119
-  ADD R4<<2, R4, R4
120
-  ADD R3<<2, R3, R3
121
-  MULALU R1, R8, (R11, g)
122
-  MULALU R0, R8, (R14, R12)
123
-  MULALU R0, R9, (R11, g)
124
-  MULALU R4, R9, (R14, R12)
125
-  MOVW g, 24(R13)
126
-  MOVW R11, 28(R13)
127
-  MOVW R12, 16(R13)
128
-  MOVW R14, 20(R13)
129
-  MULLU R2, R5, (R11, g)
130
-  MULLU R1, R5, (R14, R12)
131
-  MULALU R1, R6, (R11, g)
132
-  MULALU R0, R6, (R14, R12)
133
-  MULALU R0, R7, (R11, g)
134
-  MULALU R4, R7, (R14, R12)
135
-  ADD R2<<2, R2, R2
136
-  ADD R1<<2, R1, R1
137
-  MULALU R4, R8, (R11, g)
138
-  MULALU R3, R8, (R14, R12)
139
-  MULALU R3, R9, (R11, g)
140
-  MULALU R2, R9, (R14, R12)
141
-  MOVW g, 8(R13)
142
-  MOVW R11, 12(R13)
143
-  MOVW R12, 0(R13)
144
-  MOVW R14, w+4(SP)
145
-  MULLU R0, R5, (R11, g)
146
-  MULALU R4, R6, (R11, g)
147
-  MULALU R3, R7, (R11, g)
148
-  MULALU R2, R8, (R11, g)
149
-  MULALU R1, R9, (R11, g)
150
-  MOVM.IA (R13), [R0-R7]
151
-  MOVW g>>26, R12
152
-  MOVW R4>>26, R14
153
-  ORR R11<<6, R12, R12
154
-  ORR R5<<6, R14, R14
155
-  BIC $0xfc000000, g, g
156
-  BIC $0xfc000000, R4, R4
157
-  ADD.S R12, R0, R0
158
-  ADC $0, R1, R1
159
-  ADD.S R14, R6, R6
160
-  ADC $0, R7, R7
161
-  MOVW R0>>26, R12
162
-  MOVW R6>>26, R14
163
-  ORR R1<<6, R12, R12
164
-  ORR R7<<6, R14, R14
165
-  BIC $0xfc000000, R0, R0
166
-  BIC $0xfc000000, R6, R6
167
-  ADD R14<<2, R14, R14
168
-  ADD.S R12, R2, R2
169
-  ADC $0, R3, R3
170
-  ADD R14, g, g
171
-  MOVW R2>>26, R12
172
-  MOVW g>>26, R14
173
-  ORR R3<<6, R12, R12
174
-  BIC $0xfc000000, g, R5
175
-  BIC $0xfc000000, R2, R7
176
-  ADD R12, R4, R4
177
-  ADD R14, R0, R0
178
-  MOVW R4>>26, R12
179
-  BIC $0xfc000000, R4, R8
180
-  ADD R12, R6, R9
181
-  MOVW w+44(SP), R12
182
-  MOVW w+40(SP), R14
183
-  MOVW R0, R6
184
-  CMP $32, R12
185
-  SUB $16, R12, R12
186
-  MOVW R12, 44(R13)
187
-  BHS poly1305_blocks_armv6_mainloop
188
-poly1305_blocks_armv6_done:
189
-  MOVW 36(R13), R12
190
-  MOVW R5, 20(R12)
191
-  MOVW R6, 24(R12)
192
-  MOVW R7, 28(R12)
193
-  MOVW R8, 32(R12)
194
-  MOVW R9, 36(R12)
195
-  ADD $128, R13, R13
196
-  MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14]
197
-  RET
198
-
199
-#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
200
-  MOVBU.P 1(Rsrc), Rtmp; \
201
-  MOVBU.P Rtmp, 1(Rdst); \
202
-  MOVBU.P 1(Rsrc), Rtmp; \
203
-  MOVBU.P Rtmp, 1(Rdst)
204
-
205
-#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
206
-  MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
207
-  MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
208
-
209
-TEXT poly1305_finish_ext_armv6<>(SB),4,$-4
210
-  MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
211
-  SUB $16, R13, R13
212
-  MOVW R0, R5
213
-  MOVW R1, R6
214
-  MOVW R2, R7
215
-  MOVW R3, R8
216
-  AND.S R2, R2, R2
217
-  BEQ poly1305_finish_ext_armv6_noremaining
218
-  EOR R0, R0
219
-  MOVW R13, R9
220
-  MOVW R0, 0(R13)
221
-  MOVW R0, 4(R13)
222
-  MOVW R0, 8(R13)
223
-  MOVW R0, 12(R13)
224
-  WORD $0xe3110003 // TST R1, #3 not working see issue 5921
225
-  BEQ poly1305_finish_ext_armv6_aligned
226
-  WORD $0xe3120008 // TST R2, #8 not working see issue 5921
227
-  BEQ poly1305_finish_ext_armv6_skip8
228
-  MOVWP_UNALIGNED(R1, R9, g)
229
-  MOVWP_UNALIGNED(R1, R9, g)
230
-poly1305_finish_ext_armv6_skip8:
231
-  WORD $0xe3120004 // TST $4, R2 not working see issue 5921
232
-  BEQ poly1305_finish_ext_armv6_skip4
233
-  MOVWP_UNALIGNED(R1, R9, g)
234
-poly1305_finish_ext_armv6_skip4:
235
-  WORD $0xe3120002 // TST $2, R2 not working see issue 5921
236
-  BEQ poly1305_finish_ext_armv6_skip2
237
-  MOVHUP_UNALIGNED(R1, R9, g)
238
-  B poly1305_finish_ext_armv6_skip2
239
-poly1305_finish_ext_armv6_aligned:
240
-  WORD $0xe3120008 // TST R2, #8 not working see issue 5921
241
-  BEQ poly1305_finish_ext_armv6_skip8_aligned
242
-  MOVM.IA.W (R1), [g-R11]
243
-  MOVM.IA.W [g-R11], (R9)
244
-poly1305_finish_ext_armv6_skip8_aligned:
245
-  WORD $0xe3120004 // TST $4, R2 not working see issue 5921
246
-  BEQ poly1305_finish_ext_armv6_skip4_aligned
247
-  MOVW.P 4(R1), g
248
-  MOVW.P g, 4(R9)
249
-poly1305_finish_ext_armv6_skip4_aligned:
250
-  WORD $0xe3120002 // TST $2, R2 not working see issue 5921
251
-  BEQ poly1305_finish_ext_armv6_skip2
252
-  MOVHU.P 2(R1), g
253
-  MOVH.P g, 2(R9)
254
-poly1305_finish_ext_armv6_skip2:
255
-  WORD $0xe3120001 // TST $1, R2 not working see issue 5921
256
-  BEQ poly1305_finish_ext_armv6_skip1
257
-  MOVBU.P 1(R1), g
258
-  MOVBU.P g, 1(R9)
259
-poly1305_finish_ext_armv6_skip1:
260
-  MOVW $1, R11
261
-  MOVBU R11, 0(R9)
262
-  MOVW R11, 56(R5)
263
-  MOVW R5, R0
264
-  MOVW R13, R1
265
-  MOVW $16, R2
266
-  BL poly1305_blocks_armv6<>(SB)
267
-poly1305_finish_ext_armv6_noremaining:
268
-  MOVW 20(R5), R0
269
-  MOVW 24(R5), R1
270
-  MOVW 28(R5), R2
271
-  MOVW 32(R5), R3
272
-  MOVW 36(R5), R4
273
-  MOVW R4>>26, R12
274
-  BIC $0xfc000000, R4, R4
275
-  ADD R12<<2, R12, R12
276
-  ADD R12, R0, R0
277
-  MOVW R0>>26, R12
278
-  BIC $0xfc000000, R0, R0
279
-  ADD R12, R1, R1
280
-  MOVW R1>>26, R12
281
-  BIC $0xfc000000, R1, R1
282
-  ADD R12, R2, R2
283
-  MOVW R2>>26, R12
284
-  BIC $0xfc000000, R2, R2
285
-  ADD R12, R3, R3
286
-  MOVW R3>>26, R12
287
-  BIC $0xfc000000, R3, R3
288
-  ADD R12, R4, R4
289
-  ADD $5, R0, R6
290
-  MOVW R6>>26, R12
291
-  BIC $0xfc000000, R6, R6
292
-  ADD R12, R1, R7
293
-  MOVW R7>>26, R12
294
-  BIC $0xfc000000, R7, R7
295
-  ADD R12, R2, g
296
-  MOVW g>>26, R12
297
-  BIC $0xfc000000, g, g
298
-  ADD R12, R3, R11
299
-  MOVW $-(1<<26), R12
300
-  ADD R11>>26, R12, R12
301
-  BIC $0xfc000000, R11, R11
302
-  ADD R12, R4, R14
303
-  MOVW R14>>31, R12
304
-  SUB $1, R12
305
-  AND R12, R6, R6
306
-  AND R12, R7, R7
307
-  AND R12, g, g
308
-  AND R12, R11, R11
309
-  AND R12, R14, R14
310
-  MVN R12, R12
311
-  AND R12, R0, R0
312
-  AND R12, R1, R1
313
-  AND R12, R2, R2
314
-  AND R12, R3, R3
315
-  AND R12, R4, R4
316
-  ORR R6, R0, R0
317
-  ORR R7, R1, R1
318
-  ORR g, R2, R2
319
-  ORR R11, R3, R3
320
-  ORR R14, R4, R4
321
-  ORR R1<<26, R0, R0
322
-  MOVW R1>>6, R1
323
-  ORR R2<<20, R1, R1
324
-  MOVW R2>>12, R2
325
-  ORR R3<<14, R2, R2
326
-  MOVW R3>>18, R3
327
-  ORR R4<<8, R3, R3
328
-  MOVW 40(R5), R6
329
-  MOVW 44(R5), R7
330
-  MOVW 48(R5), g
331
-  MOVW 52(R5), R11
332
-  ADD.S R6, R0, R0
333
-  ADC.S R7, R1, R1
334
-  ADC.S g, R2, R2
335
-  ADC.S R11, R3, R3
336
-  MOVM.IA [R0-R3], (R8)
337
-  MOVW R5, R12
338
-  EOR R0, R0, R0
339
-  EOR R1, R1, R1
340
-  EOR R2, R2, R2
341
-  EOR R3, R3, R3
342
-  EOR R4, R4, R4
343
-  EOR R5, R5, R5
344
-  EOR R6, R6, R6
345
-  EOR R7, R7, R7
346
-  MOVM.IA.W [R0-R7], (R12)
347
-  MOVM.IA [R0-R7], (R12)
348
-  ADD $16, R13, R13
349
-  MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14]
350
-  RET
351
-
352
-// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]key)
353
-TEXT ·poly1305_auth_armv6(SB),0,$280-16
354
-  MOVW  out+0(FP), R4
355
-  MOVW  m+4(FP), R5
356
-  MOVW  mlen+8(FP), R6
357
-  MOVW  key+12(FP), R7
358
-
359
-  MOVW R13, R8
360
-  BIC $63, R13
361
-  SUB $64, R13, R13
362
-  MOVW  R13, R0
363
-  MOVW  R7, R1
364
-  BL poly1305_init_ext_armv6<>(SB)
365
-  BIC.S $15, R6, R2
366
-  BEQ poly1305_auth_armv6_noblocks
367
-  MOVW R13, R0
368
-  MOVW R5, R1
369
-  ADD R2, R5, R5
370
-  SUB R2, R6, R6
371
-  BL poly1305_blocks_armv6<>(SB)
372
-poly1305_auth_armv6_noblocks:
373
-  MOVW R13, R0
374
-  MOVW R5, R1
375
-  MOVW R6, R2
376
-  MOVW R4, R3
377
-  BL poly1305_finish_ext_armv6<>(SB)
378
-  MOVW R8, R13
379
-  RET
... ...
@@ -6,10 +6,8 @@
6 6
 
7 7
 package poly1305
8 8
 
9
-// This function is implemented in poly1305_amd64.s
10
-
9
+// This function is implemented in sum_amd64.s
11 10
 //go:noescape
12
-
13 11
 func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
14 12
 
15 13
 // Sum generates an authenticator for m using a one-time key and puts the
16 14
new file mode 100644
... ...
@@ -0,0 +1,125 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build amd64,!gccgo,!appengine
5
+
6
+#include "textflag.h"
7
+
8
+#define POLY1305_ADD(msg, h0, h1, h2) \
9
+	ADDQ 0(msg), h0;  \
10
+	ADCQ 8(msg), h1;  \
11
+	ADCQ $1, h2;      \
12
+	LEAQ 16(msg), msg
13
+
14
+#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
15
+	MOVQ  r0, AX;                  \
16
+	MULQ  h0;                      \
17
+	MOVQ  AX, t0;                  \
18
+	MOVQ  DX, t1;                  \
19
+	MOVQ  r0, AX;                  \
20
+	MULQ  h1;                      \
21
+	ADDQ  AX, t1;                  \
22
+	ADCQ  $0, DX;                  \
23
+	MOVQ  r0, t2;                  \
24
+	IMULQ h2, t2;                  \
25
+	ADDQ  DX, t2;                  \
26
+	                               \
27
+	MOVQ  r1, AX;                  \
28
+	MULQ  h0;                      \
29
+	ADDQ  AX, t1;                  \
30
+	ADCQ  $0, DX;                  \
31
+	MOVQ  DX, h0;                  \
32
+	MOVQ  r1, t3;                  \
33
+	IMULQ h2, t3;                  \
34
+	MOVQ  r1, AX;                  \
35
+	MULQ  h1;                      \
36
+	ADDQ  AX, t2;                  \
37
+	ADCQ  DX, t3;                  \
38
+	ADDQ  h0, t2;                  \
39
+	ADCQ  $0, t3;                  \
40
+	                               \
41
+	MOVQ  t0, h0;                  \
42
+	MOVQ  t1, h1;                  \
43
+	MOVQ  t2, h2;                  \
44
+	ANDQ  $3, h2;                  \
45
+	MOVQ  t2, t0;                  \
46
+	ANDQ  $0xFFFFFFFFFFFFFFFC, t0; \
47
+	ADDQ  t0, h0;                  \
48
+	ADCQ  t3, h1;                  \
49
+	ADCQ  $0, h2;                  \
50
+	SHRQ  $2, t3, t2;              \
51
+	SHRQ  $2, t3;                  \
52
+	ADDQ  t2, h0;                  \
53
+	ADCQ  t3, h1;                  \
54
+	ADCQ  $0, h2
55
+
56
+DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
57
+DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
58
+GLOBL ·poly1305Mask<>(SB), RODATA, $16
59
+
60
+// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
61
+TEXT ·poly1305(SB), $0-32
62
+	MOVQ out+0(FP), DI
63
+	MOVQ m+8(FP), SI
64
+	MOVQ mlen+16(FP), R15
65
+	MOVQ key+24(FP), AX
66
+
67
+	MOVQ 0(AX), R11
68
+	MOVQ 8(AX), R12
69
+	ANDQ ·poly1305Mask<>(SB), R11   // r0
70
+	ANDQ ·poly1305Mask<>+8(SB), R12 // r1
71
+	XORQ R8, R8                    // h0
72
+	XORQ R9, R9                    // h1
73
+	XORQ R10, R10                  // h2
74
+
75
+	CMPQ R15, $16
76
+	JB   bytes_between_0_and_15
77
+
78
+loop:
79
+	POLY1305_ADD(SI, R8, R9, R10)
80
+
81
+multiply:
82
+	POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
83
+	SUBQ $16, R15
84
+	CMPQ R15, $16
85
+	JAE  loop
86
+
87
+bytes_between_0_and_15:
88
+	TESTQ R15, R15
89
+	JZ    done
90
+	MOVQ  $1, BX
91
+	XORQ  CX, CX
92
+	XORQ  R13, R13
93
+	ADDQ  R15, SI
94
+
95
+flush_buffer:
96
+	SHLQ $8, BX, CX
97
+	SHLQ $8, BX
98
+	MOVB -1(SI), R13
99
+	XORQ R13, BX
100
+	DECQ SI
101
+	DECQ R15
102
+	JNZ  flush_buffer
103
+
104
+	ADDQ BX, R8
105
+	ADCQ CX, R9
106
+	ADCQ $0, R10
107
+	MOVQ $16, R15
108
+	JMP  multiply
109
+
110
+done:
111
+	MOVQ    R8, AX
112
+	MOVQ    R9, BX
113
+	SUBQ    $0xFFFFFFFFFFFFFFFB, AX
114
+	SBBQ    $0xFFFFFFFFFFFFFFFF, BX
115
+	SBBQ    $3, R10
116
+	CMOVQCS R8, AX
117
+	CMOVQCS R9, BX
118
+	MOVQ    key+24(FP), R8
119
+	ADDQ    16(R8), AX
120
+	ADCQ    24(R8), BX
121
+
122
+	MOVQ AX, 0(DI)
123
+	MOVQ BX, 8(DI)
124
+	RET
... ...
@@ -2,14 +2,12 @@
2 2
 // Use of this source code is governed by a BSD-style
3 3
 // license that can be found in the LICENSE file.
4 4
 
5
-// +build arm,!gccgo,!appengine
5
+// +build arm,!gccgo,!appengine,!nacl
6 6
 
7 7
 package poly1305
8 8
 
9
-// This function is implemented in poly1305_arm.s
10
-
9
+// This function is implemented in sum_arm.s
11 10
 //go:noescape
12
-
13 11
 func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
14 12
 
15 13
 // Sum generates an authenticator for m using a one-time key and puts the
16 14
new file mode 100644
... ...
@@ -0,0 +1,427 @@
0
+// Copyright 2015 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build arm,!gccgo,!appengine,!nacl
5
+
6
+#include "textflag.h"
7
+
8
+// This code was translated into a form compatible with 5a from the public
9
+// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
10
+
11
+DATA ·poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
12
+DATA ·poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
13
+DATA ·poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
14
+DATA ·poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
15
+DATA ·poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
16
+GLOBL ·poly1305_init_constants_armv6<>(SB), 8, $20
17
+
18
+// Warning: the linker may use R11 to synthesize certain instructions. Please
19
+// take care and verify that no synthetic instructions use it.
20
+
21
+TEXT poly1305_init_ext_armv6<>(SB), NOSPLIT, $0
22
+	// Needs 16 bytes of stack and 64 bytes of space pointed to by R0.  (It
23
+	// might look like it's only 60 bytes of space but the final four bytes
24
+	// will be written by another function.) We need to skip over four
25
+	// bytes of stack because that's saving the value of 'g'.
26
+	ADD       $4, R13, R8
27
+	MOVM.IB   [R4-R7], (R8)
28
+	MOVM.IA.W (R1), [R2-R5]
29
+	MOVW      $·poly1305_init_constants_armv6<>(SB), R7
30
+	MOVW      R2, R8
31
+	MOVW      R2>>26, R9
32
+	MOVW      R3>>20, g
33
+	MOVW      R4>>14, R11
34
+	MOVW      R5>>8, R12
35
+	ORR       R3<<6, R9, R9
36
+	ORR       R4<<12, g, g
37
+	ORR       R5<<18, R11, R11
38
+	MOVM.IA   (R7), [R2-R6]
39
+	AND       R8, R2, R2
40
+	AND       R9, R3, R3
41
+	AND       g, R4, R4
42
+	AND       R11, R5, R5
43
+	AND       R12, R6, R6
44
+	MOVM.IA.W [R2-R6], (R0)
45
+	EOR       R2, R2, R2
46
+	EOR       R3, R3, R3
47
+	EOR       R4, R4, R4
48
+	EOR       R5, R5, R5
49
+	EOR       R6, R6, R6
50
+	MOVM.IA.W [R2-R6], (R0)
51
+	MOVM.IA.W (R1), [R2-R5]
52
+	MOVM.IA   [R2-R6], (R0)
53
+	ADD       $20, R13, R0
54
+	MOVM.DA   (R0), [R4-R7]
55
+	RET
56
+
57
+#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
58
+	MOVBU (offset+0)(Rsrc), Rtmp; \
59
+	MOVBU Rtmp, (offset+0)(Rdst); \
60
+	MOVBU (offset+1)(Rsrc), Rtmp; \
61
+	MOVBU Rtmp, (offset+1)(Rdst); \
62
+	MOVBU (offset+2)(Rsrc), Rtmp; \
63
+	MOVBU Rtmp, (offset+2)(Rdst); \
64
+	MOVBU (offset+3)(Rsrc), Rtmp; \
65
+	MOVBU Rtmp, (offset+3)(Rdst)
66
+
67
+TEXT poly1305_blocks_armv6<>(SB), NOSPLIT, $0
68
+	// Needs 24 bytes of stack for saved registers and then 88 bytes of
69
+	// scratch space after that. We assume that 24 bytes at (R13) have
70
+	// already been used: four bytes for the link register saved in the
71
+	// prelude of poly1305_auth_armv6, four bytes for saving the value of g
72
+	// in that function and 16 bytes of scratch space used around
73
+	// poly1305_finish_ext_armv6_skip1.
74
+	ADD     $24, R13, R12
75
+	MOVM.IB [R4-R8, R14], (R12)
76
+	MOVW    R0, 88(R13)
77
+	MOVW    R1, 92(R13)
78
+	MOVW    R2, 96(R13)
79
+	MOVW    R1, R14
80
+	MOVW    R2, R12
81
+	MOVW    56(R0), R8
82
+	WORD    $0xe1180008                // TST R8, R8 not working see issue 5921
83
+	EOR     R6, R6, R6
84
+	MOVW.EQ $(1<<24), R6
85
+	MOVW    R6, 84(R13)
86
+	ADD     $116, R13, g
87
+	MOVM.IA (R0), [R0-R9]
88
+	MOVM.IA [R0-R4], (g)
89
+	CMP     $16, R12
90
+	BLO     poly1305_blocks_armv6_done
91
+
92
+poly1305_blocks_armv6_mainloop:
93
+	WORD    $0xe31e0003                            // TST R14, #3 not working see issue 5921
94
+	BEQ     poly1305_blocks_armv6_mainloop_aligned
95
+	ADD     $100, R13, g
96
+	MOVW_UNALIGNED(R14, g, R0, 0)
97
+	MOVW_UNALIGNED(R14, g, R0, 4)
98
+	MOVW_UNALIGNED(R14, g, R0, 8)
99
+	MOVW_UNALIGNED(R14, g, R0, 12)
100
+	MOVM.IA (g), [R0-R3]
101
+	ADD     $16, R14
102
+	B       poly1305_blocks_armv6_mainloop_loaded
103
+
104
+poly1305_blocks_armv6_mainloop_aligned:
105
+	MOVM.IA.W (R14), [R0-R3]
106
+
107
+poly1305_blocks_armv6_mainloop_loaded:
108
+	MOVW    R0>>26, g
109
+	MOVW    R1>>20, R11
110
+	MOVW    R2>>14, R12
111
+	MOVW    R14, 92(R13)
112
+	MOVW    R3>>8, R4
113
+	ORR     R1<<6, g, g
114
+	ORR     R2<<12, R11, R11
115
+	ORR     R3<<18, R12, R12
116
+	BIC     $0xfc000000, R0, R0
117
+	BIC     $0xfc000000, g, g
118
+	MOVW    84(R13), R3
119
+	BIC     $0xfc000000, R11, R11
120
+	BIC     $0xfc000000, R12, R12
121
+	ADD     R0, R5, R5
122
+	ADD     g, R6, R6
123
+	ORR     R3, R4, R4
124
+	ADD     R11, R7, R7
125
+	ADD     $116, R13, R14
126
+	ADD     R12, R8, R8
127
+	ADD     R4, R9, R9
128
+	MOVM.IA (R14), [R0-R4]
129
+	MULLU   R4, R5, (R11, g)
130
+	MULLU   R3, R5, (R14, R12)
131
+	MULALU  R3, R6, (R11, g)
132
+	MULALU  R2, R6, (R14, R12)
133
+	MULALU  R2, R7, (R11, g)
134
+	MULALU  R1, R7, (R14, R12)
135
+	ADD     R4<<2, R4, R4
136
+	ADD     R3<<2, R3, R3
137
+	MULALU  R1, R8, (R11, g)
138
+	MULALU  R0, R8, (R14, R12)
139
+	MULALU  R0, R9, (R11, g)
140
+	MULALU  R4, R9, (R14, R12)
141
+	MOVW    g, 76(R13)
142
+	MOVW    R11, 80(R13)
143
+	MOVW    R12, 68(R13)
144
+	MOVW    R14, 72(R13)
145
+	MULLU   R2, R5, (R11, g)
146
+	MULLU   R1, R5, (R14, R12)
147
+	MULALU  R1, R6, (R11, g)
148
+	MULALU  R0, R6, (R14, R12)
149
+	MULALU  R0, R7, (R11, g)
150
+	MULALU  R4, R7, (R14, R12)
151
+	ADD     R2<<2, R2, R2
152
+	ADD     R1<<2, R1, R1
153
+	MULALU  R4, R8, (R11, g)
154
+	MULALU  R3, R8, (R14, R12)
155
+	MULALU  R3, R9, (R11, g)
156
+	MULALU  R2, R9, (R14, R12)
157
+	MOVW    g, 60(R13)
158
+	MOVW    R11, 64(R13)
159
+	MOVW    R12, 52(R13)
160
+	MOVW    R14, 56(R13)
161
+	MULLU   R0, R5, (R11, g)
162
+	MULALU  R4, R6, (R11, g)
163
+	MULALU  R3, R7, (R11, g)
164
+	MULALU  R2, R8, (R11, g)
165
+	MULALU  R1, R9, (R11, g)
166
+	ADD     $52, R13, R0
167
+	MOVM.IA (R0), [R0-R7]
168
+	MOVW    g>>26, R12
169
+	MOVW    R4>>26, R14
170
+	ORR     R11<<6, R12, R12
171
+	ORR     R5<<6, R14, R14
172
+	BIC     $0xfc000000, g, g
173
+	BIC     $0xfc000000, R4, R4
174
+	ADD.S   R12, R0, R0
175
+	ADC     $0, R1, R1
176
+	ADD.S   R14, R6, R6
177
+	ADC     $0, R7, R7
178
+	MOVW    R0>>26, R12
179
+	MOVW    R6>>26, R14
180
+	ORR     R1<<6, R12, R12
181
+	ORR     R7<<6, R14, R14
182
+	BIC     $0xfc000000, R0, R0
183
+	BIC     $0xfc000000, R6, R6
184
+	ADD     R14<<2, R14, R14
185
+	ADD.S   R12, R2, R2
186
+	ADC     $0, R3, R3
187
+	ADD     R14, g, g
188
+	MOVW    R2>>26, R12
189
+	MOVW    g>>26, R14
190
+	ORR     R3<<6, R12, R12
191
+	BIC     $0xfc000000, g, R5
192
+	BIC     $0xfc000000, R2, R7
193
+	ADD     R12, R4, R4
194
+	ADD     R14, R0, R0
195
+	MOVW    R4>>26, R12
196
+	BIC     $0xfc000000, R4, R8
197
+	ADD     R12, R6, R9
198
+	MOVW    96(R13), R12
199
+	MOVW    92(R13), R14
200
+	MOVW    R0, R6
201
+	CMP     $32, R12
202
+	SUB     $16, R12, R12
203
+	MOVW    R12, 96(R13)
204
+	BHS     poly1305_blocks_armv6_mainloop
205
+
206
+poly1305_blocks_armv6_done:
207
+	MOVW    88(R13), R12
208
+	MOVW    R5, 20(R12)
209
+	MOVW    R6, 24(R12)
210
+	MOVW    R7, 28(R12)
211
+	MOVW    R8, 32(R12)
212
+	MOVW    R9, 36(R12)
213
+	ADD     $48, R13, R0
214
+	MOVM.DA (R0), [R4-R8, R14]
215
+	RET
216
+
217
+#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
218
+	MOVBU.P 1(Rsrc), Rtmp; \
219
+	MOVBU.P Rtmp, 1(Rdst); \
220
+	MOVBU.P 1(Rsrc), Rtmp; \
221
+	MOVBU.P Rtmp, 1(Rdst)
222
+
223
+#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
224
+	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
225
+	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
226
+
227
+// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
228
+TEXT ·poly1305_auth_armv6(SB), $196-16
229
+	// The value 196, just above, is the sum of 64 (the size of the context
230
+	// structure) and 132 (the amount of stack needed).
231
+	//
232
+	// At this point, the stack pointer (R13) has been moved down. It
233
+	// points to the saved link register and there are 196 bytes of free
234
+	// space above it.
235
+	//
236
+	// The stack for this function looks like:
237
+	//
238
+	// +---------------------
239
+	// |
240
+	// | 64 bytes of context structure
241
+	// |
242
+	// +---------------------
243
+	// |
244
+	// | 112 bytes for poly1305_blocks_armv6
245
+	// |
246
+	// +---------------------
247
+	// | 16 bytes of final block, constructed at
248
+	// | poly1305_finish_ext_armv6_skip8
249
+	// +---------------------
250
+	// | four bytes of saved 'g'
251
+	// +---------------------
252
+	// | lr, saved by prelude    <- R13 points here
253
+	// +---------------------
254
+	MOVW g, 4(R13)
255
+
256
+	MOVW out+0(FP), R4
257
+	MOVW m+4(FP), R5
258
+	MOVW mlen+8(FP), R6
259
+	MOVW key+12(FP), R7
260
+
261
+	ADD  $136, R13, R0 // 136 = 4 + 4 + 16 + 112
262
+	MOVW R7, R1
263
+
264
+	// poly1305_init_ext_armv6 will write to the stack from R13+4, but
265
+	// that's ok because none of the other values have been written yet.
266
+	BL    poly1305_init_ext_armv6<>(SB)
267
+	BIC.S $15, R6, R2
268
+	BEQ   poly1305_auth_armv6_noblocks
269
+	ADD   $136, R13, R0
270
+	MOVW  R5, R1
271
+	ADD   R2, R5, R5
272
+	SUB   R2, R6, R6
273
+	BL    poly1305_blocks_armv6<>(SB)
274
+
275
+poly1305_auth_armv6_noblocks:
276
+	ADD  $136, R13, R0
277
+	MOVW R5, R1
278
+	MOVW R6, R2
279
+	MOVW R4, R3
280
+
281
+	MOVW  R0, R5
282
+	MOVW  R1, R6
283
+	MOVW  R2, R7
284
+	MOVW  R3, R8
285
+	AND.S R2, R2, R2
286
+	BEQ   poly1305_finish_ext_armv6_noremaining
287
+	EOR   R0, R0
288
+	ADD   $8, R13, R9                           // 8 = offset to 16 byte scratch space
289
+	MOVW  R0, (R9)
290
+	MOVW  R0, 4(R9)
291
+	MOVW  R0, 8(R9)
292
+	MOVW  R0, 12(R9)
293
+	WORD  $0xe3110003                           // TST R1, #3 not working see issue 5921
294
+	BEQ   poly1305_finish_ext_armv6_aligned
295
+	WORD  $0xe3120008                           // TST R2, #8 not working see issue 5921
296
+	BEQ   poly1305_finish_ext_armv6_skip8
297
+	MOVWP_UNALIGNED(R1, R9, g)
298
+	MOVWP_UNALIGNED(R1, R9, g)
299
+
300
+poly1305_finish_ext_armv6_skip8:
301
+	WORD $0xe3120004                     // TST $4, R2 not working see issue 5921
302
+	BEQ  poly1305_finish_ext_armv6_skip4
303
+	MOVWP_UNALIGNED(R1, R9, g)
304
+
305
+poly1305_finish_ext_armv6_skip4:
306
+	WORD $0xe3120002                     // TST $2, R2 not working see issue 5921
307
+	BEQ  poly1305_finish_ext_armv6_skip2
308
+	MOVHUP_UNALIGNED(R1, R9, g)
309
+	B    poly1305_finish_ext_armv6_skip2
310
+
311
+poly1305_finish_ext_armv6_aligned:
312
+	WORD      $0xe3120008                             // TST R2, #8 not working see issue 5921
313
+	BEQ       poly1305_finish_ext_armv6_skip8_aligned
314
+	MOVM.IA.W (R1), [g-R11]
315
+	MOVM.IA.W [g-R11], (R9)
316
+
317
+poly1305_finish_ext_armv6_skip8_aligned:
318
+	WORD   $0xe3120004                             // TST $4, R2 not working see issue 5921
319
+	BEQ    poly1305_finish_ext_armv6_skip4_aligned
320
+	MOVW.P 4(R1), g
321
+	MOVW.P g, 4(R9)
322
+
323
+poly1305_finish_ext_armv6_skip4_aligned:
324
+	WORD    $0xe3120002                     // TST $2, R2 not working see issue 5921
325
+	BEQ     poly1305_finish_ext_armv6_skip2
326
+	MOVHU.P 2(R1), g
327
+	MOVH.P  g, 2(R9)
328
+
329
+poly1305_finish_ext_armv6_skip2:
330
+	WORD    $0xe3120001                     // TST $1, R2 not working see issue 5921
331
+	BEQ     poly1305_finish_ext_armv6_skip1
332
+	MOVBU.P 1(R1), g
333
+	MOVBU.P g, 1(R9)
334
+
335
+poly1305_finish_ext_armv6_skip1:
336
+	MOVW  $1, R11
337
+	MOVBU R11, 0(R9)
338
+	MOVW  R11, 56(R5)
339
+	MOVW  R5, R0
340
+	ADD   $8, R13, R1
341
+	MOVW  $16, R2
342
+	BL    poly1305_blocks_armv6<>(SB)
343
+
344
+poly1305_finish_ext_armv6_noremaining:
345
+	MOVW      20(R5), R0
346
+	MOVW      24(R5), R1
347
+	MOVW      28(R5), R2
348
+	MOVW      32(R5), R3
349
+	MOVW      36(R5), R4
350
+	MOVW      R4>>26, R12
351
+	BIC       $0xfc000000, R4, R4
352
+	ADD       R12<<2, R12, R12
353
+	ADD       R12, R0, R0
354
+	MOVW      R0>>26, R12
355
+	BIC       $0xfc000000, R0, R0
356
+	ADD       R12, R1, R1
357
+	MOVW      R1>>26, R12
358
+	BIC       $0xfc000000, R1, R1
359
+	ADD       R12, R2, R2
360
+	MOVW      R2>>26, R12
361
+	BIC       $0xfc000000, R2, R2
362
+	ADD       R12, R3, R3
363
+	MOVW      R3>>26, R12
364
+	BIC       $0xfc000000, R3, R3
365
+	ADD       R12, R4, R4
366
+	ADD       $5, R0, R6
367
+	MOVW      R6>>26, R12
368
+	BIC       $0xfc000000, R6, R6
369
+	ADD       R12, R1, R7
370
+	MOVW      R7>>26, R12
371
+	BIC       $0xfc000000, R7, R7
372
+	ADD       R12, R2, g
373
+	MOVW      g>>26, R12
374
+	BIC       $0xfc000000, g, g
375
+	ADD       R12, R3, R11
376
+	MOVW      $-(1<<26), R12
377
+	ADD       R11>>26, R12, R12
378
+	BIC       $0xfc000000, R11, R11
379
+	ADD       R12, R4, R9
380
+	MOVW      R9>>31, R12
381
+	SUB       $1, R12
382
+	AND       R12, R6, R6
383
+	AND       R12, R7, R7
384
+	AND       R12, g, g
385
+	AND       R12, R11, R11
386
+	AND       R12, R9, R9
387
+	MVN       R12, R12
388
+	AND       R12, R0, R0
389
+	AND       R12, R1, R1
390
+	AND       R12, R2, R2
391
+	AND       R12, R3, R3
392
+	AND       R12, R4, R4
393
+	ORR       R6, R0, R0
394
+	ORR       R7, R1, R1
395
+	ORR       g, R2, R2
396
+	ORR       R11, R3, R3
397
+	ORR       R9, R4, R4
398
+	ORR       R1<<26, R0, R0
399
+	MOVW      R1>>6, R1
400
+	ORR       R2<<20, R1, R1
401
+	MOVW      R2>>12, R2
402
+	ORR       R3<<14, R2, R2
403
+	MOVW      R3>>18, R3
404
+	ORR       R4<<8, R3, R3
405
+	MOVW      40(R5), R6
406
+	MOVW      44(R5), R7
407
+	MOVW      48(R5), g
408
+	MOVW      52(R5), R11
409
+	ADD.S     R6, R0, R0
410
+	ADC.S     R7, R1, R1
411
+	ADC.S     g, R2, R2
412
+	ADC.S     R11, R3, R3
413
+	MOVM.IA   [R0-R3], (R8)
414
+	MOVW      R5, R12
415
+	EOR       R0, R0, R0
416
+	EOR       R1, R1, R1
417
+	EOR       R2, R2, R2
418
+	EOR       R3, R3, R3
419
+	EOR       R4, R4, R4
420
+	EOR       R5, R5, R5
421
+	EOR       R6, R6, R6
422
+	EOR       R7, R7, R7
423
+	MOVM.IA.W [R0-R7], (R12)
424
+	MOVM.IA   [R0-R7], (R12)
425
+	MOVW      4(R13), g
426
+	RET
... ...
@@ -2,1530 +2,140 @@
2 2
 // Use of this source code is governed by a BSD-style
3 3
 // license that can be found in the LICENSE file.
4 4
 
5
-// +build !amd64,!arm gccgo appengine
5
+// +build !amd64,!arm gccgo appengine nacl
6 6
 
7 7
 package poly1305
8 8
 
9
-// Based on original, public domain implementation from NaCl by D. J.
10
-// Bernstein.
9
+import "encoding/binary"
11 10
 
12
-import "math"
13
-
14
-const (
15
-	alpham80 = 0.00000000558793544769287109375
16
-	alpham48 = 24.0
17
-	alpham16 = 103079215104.0
18
-	alpha0   = 6755399441055744.0
19
-	alpha18  = 1770887431076116955136.0
20
-	alpha32  = 29014219670751100192948224.0
21
-	alpha50  = 7605903601369376408980219232256.0
22
-	alpha64  = 124615124604835863084731911901282304.0
23
-	alpha82  = 32667107224410092492483962313449748299776.0
24
-	alpha96  = 535217884764734955396857238543560676143529984.0
25
-	alpha112 = 35076039295941670036888435985190792471742381031424.0
26
-	alpha130 = 9194973245195333150150082162901855101712434733101613056.0
27
-	scale    = 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125
28
-	offset0  = 6755408030990331.0
29
-	offset1  = 29014256564239239022116864.0
30
-	offset2  = 124615283061160854719918951570079744.0
31
-	offset3  = 535219245894202480694386063513315216128475136.0
32
-)
33
-
34
-// Sum generates an authenticator for m using a one-time key and puts the
11
+// Sum generates an authenticator for msg using a one-time key and puts the
35 12
 // 16-byte result into out. Authenticating two different messages with the same
36 13
 // key allows an attacker to forge messages at will.
37
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
38
-	r := key
39
-	s := key[16:]
14
+func Sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
40 15
 	var (
41
-		y7        float64
42
-		y6        float64
43
-		y1        float64
44
-		y0        float64
45
-		y5        float64
46
-		y4        float64
47
-		x7        float64
48
-		x6        float64
49
-		x1        float64
50
-		x0        float64
51
-		y3        float64
52
-		y2        float64
53
-		x5        float64
54
-		r3lowx0   float64
55
-		x4        float64
56
-		r0lowx6   float64
57
-		x3        float64
58
-		r3highx0  float64
59
-		x2        float64
60
-		r0highx6  float64
61
-		r0lowx0   float64
62
-		sr1lowx6  float64
63
-		r0highx0  float64
64
-		sr1highx6 float64
65
-		sr3low    float64
66
-		r1lowx0   float64
67
-		sr2lowx6  float64
68
-		r1highx0  float64
69
-		sr2highx6 float64
70
-		r2lowx0   float64
71
-		sr3lowx6  float64
72
-		r2highx0  float64
73
-		sr3highx6 float64
74
-		r1highx4  float64
75
-		r1lowx4   float64
76
-		r0highx4  float64
77
-		r0lowx4   float64
78
-		sr3highx4 float64
79
-		sr3lowx4  float64
80
-		sr2highx4 float64
81
-		sr2lowx4  float64
82
-		r0lowx2   float64
83
-		r0highx2  float64
84
-		r1lowx2   float64
85
-		r1highx2  float64
86
-		r2lowx2   float64
87
-		r2highx2  float64
88
-		sr3lowx2  float64
89
-		sr3highx2 float64
90
-		z0        float64
91
-		z1        float64
92
-		z2        float64
93
-		z3        float64
94
-		m0        int64
95
-		m1        int64
96
-		m2        int64
97
-		m3        int64
98
-		m00       uint32
99
-		m01       uint32
100
-		m02       uint32
101
-		m03       uint32
102
-		m10       uint32
103
-		m11       uint32
104
-		m12       uint32
105
-		m13       uint32
106
-		m20       uint32
107
-		m21       uint32
108
-		m22       uint32
109
-		m23       uint32
110
-		m30       uint32
111
-		m31       uint32
112
-		m32       uint32
113
-		m33       uint64
114
-		lbelow2   int32
115
-		lbelow3   int32
116
-		lbelow4   int32
117
-		lbelow5   int32
118
-		lbelow6   int32
119
-		lbelow7   int32
120
-		lbelow8   int32
121
-		lbelow9   int32
122
-		lbelow10  int32
123
-		lbelow11  int32
124
-		lbelow12  int32
125
-		lbelow13  int32
126
-		lbelow14  int32
127
-		lbelow15  int32
128
-		s00       uint32
129
-		s01       uint32
130
-		s02       uint32
131
-		s03       uint32
132
-		s10       uint32
133
-		s11       uint32
134
-		s12       uint32
135
-		s13       uint32
136
-		s20       uint32
137
-		s21       uint32
138
-		s22       uint32
139
-		s23       uint32
140
-		s30       uint32
141
-		s31       uint32
142
-		s32       uint32
143
-		s33       uint32
144
-		bits32    uint64
145
-		f         uint64
146
-		f0        uint64
147
-		f1        uint64
148
-		f2        uint64
149
-		f3        uint64
150
-		f4        uint64
151
-		g         uint64
152
-		g0        uint64
153
-		g1        uint64
154
-		g2        uint64
155
-		g3        uint64
156
-		g4        uint64
16
+		h0, h1, h2, h3, h4 uint32 // the hash accumulators
17
+		r0, r1, r2, r3, r4 uint64 // the r part of the key
157 18
 	)
158 19
 
159
-	var p int32
160
-
161
-	l := int32(len(m))
162
-
163
-	r00 := uint32(r[0])
164
-
165
-	r01 := uint32(r[1])
166
-
167
-	r02 := uint32(r[2])
168
-	r0 := int64(2151)
169
-
170
-	r03 := uint32(r[3])
171
-	r03 &= 15
172
-	r0 <<= 51
173
-
174
-	r10 := uint32(r[4])
175
-	r10 &= 252
176
-	r01 <<= 8
177
-	r0 += int64(r00)
178
-
179
-	r11 := uint32(r[5])
180
-	r02 <<= 16
181
-	r0 += int64(r01)
182
-
183
-	r12 := uint32(r[6])
184
-	r03 <<= 24
185
-	r0 += int64(r02)
186
-
187
-	r13 := uint32(r[7])
188
-	r13 &= 15
189
-	r1 := int64(2215)
190
-	r0 += int64(r03)
191
-
192
-	d0 := r0
193
-	r1 <<= 51
194
-	r2 := int64(2279)
195
-
196
-	r20 := uint32(r[8])
197
-	r20 &= 252
198
-	r11 <<= 8
199
-	r1 += int64(r10)
200
-
201
-	r21 := uint32(r[9])
202
-	r12 <<= 16
203
-	r1 += int64(r11)
204
-
205
-	r22 := uint32(r[10])
206
-	r13 <<= 24
207
-	r1 += int64(r12)
208
-
209
-	r23 := uint32(r[11])
210
-	r23 &= 15
211
-	r2 <<= 51
212
-	r1 += int64(r13)
213
-
214
-	d1 := r1
215
-	r21 <<= 8
216
-	r2 += int64(r20)
217
-
218
-	r30 := uint32(r[12])
219
-	r30 &= 252
220
-	r22 <<= 16
221
-	r2 += int64(r21)
222
-
223
-	r31 := uint32(r[13])
224
-	r23 <<= 24
225
-	r2 += int64(r22)
226
-
227
-	r32 := uint32(r[14])
228
-	r2 += int64(r23)
229
-	r3 := int64(2343)
230
-
231
-	d2 := r2
232
-	r3 <<= 51
233
-
234
-	r33 := uint32(r[15])
235
-	r33 &= 15
236
-	r31 <<= 8
237
-	r3 += int64(r30)
238
-
239
-	r32 <<= 16
240
-	r3 += int64(r31)
241
-
242
-	r33 <<= 24
243
-	r3 += int64(r32)
244
-
245
-	r3 += int64(r33)
246
-	h0 := alpha32 - alpha32
247
-
248
-	d3 := r3
249
-	h1 := alpha32 - alpha32
250
-
251
-	h2 := alpha32 - alpha32
252
-
253
-	h3 := alpha32 - alpha32
254
-
255
-	h4 := alpha32 - alpha32
256
-
257
-	r0low := math.Float64frombits(uint64(d0))
258
-	h5 := alpha32 - alpha32
259
-
260
-	r1low := math.Float64frombits(uint64(d1))
261
-	h6 := alpha32 - alpha32
262
-
263
-	r2low := math.Float64frombits(uint64(d2))
264
-	h7 := alpha32 - alpha32
265
-
266
-	r0low -= alpha0
267
-
268
-	r1low -= alpha32
269
-
270
-	r2low -= alpha64
271
-
272
-	r0high := r0low + alpha18
273
-
274
-	r3low := math.Float64frombits(uint64(d3))
275
-
276
-	r1high := r1low + alpha50
277
-	sr1low := scale * r1low
278
-
279
-	r2high := r2low + alpha82
280
-	sr2low := scale * r2low
281
-
282
-	r0high -= alpha18
283
-	r0high_stack := r0high
284
-
285
-	r3low -= alpha96
286
-
287
-	r1high -= alpha50
288
-	r1high_stack := r1high
289
-
290
-	sr1high := sr1low + alpham80
291
-
292
-	r0low -= r0high
293
-
294
-	r2high -= alpha82
295
-	sr3low = scale * r3low
296
-
297
-	sr2high := sr2low + alpham48
298
-
299
-	r1low -= r1high
300
-	r1low_stack := r1low
301
-
302
-	sr1high -= alpham80
303
-	sr1high_stack := sr1high
304
-
305
-	r2low -= r2high
306
-	r2low_stack := r2low
307
-
308
-	sr2high -= alpham48
309
-	sr2high_stack := sr2high
310
-
311
-	r3high := r3low + alpha112
312
-	r0low_stack := r0low
313
-
314
-	sr1low -= sr1high
315
-	sr1low_stack := sr1low
316
-
317
-	sr3high := sr3low + alpham16
318
-	r2high_stack := r2high
319
-
320
-	sr2low -= sr2high
321
-	sr2low_stack := sr2low
322
-
323
-	r3high -= alpha112
324
-	r3high_stack := r3high
325
-
326
-	sr3high -= alpham16
327
-	sr3high_stack := sr3high
328
-
329
-	r3low -= r3high
330
-	r3low_stack := r3low
331
-
332
-	sr3low -= sr3high
333
-	sr3low_stack := sr3low
334
-
335
-	if l < 16 {
336
-		goto addatmost15bytes
337
-	}
338
-
339
-	m00 = uint32(m[p+0])
340
-	m0 = 2151
341
-
342
-	m0 <<= 51
343
-	m1 = 2215
344
-	m01 = uint32(m[p+1])
345
-
346
-	m1 <<= 51
347
-	m2 = 2279
348
-	m02 = uint32(m[p+2])
349
-
350
-	m2 <<= 51
351
-	m3 = 2343
352
-	m03 = uint32(m[p+3])
353
-
354
-	m10 = uint32(m[p+4])
355
-	m01 <<= 8
356
-	m0 += int64(m00)
357
-
358
-	m11 = uint32(m[p+5])
359
-	m02 <<= 16
360
-	m0 += int64(m01)
361
-
362
-	m12 = uint32(m[p+6])
363
-	m03 <<= 24
364
-	m0 += int64(m02)
365
-
366
-	m13 = uint32(m[p+7])
367
-	m3 <<= 51
368
-	m0 += int64(m03)
369
-
370
-	m20 = uint32(m[p+8])
371
-	m11 <<= 8
372
-	m1 += int64(m10)
373
-
374
-	m21 = uint32(m[p+9])
375
-	m12 <<= 16
376
-	m1 += int64(m11)
377
-
378
-	m22 = uint32(m[p+10])
379
-	m13 <<= 24
380
-	m1 += int64(m12)
381
-
382
-	m23 = uint32(m[p+11])
383
-	m1 += int64(m13)
384
-
385
-	m30 = uint32(m[p+12])
386
-	m21 <<= 8
387
-	m2 += int64(m20)
388
-
389
-	m31 = uint32(m[p+13])
390
-	m22 <<= 16
391
-	m2 += int64(m21)
392
-
393
-	m32 = uint32(m[p+14])
394
-	m23 <<= 24
395
-	m2 += int64(m22)
396
-
397
-	m33 = uint64(m[p+15])
398
-	m2 += int64(m23)
399
-
400
-	d0 = m0
401
-	m31 <<= 8
402
-	m3 += int64(m30)
403
-
404
-	d1 = m1
405
-	m32 <<= 16
406
-	m3 += int64(m31)
407
-
408
-	d2 = m2
409
-	m33 += 256
410
-
411
-	m33 <<= 24
412
-	m3 += int64(m32)
413
-
414
-	m3 += int64(m33)
415
-	d3 = m3
416
-
417
-	p += 16
418
-	l -= 16
419
-
420
-	z0 = math.Float64frombits(uint64(d0))
421
-
422
-	z1 = math.Float64frombits(uint64(d1))
423
-
424
-	z2 = math.Float64frombits(uint64(d2))
425
-
426
-	z3 = math.Float64frombits(uint64(d3))
427
-
428
-	z0 -= alpha0
429
-
430
-	z1 -= alpha32
431
-
432
-	z2 -= alpha64
433
-
434
-	z3 -= alpha96
435
-
436
-	h0 += z0
437
-
438
-	h1 += z1
439
-
440
-	h3 += z2
441
-
442
-	h5 += z3
443
-
444
-	if l < 16 {
445
-		goto multiplyaddatmost15bytes
20
+	r0 = uint64(binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff)
21
+	r1 = uint64((binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03)
22
+	r2 = uint64((binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff)
23
+	r3 = uint64((binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff)
24
+	r4 = uint64((binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff)
25
+
26
+	R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5
27
+
28
+	for len(msg) >= TagSize {
29
+		// h += msg
30
+		h0 += binary.LittleEndian.Uint32(msg[0:]) & 0x3ffffff
31
+		h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff
32
+		h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff
33
+		h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff
34
+		h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | (1 << 24)
35
+
36
+		// h *= r
37
+		d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
38
+		d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
39
+		d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
40
+		d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
41
+		d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
42
+
43
+		// h %= p
44
+		h0 = uint32(d0) & 0x3ffffff
45
+		h1 = uint32(d1) & 0x3ffffff
46
+		h2 = uint32(d2) & 0x3ffffff
47
+		h3 = uint32(d3) & 0x3ffffff
48
+		h4 = uint32(d4) & 0x3ffffff
49
+
50
+		h0 += uint32(d4>>26) * 5
51
+		h1 += h0 >> 26
52
+		h0 = h0 & 0x3ffffff
53
+
54
+		msg = msg[TagSize:]
446 55
 	}
447 56
 
448
-multiplyaddatleast16bytes:
449
-
450
-	m2 = 2279
451
-	m20 = uint32(m[p+8])
452
-	y7 = h7 + alpha130
453
-
454
-	m2 <<= 51
455
-	m3 = 2343
456
-	m21 = uint32(m[p+9])
457
-	y6 = h6 + alpha130
458
-
459
-	m3 <<= 51
460
-	m0 = 2151
461
-	m22 = uint32(m[p+10])
462
-	y1 = h1 + alpha32
463
-
464
-	m0 <<= 51
465
-	m1 = 2215
466
-	m23 = uint32(m[p+11])
467
-	y0 = h0 + alpha32
468
-
469
-	m1 <<= 51
470
-	m30 = uint32(m[p+12])
471
-	y7 -= alpha130
472
-
473
-	m21 <<= 8
474
-	m2 += int64(m20)
475
-	m31 = uint32(m[p+13])
476
-	y6 -= alpha130
477
-
478
-	m22 <<= 16
479
-	m2 += int64(m21)
480
-	m32 = uint32(m[p+14])
481
-	y1 -= alpha32
482
-
483
-	m23 <<= 24
484
-	m2 += int64(m22)
485
-	m33 = uint64(m[p+15])
486
-	y0 -= alpha32
487
-
488
-	m2 += int64(m23)
489
-	m00 = uint32(m[p+0])
490
-	y5 = h5 + alpha96
491
-
492
-	m31 <<= 8
493
-	m3 += int64(m30)
494
-	m01 = uint32(m[p+1])
495
-	y4 = h4 + alpha96
496
-
497
-	m32 <<= 16
498
-	m02 = uint32(m[p+2])
499
-	x7 = h7 - y7
500
-	y7 *= scale
501
-
502
-	m33 += 256
503
-	m03 = uint32(m[p+3])
504
-	x6 = h6 - y6
505
-	y6 *= scale
506
-
507
-	m33 <<= 24
508
-	m3 += int64(m31)
509
-	m10 = uint32(m[p+4])
510
-	x1 = h1 - y1
511
-
512
-	m01 <<= 8
513
-	m3 += int64(m32)
514
-	m11 = uint32(m[p+5])
515
-	x0 = h0 - y0
516
-
517
-	m3 += int64(m33)
518
-	m0 += int64(m00)
519
-	m12 = uint32(m[p+6])
520
-	y5 -= alpha96
521
-
522
-	m02 <<= 16
523
-	m0 += int64(m01)
524
-	m13 = uint32(m[p+7])
525
-	y4 -= alpha96
526
-
527
-	m03 <<= 24
528
-	m0 += int64(m02)
529
-	d2 = m2
530
-	x1 += y7
531
-
532
-	m0 += int64(m03)
533
-	d3 = m3
534
-	x0 += y6
535
-
536
-	m11 <<= 8
537
-	m1 += int64(m10)
538
-	d0 = m0
539
-	x7 += y5
540
-
541
-	m12 <<= 16
542
-	m1 += int64(m11)
543
-	x6 += y4
544
-
545
-	m13 <<= 24
546
-	m1 += int64(m12)
547
-	y3 = h3 + alpha64
548
-
549
-	m1 += int64(m13)
550
-	d1 = m1
551
-	y2 = h2 + alpha64
552
-
553
-	x0 += x1
554
-
555
-	x6 += x7
556
-
557
-	y3 -= alpha64
558
-	r3low = r3low_stack
559
-
560
-	y2 -= alpha64
561
-	r0low = r0low_stack
562
-
563
-	x5 = h5 - y5
564
-	r3lowx0 = r3low * x0
565
-	r3high = r3high_stack
566
-
567
-	x4 = h4 - y4
568
-	r0lowx6 = r0low * x6
569
-	r0high = r0high_stack
570
-
571
-	x3 = h3 - y3
572
-	r3highx0 = r3high * x0
573
-	sr1low = sr1low_stack
574
-
575
-	x2 = h2 - y2
576
-	r0highx6 = r0high * x6
577
-	sr1high = sr1high_stack
578
-
579
-	x5 += y3
580
-	r0lowx0 = r0low * x0
581
-	r1low = r1low_stack
582
-
583
-	h6 = r3lowx0 + r0lowx6
584
-	sr1lowx6 = sr1low * x6
585
-	r1high = r1high_stack
586
-
587
-	x4 += y2
588
-	r0highx0 = r0high * x0
589
-	sr2low = sr2low_stack
590
-
591
-	h7 = r3highx0 + r0highx6
592
-	sr1highx6 = sr1high * x6
593
-	sr2high = sr2high_stack
594
-
595
-	x3 += y1
596
-	r1lowx0 = r1low * x0
597
-	r2low = r2low_stack
598
-
599
-	h0 = r0lowx0 + sr1lowx6
600
-	sr2lowx6 = sr2low * x6
601
-	r2high = r2high_stack
602
-
603
-	x2 += y0
604
-	r1highx0 = r1high * x0
605
-	sr3low = sr3low_stack
606
-
607
-	h1 = r0highx0 + sr1highx6
608
-	sr2highx6 = sr2high * x6
609
-	sr3high = sr3high_stack
610
-
611
-	x4 += x5
612
-	r2lowx0 = r2low * x0
613
-	z2 = math.Float64frombits(uint64(d2))
614
-
615
-	h2 = r1lowx0 + sr2lowx6
616
-	sr3lowx6 = sr3low * x6
617
-
618
-	x2 += x3
619
-	r2highx0 = r2high * x0
620
-	z3 = math.Float64frombits(uint64(d3))
621
-
622
-	h3 = r1highx0 + sr2highx6
623
-	sr3highx6 = sr3high * x6
624
-
625
-	r1highx4 = r1high * x4
626
-	z2 -= alpha64
627
-
628
-	h4 = r2lowx0 + sr3lowx6
629
-	r1lowx4 = r1low * x4
630
-
631
-	r0highx4 = r0high * x4
632
-	z3 -= alpha96
633
-
634
-	h5 = r2highx0 + sr3highx6
635
-	r0lowx4 = r0low * x4
636
-
637
-	h7 += r1highx4
638
-	sr3highx4 = sr3high * x4
639
-
640
-	h6 += r1lowx4
641
-	sr3lowx4 = sr3low * x4
642
-
643
-	h5 += r0highx4
644
-	sr2highx4 = sr2high * x4
645
-
646
-	h4 += r0lowx4
647
-	sr2lowx4 = sr2low * x4
648
-
649
-	h3 += sr3highx4
650
-	r0lowx2 = r0low * x2
651
-
652
-	h2 += sr3lowx4
653
-	r0highx2 = r0high * x2
654
-
655
-	h1 += sr2highx4
656
-	r1lowx2 = r1low * x2
657
-
658
-	h0 += sr2lowx4
659
-	r1highx2 = r1high * x2
660
-
661
-	h2 += r0lowx2
662
-	r2lowx2 = r2low * x2
663
-
664
-	h3 += r0highx2
665
-	r2highx2 = r2high * x2
666
-
667
-	h4 += r1lowx2
668
-	sr3lowx2 = sr3low * x2
669
-
670
-	h5 += r1highx2
671
-	sr3highx2 = sr3high * x2
672
-
673
-	p += 16
674
-	l -= 16
675
-	h6 += r2lowx2
676
-
677
-	h7 += r2highx2
678
-
679
-	z1 = math.Float64frombits(uint64(d1))
680
-	h0 += sr3lowx2
681
-
682
-	z0 = math.Float64frombits(uint64(d0))
683
-	h1 += sr3highx2
684
-
685
-	z1 -= alpha32
686
-
687
-	z0 -= alpha0
688
-
689
-	h5 += z3
690
-
691
-	h3 += z2
692
-
693
-	h1 += z1
694
-
695
-	h0 += z0
696
-
697
-	if l >= 16 {
698
-		goto multiplyaddatleast16bytes
699
-	}
700
-
701
-multiplyaddatmost15bytes:
702
-
703
-	y7 = h7 + alpha130
704
-
705
-	y6 = h6 + alpha130
706
-
707
-	y1 = h1 + alpha32
708
-
709
-	y0 = h0 + alpha32
710
-
711
-	y7 -= alpha130
712
-
713
-	y6 -= alpha130
714
-
715
-	y1 -= alpha32
716
-
717
-	y0 -= alpha32
718
-
719
-	y5 = h5 + alpha96
720
-
721
-	y4 = h4 + alpha96
722
-
723
-	x7 = h7 - y7
724
-	y7 *= scale
725
-
726
-	x6 = h6 - y6
727
-	y6 *= scale
728
-
729
-	x1 = h1 - y1
730
-
731
-	x0 = h0 - y0
732
-
733
-	y5 -= alpha96
734
-
735
-	y4 -= alpha96
736
-
737
-	x1 += y7
738
-
739
-	x0 += y6
740
-
741
-	x7 += y5
742
-
743
-	x6 += y4
744
-
745
-	y3 = h3 + alpha64
746
-
747
-	y2 = h2 + alpha64
748
-
749
-	x0 += x1
750
-
751
-	x6 += x7
752
-
753
-	y3 -= alpha64
754
-	r3low = r3low_stack
755
-
756
-	y2 -= alpha64
757
-	r0low = r0low_stack
758
-
759
-	x5 = h5 - y5
760
-	r3lowx0 = r3low * x0
761
-	r3high = r3high_stack
762
-
763
-	x4 = h4 - y4
764
-	r0lowx6 = r0low * x6
765
-	r0high = r0high_stack
766
-
767
-	x3 = h3 - y3
768
-	r3highx0 = r3high * x0
769
-	sr1low = sr1low_stack
770
-
771
-	x2 = h2 - y2
772
-	r0highx6 = r0high * x6
773
-	sr1high = sr1high_stack
774
-
775
-	x5 += y3
776
-	r0lowx0 = r0low * x0
777
-	r1low = r1low_stack
778
-
779
-	h6 = r3lowx0 + r0lowx6
780
-	sr1lowx6 = sr1low * x6
781
-	r1high = r1high_stack
782
-
783
-	x4 += y2
784
-	r0highx0 = r0high * x0
785
-	sr2low = sr2low_stack
786
-
787
-	h7 = r3highx0 + r0highx6
788
-	sr1highx6 = sr1high * x6
789
-	sr2high = sr2high_stack
790
-
791
-	x3 += y1
792
-	r1lowx0 = r1low * x0
793
-	r2low = r2low_stack
794
-
795
-	h0 = r0lowx0 + sr1lowx6
796
-	sr2lowx6 = sr2low * x6
797
-	r2high = r2high_stack
798
-
799
-	x2 += y0
800
-	r1highx0 = r1high * x0
801
-	sr3low = sr3low_stack
802
-
803
-	h1 = r0highx0 + sr1highx6
804
-	sr2highx6 = sr2high * x6
805
-	sr3high = sr3high_stack
806
-
807
-	x4 += x5
808
-	r2lowx0 = r2low * x0
809
-
810
-	h2 = r1lowx0 + sr2lowx6
811
-	sr3lowx6 = sr3low * x6
812
-
813
-	x2 += x3
814
-	r2highx0 = r2high * x0
815
-
816
-	h3 = r1highx0 + sr2highx6
817
-	sr3highx6 = sr3high * x6
818
-
819
-	r1highx4 = r1high * x4
820
-
821
-	h4 = r2lowx0 + sr3lowx6
822
-	r1lowx4 = r1low * x4
823
-
824
-	r0highx4 = r0high * x4
825
-
826
-	h5 = r2highx0 + sr3highx6
827
-	r0lowx4 = r0low * x4
828
-
829
-	h7 += r1highx4
830
-	sr3highx4 = sr3high * x4
831
-
832
-	h6 += r1lowx4
833
-	sr3lowx4 = sr3low * x4
834
-
835
-	h5 += r0highx4
836
-	sr2highx4 = sr2high * x4
837
-
838
-	h4 += r0lowx4
839
-	sr2lowx4 = sr2low * x4
840
-
841
-	h3 += sr3highx4
842
-	r0lowx2 = r0low * x2
843
-
844
-	h2 += sr3lowx4
845
-	r0highx2 = r0high * x2
846
-
847
-	h1 += sr2highx4
848
-	r1lowx2 = r1low * x2
849
-
850
-	h0 += sr2lowx4
851
-	r1highx2 = r1high * x2
852
-
853
-	h2 += r0lowx2
854
-	r2lowx2 = r2low * x2
855
-
856
-	h3 += r0highx2
857
-	r2highx2 = r2high * x2
858
-
859
-	h4 += r1lowx2
860
-	sr3lowx2 = sr3low * x2
861
-
862
-	h5 += r1highx2
863
-	sr3highx2 = sr3high * x2
864
-
865
-	h6 += r2lowx2
866
-
867
-	h7 += r2highx2
868
-
869
-	h0 += sr3lowx2
870
-
871
-	h1 += sr3highx2
872
-
873
-addatmost15bytes:
874
-
875
-	if l == 0 {
876
-		goto nomorebytes
57
+	if len(msg) > 0 {
58
+		var block [TagSize]byte
59
+		off := copy(block[:], msg)
60
+		block[off] = 0x01
61
+
62
+		// h += msg
63
+		h0 += binary.LittleEndian.Uint32(block[0:]) & 0x3ffffff
64
+		h1 += (binary.LittleEndian.Uint32(block[3:]) >> 2) & 0x3ffffff
65
+		h2 += (binary.LittleEndian.Uint32(block[6:]) >> 4) & 0x3ffffff
66
+		h3 += (binary.LittleEndian.Uint32(block[9:]) >> 6) & 0x3ffffff
67
+		h4 += (binary.LittleEndian.Uint32(block[12:]) >> 8)
68
+
69
+		// h *= r
70
+		d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
71
+		d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
72
+		d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
73
+		d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
74
+		d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
75
+
76
+		// h %= p
77
+		h0 = uint32(d0) & 0x3ffffff
78
+		h1 = uint32(d1) & 0x3ffffff
79
+		h2 = uint32(d2) & 0x3ffffff
80
+		h3 = uint32(d3) & 0x3ffffff
81
+		h4 = uint32(d4) & 0x3ffffff
82
+
83
+		h0 += uint32(d4>>26) * 5
84
+		h1 += h0 >> 26
85
+		h0 = h0 & 0x3ffffff
877 86
 	}
878 87
 
879
-	lbelow2 = l - 2
880
-
881
-	lbelow3 = l - 3
882
-
883
-	lbelow2 >>= 31
884
-	lbelow4 = l - 4
885
-
886
-	m00 = uint32(m[p+0])
887
-	lbelow3 >>= 31
888
-	p += lbelow2
889
-
890
-	m01 = uint32(m[p+1])
891
-	lbelow4 >>= 31
892
-	p += lbelow3
893
-
894
-	m02 = uint32(m[p+2])
895
-	p += lbelow4
896
-	m0 = 2151
897
-
898
-	m03 = uint32(m[p+3])
899
-	m0 <<= 51
900
-	m1 = 2215
901
-
902
-	m0 += int64(m00)
903
-	m01 &^= uint32(lbelow2)
904
-
905
-	m02 &^= uint32(lbelow3)
906
-	m01 -= uint32(lbelow2)
907
-
908
-	m01 <<= 8
909
-	m03 &^= uint32(lbelow4)
910
-
911
-	m0 += int64(m01)
912
-	lbelow2 -= lbelow3
913
-
914
-	m02 += uint32(lbelow2)
915
-	lbelow3 -= lbelow4
916
-
917
-	m02 <<= 16
918
-	m03 += uint32(lbelow3)
919
-
920
-	m03 <<= 24
921
-	m0 += int64(m02)
922
-
923
-	m0 += int64(m03)
924
-	lbelow5 = l - 5
925
-
926
-	lbelow6 = l - 6
927
-	lbelow7 = l - 7
928
-
929
-	lbelow5 >>= 31
930
-	lbelow8 = l - 8
931
-
932
-	lbelow6 >>= 31
933
-	p += lbelow5
934
-
935
-	m10 = uint32(m[p+4])
936
-	lbelow7 >>= 31
937
-	p += lbelow6
938
-
939
-	m11 = uint32(m[p+5])
940
-	lbelow8 >>= 31
941
-	p += lbelow7
942
-
943
-	m12 = uint32(m[p+6])
944
-	m1 <<= 51
945
-	p += lbelow8
946
-
947
-	m13 = uint32(m[p+7])
948
-	m10 &^= uint32(lbelow5)
949
-	lbelow4 -= lbelow5
950
-
951
-	m10 += uint32(lbelow4)
952
-	lbelow5 -= lbelow6
953
-
954
-	m11 &^= uint32(lbelow6)
955
-	m11 += uint32(lbelow5)
956
-
957
-	m11 <<= 8
958
-	m1 += int64(m10)
959
-
960
-	m1 += int64(m11)
961
-	m12 &^= uint32(lbelow7)
962
-
963
-	lbelow6 -= lbelow7
964
-	m13 &^= uint32(lbelow8)
965
-
966
-	m12 += uint32(lbelow6)
967
-	lbelow7 -= lbelow8
968
-
969
-	m12 <<= 16
970
-	m13 += uint32(lbelow7)
971
-
972
-	m13 <<= 24
973
-	m1 += int64(m12)
974
-
975
-	m1 += int64(m13)
976
-	m2 = 2279
977
-
978
-	lbelow9 = l - 9
979
-	m3 = 2343
980
-
981
-	lbelow10 = l - 10
982
-	lbelow11 = l - 11
983
-
984
-	lbelow9 >>= 31
985
-	lbelow12 = l - 12
986
-
987
-	lbelow10 >>= 31
988
-	p += lbelow9
989
-
990
-	m20 = uint32(m[p+8])
991
-	lbelow11 >>= 31
992
-	p += lbelow10
993
-
994
-	m21 = uint32(m[p+9])
995
-	lbelow12 >>= 31
996
-	p += lbelow11
997
-
998
-	m22 = uint32(m[p+10])
999
-	m2 <<= 51
1000
-	p += lbelow12
1001
-
1002
-	m23 = uint32(m[p+11])
1003
-	m20 &^= uint32(lbelow9)
1004
-	lbelow8 -= lbelow9
1005
-
1006
-	m20 += uint32(lbelow8)
1007
-	lbelow9 -= lbelow10
1008
-
1009
-	m21 &^= uint32(lbelow10)
1010
-	m21 += uint32(lbelow9)
1011
-
1012
-	m21 <<= 8
1013
-	m2 += int64(m20)
1014
-
1015
-	m2 += int64(m21)
1016
-	m22 &^= uint32(lbelow11)
1017
-
1018
-	lbelow10 -= lbelow11
1019
-	m23 &^= uint32(lbelow12)
1020
-
1021
-	m22 += uint32(lbelow10)
1022
-	lbelow11 -= lbelow12
1023
-
1024
-	m22 <<= 16
1025
-	m23 += uint32(lbelow11)
1026
-
1027
-	m23 <<= 24
1028
-	m2 += int64(m22)
1029
-
1030
-	m3 <<= 51
1031
-	lbelow13 = l - 13
1032
-
1033
-	lbelow13 >>= 31
1034
-	lbelow14 = l - 14
1035
-
1036
-	lbelow14 >>= 31
1037
-	p += lbelow13
1038
-	lbelow15 = l - 15
1039
-
1040
-	m30 = uint32(m[p+12])
1041
-	lbelow15 >>= 31
1042
-	p += lbelow14
1043
-
1044
-	m31 = uint32(m[p+13])
1045
-	p += lbelow15
1046
-	m2 += int64(m23)
1047
-
1048
-	m32 = uint32(m[p+14])
1049
-	m30 &^= uint32(lbelow13)
1050
-	lbelow12 -= lbelow13
1051
-
1052
-	m30 += uint32(lbelow12)
1053
-	lbelow13 -= lbelow14
1054
-
1055
-	m3 += int64(m30)
1056
-	m31 &^= uint32(lbelow14)
1057
-
1058
-	m31 += uint32(lbelow13)
1059
-	m32 &^= uint32(lbelow15)
1060
-
1061
-	m31 <<= 8
1062
-	lbelow14 -= lbelow15
1063
-
1064
-	m3 += int64(m31)
1065
-	m32 += uint32(lbelow14)
1066
-	d0 = m0
1067
-
1068
-	m32 <<= 16
1069
-	m33 = uint64(lbelow15 + 1)
1070
-	d1 = m1
1071
-
1072
-	m33 <<= 24
1073
-	m3 += int64(m32)
1074
-	d2 = m2
1075
-
1076
-	m3 += int64(m33)
1077
-	d3 = m3
1078
-
1079
-	z3 = math.Float64frombits(uint64(d3))
1080
-
1081
-	z2 = math.Float64frombits(uint64(d2))
1082
-
1083
-	z1 = math.Float64frombits(uint64(d1))
1084
-
1085
-	z0 = math.Float64frombits(uint64(d0))
1086
-
1087
-	z3 -= alpha96
1088
-
1089
-	z2 -= alpha64
1090
-
1091
-	z1 -= alpha32
1092
-
1093
-	z0 -= alpha0
1094
-
1095
-	h5 += z3
1096
-
1097
-	h3 += z2
1098
-
1099
-	h1 += z1
1100
-
1101
-	h0 += z0
1102
-
1103
-	y7 = h7 + alpha130
1104
-
1105
-	y6 = h6 + alpha130
1106
-
1107
-	y1 = h1 + alpha32
1108
-
1109
-	y0 = h0 + alpha32
1110
-
1111
-	y7 -= alpha130
1112
-
1113
-	y6 -= alpha130
1114
-
1115
-	y1 -= alpha32
1116
-
1117
-	y0 -= alpha32
1118
-
1119
-	y5 = h5 + alpha96
1120
-
1121
-	y4 = h4 + alpha96
1122
-
1123
-	x7 = h7 - y7
1124
-	y7 *= scale
1125
-
1126
-	x6 = h6 - y6
1127
-	y6 *= scale
1128
-
1129
-	x1 = h1 - y1
1130
-
1131
-	x0 = h0 - y0
1132
-
1133
-	y5 -= alpha96
1134
-
1135
-	y4 -= alpha96
1136
-
1137
-	x1 += y7
1138
-
1139
-	x0 += y6
1140
-
1141
-	x7 += y5
1142
-
1143
-	x6 += y4
1144
-
1145
-	y3 = h3 + alpha64
1146
-
1147
-	y2 = h2 + alpha64
1148
-
1149
-	x0 += x1
1150
-
1151
-	x6 += x7
1152
-
1153
-	y3 -= alpha64
1154
-	r3low = r3low_stack
1155
-
1156
-	y2 -= alpha64
1157
-	r0low = r0low_stack
1158
-
1159
-	x5 = h5 - y5
1160
-	r3lowx0 = r3low * x0
1161
-	r3high = r3high_stack
1162
-
1163
-	x4 = h4 - y4
1164
-	r0lowx6 = r0low * x6
1165
-	r0high = r0high_stack
1166
-
1167
-	x3 = h3 - y3
1168
-	r3highx0 = r3high * x0
1169
-	sr1low = sr1low_stack
1170
-
1171
-	x2 = h2 - y2
1172
-	r0highx6 = r0high * x6
1173
-	sr1high = sr1high_stack
1174
-
1175
-	x5 += y3
1176
-	r0lowx0 = r0low * x0
1177
-	r1low = r1low_stack
1178
-
1179
-	h6 = r3lowx0 + r0lowx6
1180
-	sr1lowx6 = sr1low * x6
1181
-	r1high = r1high_stack
1182
-
1183
-	x4 += y2
1184
-	r0highx0 = r0high * x0
1185
-	sr2low = sr2low_stack
1186
-
1187
-	h7 = r3highx0 + r0highx6
1188
-	sr1highx6 = sr1high * x6
1189
-	sr2high = sr2high_stack
1190
-
1191
-	x3 += y1
1192
-	r1lowx0 = r1low * x0
1193
-	r2low = r2low_stack
1194
-
1195
-	h0 = r0lowx0 + sr1lowx6
1196
-	sr2lowx6 = sr2low * x6
1197
-	r2high = r2high_stack
1198
-
1199
-	x2 += y0
1200
-	r1highx0 = r1high * x0
1201
-	sr3low = sr3low_stack
1202
-
1203
-	h1 = r0highx0 + sr1highx6
1204
-	sr2highx6 = sr2high * x6
1205
-	sr3high = sr3high_stack
1206
-
1207
-	x4 += x5
1208
-	r2lowx0 = r2low * x0
1209
-
1210
-	h2 = r1lowx0 + sr2lowx6
1211
-	sr3lowx6 = sr3low * x6
1212
-
1213
-	x2 += x3
1214
-	r2highx0 = r2high * x0
1215
-
1216
-	h3 = r1highx0 + sr2highx6
1217
-	sr3highx6 = sr3high * x6
1218
-
1219
-	r1highx4 = r1high * x4
1220
-
1221
-	h4 = r2lowx0 + sr3lowx6
1222
-	r1lowx4 = r1low * x4
1223
-
1224
-	r0highx4 = r0high * x4
1225
-
1226
-	h5 = r2highx0 + sr3highx6
1227
-	r0lowx4 = r0low * x4
1228
-
1229
-	h7 += r1highx4
1230
-	sr3highx4 = sr3high * x4
1231
-
1232
-	h6 += r1lowx4
1233
-	sr3lowx4 = sr3low * x4
1234
-
1235
-	h5 += r0highx4
1236
-	sr2highx4 = sr2high * x4
1237
-
1238
-	h4 += r0lowx4
1239
-	sr2lowx4 = sr2low * x4
1240
-
1241
-	h3 += sr3highx4
1242
-	r0lowx2 = r0low * x2
1243
-
1244
-	h2 += sr3lowx4
1245
-	r0highx2 = r0high * x2
1246
-
1247
-	h1 += sr2highx4
1248
-	r1lowx2 = r1low * x2
1249
-
1250
-	h0 += sr2lowx4
1251
-	r1highx2 = r1high * x2
1252
-
1253
-	h2 += r0lowx2
1254
-	r2lowx2 = r2low * x2
1255
-
1256
-	h3 += r0highx2
1257
-	r2highx2 = r2high * x2
1258
-
1259
-	h4 += r1lowx2
1260
-	sr3lowx2 = sr3low * x2
1261
-
1262
-	h5 += r1highx2
1263
-	sr3highx2 = sr3high * x2
1264
-
1265
-	h6 += r2lowx2
1266
-
1267
-	h7 += r2highx2
1268
-
1269
-	h0 += sr3lowx2
1270
-
1271
-	h1 += sr3highx2
1272
-
1273
-nomorebytes:
1274
-
1275
-	y7 = h7 + alpha130
1276
-
1277
-	y0 = h0 + alpha32
1278
-
1279
-	y1 = h1 + alpha32
1280
-
1281
-	y2 = h2 + alpha64
1282
-
1283
-	y7 -= alpha130
1284
-
1285
-	y3 = h3 + alpha64
1286
-
1287
-	y4 = h4 + alpha96
1288
-
1289
-	y5 = h5 + alpha96
1290
-
1291
-	x7 = h7 - y7
1292
-	y7 *= scale
1293
-
1294
-	y0 -= alpha32
1295
-
1296
-	y1 -= alpha32
1297
-
1298
-	y2 -= alpha64
1299
-
1300
-	h6 += x7
1301
-
1302
-	y3 -= alpha64
1303
-
1304
-	y4 -= alpha96
1305
-
1306
-	y5 -= alpha96
1307
-
1308
-	y6 = h6 + alpha130
1309
-
1310
-	x0 = h0 - y0
1311
-
1312
-	x1 = h1 - y1
1313
-
1314
-	x2 = h2 - y2
1315
-
1316
-	y6 -= alpha130
1317
-
1318
-	x0 += y7
1319
-
1320
-	x3 = h3 - y3
1321
-
1322
-	x4 = h4 - y4
1323
-
1324
-	x5 = h5 - y5
1325
-
1326
-	x6 = h6 - y6
1327
-
1328
-	y6 *= scale
1329
-
1330
-	x2 += y0
1331
-
1332
-	x3 += y1
1333
-
1334
-	x4 += y2
1335
-
1336
-	x0 += y6
1337
-
1338
-	x5 += y3
1339
-
1340
-	x6 += y4
1341
-
1342
-	x2 += x3
1343
-
1344
-	x0 += x1
1345
-
1346
-	x4 += x5
1347
-
1348
-	x6 += y5
1349
-
1350
-	x2 += offset1
1351
-	d1 = int64(math.Float64bits(x2))
1352
-
1353
-	x0 += offset0
1354
-	d0 = int64(math.Float64bits(x0))
1355
-
1356
-	x4 += offset2
1357
-	d2 = int64(math.Float64bits(x4))
1358
-
1359
-	x6 += offset3
1360
-	d3 = int64(math.Float64bits(x6))
1361
-
1362
-	f0 = uint64(d0)
1363
-
1364
-	f1 = uint64(d1)
1365
-	bits32 = math.MaxUint64
1366
-
1367
-	f2 = uint64(d2)
1368
-	bits32 >>= 32
1369
-
1370
-	f3 = uint64(d3)
1371
-	f = f0 >> 32
1372
-
1373
-	f0 &= bits32
1374
-	f &= 255
1375
-
1376
-	f1 += f
1377
-	g0 = f0 + 5
1378
-
1379
-	g = g0 >> 32
1380
-	g0 &= bits32
1381
-
1382
-	f = f1 >> 32
1383
-	f1 &= bits32
1384
-
1385
-	f &= 255
1386
-	g1 = f1 + g
1387
-
1388
-	g = g1 >> 32
1389
-	f2 += f
1390
-
1391
-	f = f2 >> 32
1392
-	g1 &= bits32
1393
-
1394
-	f2 &= bits32
1395
-	f &= 255
1396
-
1397
-	f3 += f
1398
-	g2 = f2 + g
1399
-
1400
-	g = g2 >> 32
1401
-	g2 &= bits32
1402
-
1403
-	f4 = f3 >> 32
1404
-	f3 &= bits32
1405
-
1406
-	f4 &= 255
1407
-	g3 = f3 + g
1408
-
1409
-	g = g3 >> 32
1410
-	g3 &= bits32
1411
-
1412
-	g4 = f4 + g
1413
-
1414
-	g4 = g4 - 4
1415
-	s00 = uint32(s[0])
1416
-
1417
-	f = uint64(int64(g4) >> 63)
1418
-	s01 = uint32(s[1])
1419
-
1420
-	f0 &= f
1421
-	g0 &^= f
1422
-	s02 = uint32(s[2])
1423
-
1424
-	f1 &= f
1425
-	f0 |= g0
1426
-	s03 = uint32(s[3])
1427
-
1428
-	g1 &^= f
1429
-	f2 &= f
1430
-	s10 = uint32(s[4])
1431
-
1432
-	f3 &= f
1433
-	g2 &^= f
1434
-	s11 = uint32(s[5])
1435
-
1436
-	g3 &^= f
1437
-	f1 |= g1
1438
-	s12 = uint32(s[6])
1439
-
1440
-	f2 |= g2
1441
-	f3 |= g3
1442
-	s13 = uint32(s[7])
1443
-
1444
-	s01 <<= 8
1445
-	f0 += uint64(s00)
1446
-	s20 = uint32(s[8])
1447
-
1448
-	s02 <<= 16
1449
-	f0 += uint64(s01)
1450
-	s21 = uint32(s[9])
1451
-
1452
-	s03 <<= 24
1453
-	f0 += uint64(s02)
1454
-	s22 = uint32(s[10])
1455
-
1456
-	s11 <<= 8
1457
-	f1 += uint64(s10)
1458
-	s23 = uint32(s[11])
1459
-
1460
-	s12 <<= 16
1461
-	f1 += uint64(s11)
1462
-	s30 = uint32(s[12])
1463
-
1464
-	s13 <<= 24
1465
-	f1 += uint64(s12)
1466
-	s31 = uint32(s[13])
1467
-
1468
-	f0 += uint64(s03)
1469
-	f1 += uint64(s13)
1470
-	s32 = uint32(s[14])
1471
-
1472
-	s21 <<= 8
1473
-	f2 += uint64(s20)
1474
-	s33 = uint32(s[15])
1475
-
1476
-	s22 <<= 16
1477
-	f2 += uint64(s21)
1478
-
1479
-	s23 <<= 24
1480
-	f2 += uint64(s22)
1481
-
1482
-	s31 <<= 8
1483
-	f3 += uint64(s30)
1484
-
1485
-	s32 <<= 16
1486
-	f3 += uint64(s31)
1487
-
1488
-	s33 <<= 24
1489
-	f3 += uint64(s32)
1490
-
1491
-	f2 += uint64(s23)
1492
-	f3 += uint64(s33)
1493
-
1494
-	out[0] = byte(f0)
1495
-	f0 >>= 8
1496
-	out[1] = byte(f0)
1497
-	f0 >>= 8
1498
-	out[2] = byte(f0)
1499
-	f0 >>= 8
1500
-	out[3] = byte(f0)
1501
-	f0 >>= 8
1502
-	f1 += f0
1503
-
1504
-	out[4] = byte(f1)
1505
-	f1 >>= 8
1506
-	out[5] = byte(f1)
1507
-	f1 >>= 8
1508
-	out[6] = byte(f1)
1509
-	f1 >>= 8
1510
-	out[7] = byte(f1)
1511
-	f1 >>= 8
1512
-	f2 += f1
1513
-
1514
-	out[8] = byte(f2)
1515
-	f2 >>= 8
1516
-	out[9] = byte(f2)
1517
-	f2 >>= 8
1518
-	out[10] = byte(f2)
1519
-	f2 >>= 8
1520
-	out[11] = byte(f2)
1521
-	f2 >>= 8
1522
-	f3 += f2
1523
-
1524
-	out[12] = byte(f3)
1525
-	f3 >>= 8
1526
-	out[13] = byte(f3)
1527
-	f3 >>= 8
1528
-	out[14] = byte(f3)
1529
-	f3 >>= 8
1530
-	out[15] = byte(f3)
88
+	// h %= p reduction
89
+	h2 += h1 >> 26
90
+	h1 &= 0x3ffffff
91
+	h3 += h2 >> 26
92
+	h2 &= 0x3ffffff
93
+	h4 += h3 >> 26
94
+	h3 &= 0x3ffffff
95
+	h0 += 5 * (h4 >> 26)
96
+	h4 &= 0x3ffffff
97
+	h1 += h0 >> 26
98
+	h0 &= 0x3ffffff
99
+
100
+	// h - p
101
+	t0 := h0 + 5
102
+	t1 := h1 + (t0 >> 26)
103
+	t2 := h2 + (t1 >> 26)
104
+	t3 := h3 + (t2 >> 26)
105
+	t4 := h4 + (t3 >> 26) - (1 << 26)
106
+	t0 &= 0x3ffffff
107
+	t1 &= 0x3ffffff
108
+	t2 &= 0x3ffffff
109
+	t3 &= 0x3ffffff
110
+
111
+	// select h if h < p else h - p
112
+	t_mask := (t4 >> 31) - 1
113
+	h_mask := ^t_mask
114
+	h0 = (h0 & h_mask) | (t0 & t_mask)
115
+	h1 = (h1 & h_mask) | (t1 & t_mask)
116
+	h2 = (h2 & h_mask) | (t2 & t_mask)
117
+	h3 = (h3 & h_mask) | (t3 & t_mask)
118
+	h4 = (h4 & h_mask) | (t4 & t_mask)
119
+
120
+	// h %= 2^128
121
+	h0 |= h1 << 26
122
+	h1 = ((h1 >> 6) | (h2 << 20))
123
+	h2 = ((h2 >> 12) | (h3 << 14))
124
+	h3 = ((h3 >> 18) | (h4 << 8))
125
+
126
+	// s: the s part of the key
127
+	// tag = (h + s) % (2^128)
128
+	t := uint64(h0) + uint64(binary.LittleEndian.Uint32(key[16:]))
129
+	h0 = uint32(t)
130
+	t = uint64(h1) + uint64(binary.LittleEndian.Uint32(key[20:])) + (t >> 32)
131
+	h1 = uint32(t)
132
+	t = uint64(h2) + uint64(binary.LittleEndian.Uint32(key[24:])) + (t >> 32)
133
+	h2 = uint32(t)
134
+	t = uint64(h3) + uint64(binary.LittleEndian.Uint32(key[28:])) + (t >> 32)
135
+	h3 = uint32(t)
136
+
137
+	binary.LittleEndian.PutUint32(out[0:], h0)
138
+	binary.LittleEndian.PutUint32(out[4:], h1)
139
+	binary.LittleEndian.PutUint32(out[8:], h2)
140
+	binary.LittleEndian.PutUint32(out[12:], h3)
1531 141
 }
... ...
@@ -5,29 +5,23 @@
5 5
 // +build amd64,!appengine,!gccgo
6 6
 
7 7
 // This code was translated into a form compatible with 6a from the public
8
-// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
8
+// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
9 9
 
10 10
 // func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
11
-TEXT ·salsa2020XORKeyStream(SB),0,$512-40
11
+// This needs up to 64 bytes at 360(SP); hence the non-obvious frame size.
12
+TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
12 13
 	MOVQ out+0(FP),DI
13 14
 	MOVQ in+8(FP),SI
14 15
 	MOVQ n+16(FP),DX
15 16
 	MOVQ nonce+24(FP),CX
16 17
 	MOVQ key+32(FP),R8
17 18
 
18
-	MOVQ SP,R11
19
-	MOVQ $31,R9
20
-	NOTQ R9
21
-	ANDQ R9,SP
22
-	ADDQ $32,SP
19
+	MOVQ SP,R12
20
+	MOVQ SP,R9
21
+	ADDQ $31, R9
22
+	ANDQ $~31, R9
23
+	MOVQ R9, SP
23 24
 
24
-	MOVQ R11,352(SP)
25
-	MOVQ R12,360(SP)
26
-	MOVQ R13,368(SP)
27
-	MOVQ R14,376(SP)
28
-	MOVQ R15,384(SP)
29
-	MOVQ BX,392(SP)
30
-	MOVQ BP,400(SP)
31 25
 	MOVQ DX,R9
32 26
 	MOVQ CX,DX
33 27
 	MOVQ R8,R10
... ...
@@ -133,7 +127,7 @@ TEXT ·salsa2020XORKeyStream(SB),0,$512-40
133 133
 	SHRQ $32,CX
134 134
 	MOVL DX,16(SP)
135 135
 	MOVL CX, 36 (SP)
136
-	MOVQ R9,408(SP)
136
+	MOVQ R9,352(SP)
137 137
 	MOVQ $20,DX
138 138
 	MOVOA 64(SP),X0
139 139
 	MOVOA 80(SP),X1
... ...
@@ -650,7 +644,7 @@ TEXT ·salsa2020XORKeyStream(SB),0,$512-40
650 650
 	MOVL CX,244(DI)
651 651
 	MOVL R8,248(DI)
652 652
 	MOVL R9,252(DI)
653
-	MOVQ 408(SP),R9
653
+	MOVQ 352(SP),R9
654 654
 	SUBQ $256,R9
655 655
 	ADDQ $256,SI
656 656
 	ADDQ $256,DI
... ...
@@ -662,13 +656,13 @@ TEXT ·salsa2020XORKeyStream(SB),0,$512-40
662 662
 	CMPQ R9,$64
663 663
 	JAE NOCOPY
664 664
 	MOVQ DI,DX
665
-	LEAQ 416(SP),DI
665
+	LEAQ 360(SP),DI
666 666
 	MOVQ R9,CX
667 667
 	REP; MOVSB
668
-	LEAQ 416(SP),DI
669
-	LEAQ 416(SP),SI
668
+	LEAQ 360(SP),DI
669
+	LEAQ 360(SP),SI
670 670
 	NOCOPY:
671
-	MOVQ R9,408(SP)
671
+	MOVQ R9,352(SP)
672 672
 	MOVOA 48(SP),X0
673 673
 	MOVOA 0(SP),X1
674 674
 	MOVOA 16(SP),X2
... ...
@@ -867,7 +861,7 @@ TEXT ·salsa2020XORKeyStream(SB),0,$512-40
867 867
 	MOVL R8,44(DI)
868 868
 	MOVL R9,28(DI)
869 869
 	MOVL AX,12(DI)
870
-	MOVQ 408(SP),R9
870
+	MOVQ 352(SP),R9
871 871
 	MOVL 16(SP),CX
872 872
 	MOVL  36 (SP),R8
873 873
 	ADDQ $1,CX
... ...
@@ -886,14 +880,7 @@ TEXT ·salsa2020XORKeyStream(SB),0,$512-40
886 886
 	REP; MOVSB
887 887
 	BYTESATLEAST64:
888 888
 	DONE:
889
-	MOVQ 352(SP),R11
890
-	MOVQ 360(SP),R12
891
-	MOVQ 368(SP),R13
892
-	MOVQ 376(SP),R14
893
-	MOVQ 384(SP),R15
894
-	MOVQ 392(SP),BX
895
-	MOVQ 400(SP),BP
896
-	MOVQ R11,SP
889
+	MOVQ R12,SP
897 890
 	RET
898 891
 	BYTESATLEAST65:
899 892
 	SUBQ $64,R9
900 893
new file mode 100644
... ...
@@ -0,0 +1,951 @@
0
+// Copyright 2011 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+package terminal
5
+
6
+import (
7
+	"bytes"
8
+	"io"
9
+	"sync"
10
+	"unicode/utf8"
11
+)
12
+
13
+// EscapeCodes contains escape sequences that can be written to the terminal in
14
+// order to achieve different styles of text.
15
+type EscapeCodes struct {
16
+	// Foreground colors
17
+	Black, Red, Green, Yellow, Blue, Magenta, Cyan, White []byte
18
+
19
+	// Reset all attributes
20
+	Reset []byte
21
+}
22
+
23
+var vt100EscapeCodes = EscapeCodes{
24
+	Black:   []byte{keyEscape, '[', '3', '0', 'm'},
25
+	Red:     []byte{keyEscape, '[', '3', '1', 'm'},
26
+	Green:   []byte{keyEscape, '[', '3', '2', 'm'},
27
+	Yellow:  []byte{keyEscape, '[', '3', '3', 'm'},
28
+	Blue:    []byte{keyEscape, '[', '3', '4', 'm'},
29
+	Magenta: []byte{keyEscape, '[', '3', '5', 'm'},
30
+	Cyan:    []byte{keyEscape, '[', '3', '6', 'm'},
31
+	White:   []byte{keyEscape, '[', '3', '7', 'm'},
32
+
33
+	Reset: []byte{keyEscape, '[', '0', 'm'},
34
+}
35
+
36
+// Terminal contains the state for running a VT100 terminal that is capable of
37
+// reading lines of input.
38
+type Terminal struct {
39
+	// AutoCompleteCallback, if non-nil, is called for each keypress with
40
+	// the full input line and the current position of the cursor (in
41
+	// bytes, as an index into |line|). If it returns ok=false, the key
42
+	// press is processed normally. Otherwise it returns a replacement line
43
+	// and the new cursor position.
44
+	AutoCompleteCallback func(line string, pos int, key rune) (newLine string, newPos int, ok bool)
45
+
46
+	// Escape contains a pointer to the escape codes for this terminal.
47
+	// It's always a valid pointer, although the escape codes themselves
48
+	// may be empty if the terminal doesn't support them.
49
+	Escape *EscapeCodes
50
+
51
+	// lock protects the terminal and the state in this object from
52
+	// concurrent processing of a key press and a Write() call.
53
+	lock sync.Mutex
54
+
55
+	c      io.ReadWriter
56
+	prompt []rune
57
+
58
+	// line is the current line being entered.
59
+	line []rune
60
+	// pos is the logical position of the cursor in line
61
+	pos int
62
+	// echo is true if local echo is enabled
63
+	echo bool
64
+	// pasteActive is true iff there is a bracketed paste operation in
65
+	// progress.
66
+	pasteActive bool
67
+
68
+	// cursorX contains the current X value of the cursor where the left
69
+	// edge is 0. cursorY contains the row number where the first row of
70
+	// the current line is 0.
71
+	cursorX, cursorY int
72
+	// maxLine is the greatest value of cursorY so far.
73
+	maxLine int
74
+
75
+	termWidth, termHeight int
76
+
77
+	// outBuf contains the terminal data to be sent.
78
+	outBuf []byte
79
+	// remainder contains the remainder of any partial key sequences after
80
+	// a read. It aliases into inBuf.
81
+	remainder []byte
82
+	inBuf     [256]byte
83
+
84
+	// history contains previously entered commands so that they can be
85
+	// accessed with the up and down keys.
86
+	history stRingBuffer
87
+	// historyIndex stores the currently accessed history entry, where zero
88
+	// means the immediately previous entry.
89
+	historyIndex int
90
+	// When navigating up and down the history it's possible to return to
91
+	// the incomplete, initial line. That value is stored in
92
+	// historyPending.
93
+	historyPending string
94
+}
95
+
96
+// NewTerminal runs a VT100 terminal on the given ReadWriter. If the ReadWriter is
97
+// a local terminal, that terminal must first have been put into raw mode.
98
+// prompt is a string that is written at the start of each input line (i.e.
99
+// "> ").
100
+func NewTerminal(c io.ReadWriter, prompt string) *Terminal {
101
+	return &Terminal{
102
+		Escape:       &vt100EscapeCodes,
103
+		c:            c,
104
+		prompt:       []rune(prompt),
105
+		termWidth:    80,
106
+		termHeight:   24,
107
+		echo:         true,
108
+		historyIndex: -1,
109
+	}
110
+}
111
+
112
+const (
113
+	keyCtrlD     = 4
114
+	keyCtrlU     = 21
115
+	keyEnter     = '\r'
116
+	keyEscape    = 27
117
+	keyBackspace = 127
118
+	keyUnknown   = 0xd800 /* UTF-16 surrogate area */ + iota
119
+	keyUp
120
+	keyDown
121
+	keyLeft
122
+	keyRight
123
+	keyAltLeft
124
+	keyAltRight
125
+	keyHome
126
+	keyEnd
127
+	keyDeleteWord
128
+	keyDeleteLine
129
+	keyClearScreen
130
+	keyPasteStart
131
+	keyPasteEnd
132
+)
133
+
134
+var (
135
+	crlf       = []byte{'\r', '\n'}
136
+	pasteStart = []byte{keyEscape, '[', '2', '0', '0', '~'}
137
+	pasteEnd   = []byte{keyEscape, '[', '2', '0', '1', '~'}
138
+)
139
+
140
+// bytesToKey tries to parse a key sequence from b. If successful, it returns
141
+// the key and the remainder of the input. Otherwise it returns utf8.RuneError.
142
+func bytesToKey(b []byte, pasteActive bool) (rune, []byte) {
143
+	if len(b) == 0 {
144
+		return utf8.RuneError, nil
145
+	}
146
+
147
+	if !pasteActive {
148
+		switch b[0] {
149
+		case 1: // ^A
150
+			return keyHome, b[1:]
151
+		case 5: // ^E
152
+			return keyEnd, b[1:]
153
+		case 8: // ^H
154
+			return keyBackspace, b[1:]
155
+		case 11: // ^K
156
+			return keyDeleteLine, b[1:]
157
+		case 12: // ^L
158
+			return keyClearScreen, b[1:]
159
+		case 23: // ^W
160
+			return keyDeleteWord, b[1:]
161
+		}
162
+	}
163
+
164
+	if b[0] != keyEscape {
165
+		if !utf8.FullRune(b) {
166
+			return utf8.RuneError, b
167
+		}
168
+		r, l := utf8.DecodeRune(b)
169
+		return r, b[l:]
170
+	}
171
+
172
+	if !pasteActive && len(b) >= 3 && b[0] == keyEscape && b[1] == '[' {
173
+		switch b[2] {
174
+		case 'A':
175
+			return keyUp, b[3:]
176
+		case 'B':
177
+			return keyDown, b[3:]
178
+		case 'C':
179
+			return keyRight, b[3:]
180
+		case 'D':
181
+			return keyLeft, b[3:]
182
+		case 'H':
183
+			return keyHome, b[3:]
184
+		case 'F':
185
+			return keyEnd, b[3:]
186
+		}
187
+	}
188
+
189
+	if !pasteActive && len(b) >= 6 && b[0] == keyEscape && b[1] == '[' && b[2] == '1' && b[3] == ';' && b[4] == '3' {
190
+		switch b[5] {
191
+		case 'C':
192
+			return keyAltRight, b[6:]
193
+		case 'D':
194
+			return keyAltLeft, b[6:]
195
+		}
196
+	}
197
+
198
+	if !pasteActive && len(b) >= 6 && bytes.Equal(b[:6], pasteStart) {
199
+		return keyPasteStart, b[6:]
200
+	}
201
+
202
+	if pasteActive && len(b) >= 6 && bytes.Equal(b[:6], pasteEnd) {
203
+		return keyPasteEnd, b[6:]
204
+	}
205
+
206
+	// If we get here then we have a key that we don't recognise, or a
207
+	// partial sequence. It's not clear how one should find the end of a
208
+	// sequence without knowing them all, but it seems that [a-zA-Z~] only
209
+	// appears at the end of a sequence.
210
+	for i, c := range b[0:] {
211
+		if c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '~' {
212
+			return keyUnknown, b[i+1:]
213
+		}
214
+	}
215
+
216
+	return utf8.RuneError, b
217
+}
218
+
219
+// queue appends data to the end of t.outBuf
220
+func (t *Terminal) queue(data []rune) {
221
+	t.outBuf = append(t.outBuf, []byte(string(data))...)
222
+}
223
+
224
+var eraseUnderCursor = []rune{' ', keyEscape, '[', 'D'}
225
+var space = []rune{' '}
226
+
227
+func isPrintable(key rune) bool {
228
+	isInSurrogateArea := key >= 0xd800 && key <= 0xdbff
229
+	return key >= 32 && !isInSurrogateArea
230
+}
231
+
232
+// moveCursorToPos appends data to t.outBuf which will move the cursor to the
233
+// given, logical position in the text.
234
+func (t *Terminal) moveCursorToPos(pos int) {
235
+	if !t.echo {
236
+		return
237
+	}
238
+
239
+	x := visualLength(t.prompt) + pos
240
+	y := x / t.termWidth
241
+	x = x % t.termWidth
242
+
243
+	up := 0
244
+	if y < t.cursorY {
245
+		up = t.cursorY - y
246
+	}
247
+
248
+	down := 0
249
+	if y > t.cursorY {
250
+		down = y - t.cursorY
251
+	}
252
+
253
+	left := 0
254
+	if x < t.cursorX {
255
+		left = t.cursorX - x
256
+	}
257
+
258
+	right := 0
259
+	if x > t.cursorX {
260
+		right = x - t.cursorX
261
+	}
262
+
263
+	t.cursorX = x
264
+	t.cursorY = y
265
+	t.move(up, down, left, right)
266
+}
267
+
268
+func (t *Terminal) move(up, down, left, right int) {
269
+	movement := make([]rune, 3*(up+down+left+right))
270
+	m := movement
271
+	for i := 0; i < up; i++ {
272
+		m[0] = keyEscape
273
+		m[1] = '['
274
+		m[2] = 'A'
275
+		m = m[3:]
276
+	}
277
+	for i := 0; i < down; i++ {
278
+		m[0] = keyEscape
279
+		m[1] = '['
280
+		m[2] = 'B'
281
+		m = m[3:]
282
+	}
283
+	for i := 0; i < left; i++ {
284
+		m[0] = keyEscape
285
+		m[1] = '['
286
+		m[2] = 'D'
287
+		m = m[3:]
288
+	}
289
+	for i := 0; i < right; i++ {
290
+		m[0] = keyEscape
291
+		m[1] = '['
292
+		m[2] = 'C'
293
+		m = m[3:]
294
+	}
295
+
296
+	t.queue(movement)
297
+}
298
+
299
+func (t *Terminal) clearLineToRight() {
300
+	op := []rune{keyEscape, '[', 'K'}
301
+	t.queue(op)
302
+}
303
+
304
+const maxLineLength = 4096
305
+
306
+func (t *Terminal) setLine(newLine []rune, newPos int) {
307
+	if t.echo {
308
+		t.moveCursorToPos(0)
309
+		t.writeLine(newLine)
310
+		for i := len(newLine); i < len(t.line); i++ {
311
+			t.writeLine(space)
312
+		}
313
+		t.moveCursorToPos(newPos)
314
+	}
315
+	t.line = newLine
316
+	t.pos = newPos
317
+}
318
+
319
+func (t *Terminal) advanceCursor(places int) {
320
+	t.cursorX += places
321
+	t.cursorY += t.cursorX / t.termWidth
322
+	if t.cursorY > t.maxLine {
323
+		t.maxLine = t.cursorY
324
+	}
325
+	t.cursorX = t.cursorX % t.termWidth
326
+
327
+	if places > 0 && t.cursorX == 0 {
328
+		// Normally terminals will advance the current position
329
+		// when writing a character. But that doesn't happen
330
+		// for the last character in a line. However, when
331
+		// writing a character (except a new line) that causes
332
+		// a line wrap, the position will be advanced two
333
+		// places.
334
+		//
335
+		// So, if we are stopping at the end of a line, we
336
+		// need to write a newline so that our cursor can be
337
+		// advanced to the next line.
338
+		t.outBuf = append(t.outBuf, '\r', '\n')
339
+	}
340
+}
341
+
342
+func (t *Terminal) eraseNPreviousChars(n int) {
343
+	if n == 0 {
344
+		return
345
+	}
346
+
347
+	if t.pos < n {
348
+		n = t.pos
349
+	}
350
+	t.pos -= n
351
+	t.moveCursorToPos(t.pos)
352
+
353
+	copy(t.line[t.pos:], t.line[n+t.pos:])
354
+	t.line = t.line[:len(t.line)-n]
355
+	if t.echo {
356
+		t.writeLine(t.line[t.pos:])
357
+		for i := 0; i < n; i++ {
358
+			t.queue(space)
359
+		}
360
+		t.advanceCursor(n)
361
+		t.moveCursorToPos(t.pos)
362
+	}
363
+}
364
+
365
+// countToLeftWord returns the number of characters from the cursor to the
366
+// start of the previous word.
367
+func (t *Terminal) countToLeftWord() int {
368
+	if t.pos == 0 {
369
+		return 0
370
+	}
371
+
372
+	pos := t.pos - 1
373
+	for pos > 0 {
374
+		if t.line[pos] != ' ' {
375
+			break
376
+		}
377
+		pos--
378
+	}
379
+	for pos > 0 {
380
+		if t.line[pos] == ' ' {
381
+			pos++
382
+			break
383
+		}
384
+		pos--
385
+	}
386
+
387
+	return t.pos - pos
388
+}
389
+
390
+// countToRightWord returns the number of characters from the cursor to the
391
+// start of the next word.
392
+func (t *Terminal) countToRightWord() int {
393
+	pos := t.pos
394
+	for pos < len(t.line) {
395
+		if t.line[pos] == ' ' {
396
+			break
397
+		}
398
+		pos++
399
+	}
400
+	for pos < len(t.line) {
401
+		if t.line[pos] != ' ' {
402
+			break
403
+		}
404
+		pos++
405
+	}
406
+	return pos - t.pos
407
+}
408
+
409
+// visualLength returns the number of visible glyphs in runes.
410
+func visualLength(runes []rune) int {
411
+	inEscapeSeq := false
412
+	length := 0
413
+
414
+	for _, r := range runes {
415
+		switch {
416
+		case inEscapeSeq:
417
+			if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') {
418
+				inEscapeSeq = false
419
+			}
420
+		case r == '\x1b':
421
+			inEscapeSeq = true
422
+		default:
423
+			length++
424
+		}
425
+	}
426
+
427
+	return length
428
+}
429
+
430
// handleKey processes the given key and, optionally, returns a line of text
// that the user has entered.
//
// ok is true only when key completes a line (keyEnter); in that case line
// holds the text and the editing state has been reset. For every other key
// the zero values are returned. The default branch temporarily releases
// t.lock around the autocomplete callback, so the caller must hold t.lock.
func (t *Terminal) handleKey(key rune) (line string, ok bool) {
	// While a bracketed paste is active every key except Enter is inserted
	// literally; none of the editing shortcuts below apply.
	if t.pasteActive && key != keyEnter {
		t.addKeyToLine(key)
		return
	}

	switch key {
	case keyBackspace:
		if t.pos == 0 {
			return
		}
		t.eraseNPreviousChars(1)
	case keyAltLeft:
		// move left by a word.
		t.pos -= t.countToLeftWord()
		t.moveCursorToPos(t.pos)
	case keyAltRight:
		// move right by a word.
		t.pos += t.countToRightWord()
		t.moveCursorToPos(t.pos)
	case keyLeft:
		if t.pos == 0 {
			return
		}
		t.pos--
		t.moveCursorToPos(t.pos)
	case keyRight:
		if t.pos == len(t.line) {
			return
		}
		t.pos++
		t.moveCursorToPos(t.pos)
	case keyHome:
		if t.pos == 0 {
			return
		}
		t.pos = 0
		t.moveCursorToPos(t.pos)
	case keyEnd:
		if t.pos == len(t.line) {
			return
		}
		t.pos = len(t.line)
		t.moveCursorToPos(t.pos)
	case keyUp:
		// Step one entry further back in history. Note that this ok
		// shadows the named result, which therefore stays false.
		entry, ok := t.history.NthPreviousEntry(t.historyIndex + 1)
		if !ok {
			return "", false
		}
		if t.historyIndex == -1 {
			// Leaving the line being edited: remember it so that
			// keyDown can bring it back.
			t.historyPending = string(t.line)
		}
		t.historyIndex++
		runes := []rune(entry)
		t.setLine(runes, len(runes))
	case keyDown:
		switch t.historyIndex {
		case -1:
			// Not browsing history; nothing to do.
			return
		case 0:
			// Stepping past the newest entry restores the line that
			// was saved when browsing started.
			runes := []rune(t.historyPending)
			t.setLine(runes, len(runes))
			t.historyIndex--
		default:
			entry, ok := t.history.NthPreviousEntry(t.historyIndex - 1)
			if ok {
				t.historyIndex--
				runes := []rune(entry)
				t.setLine(runes, len(runes))
			}
		}
	case keyEnter:
		// Finish the line: move to its end, emit CRLF, hand the text to
		// the caller and reset all per-line state.
		t.moveCursorToPos(len(t.line))
		t.queue([]rune("\r\n"))
		line = string(t.line)
		ok = true
		t.line = t.line[:0]
		t.pos = 0
		t.cursorX = 0
		t.cursorY = 0
		t.maxLine = 0
	case keyDeleteWord:
		// Delete zero or more spaces and then one or more characters.
		t.eraseNPreviousChars(t.countToLeftWord())
	case keyDeleteLine:
		// Delete everything from the current cursor position to the
		// end of line.
		for i := t.pos; i < len(t.line); i++ {
			t.queue(space)
			t.advanceCursor(1)
		}
		t.line = t.line[:t.pos]
		t.moveCursorToPos(t.pos)
	case keyCtrlD:
		// Erase the character under the current position.
		// The EOF case when the line is empty is handled in
		// readLine().
		if t.pos < len(t.line) {
			// Step over the character so that erasing the "previous"
			// one removes it.
			t.pos++
			t.eraseNPreviousChars(1)
		}
	case keyCtrlU:
		// Erase everything before the cursor.
		t.eraseNPreviousChars(t.pos)
	case keyClearScreen:
		// Erases the screen and moves the cursor to the home position.
		t.queue([]rune("\x1b[2J\x1b[H"))
		t.queue(t.prompt)
		t.cursorX, t.cursorY = 0, 0
		t.advanceCursor(visualLength(t.prompt))
		t.setLine(t.line, t.pos)
	default:
		if t.AutoCompleteCallback != nil {
			prefix := string(t.line[:t.pos])
			suffix := string(t.line[t.pos:])

			// The lock is released while the callback runs and
			// re-acquired before any Terminal state is touched again.
			t.lock.Unlock()
			newLine, newPos, completeOk := t.AutoCompleteCallback(prefix+suffix, len(prefix), key)
			t.lock.Lock()

			if completeOk {
				// newPos is used as a byte offset into newLine and is
				// converted to a rune index for setLine.
				t.setLine([]rune(newLine), utf8.RuneCount([]byte(newLine)[:newPos]))
				return
			}
		}
		if !isPrintable(key) {
			return
		}
		if len(t.line) == maxLineLength {
			return
		}
		t.addKeyToLine(key)
	}
	return
}
566
+
567
+// addKeyToLine inserts the given key at the current position in the current
568
+// line.
569
+func (t *Terminal) addKeyToLine(key rune) {
570
+	if len(t.line) == cap(t.line) {
571
+		newLine := make([]rune, len(t.line), 2*(1+len(t.line)))
572
+		copy(newLine, t.line)
573
+		t.line = newLine
574
+	}
575
+	t.line = t.line[:len(t.line)+1]
576
+	copy(t.line[t.pos+1:], t.line[t.pos:])
577
+	t.line[t.pos] = key
578
+	if t.echo {
579
+		t.writeLine(t.line[t.pos:])
580
+	}
581
+	t.pos++
582
+	t.moveCursorToPos(t.pos)
583
+}
584
+
585
+func (t *Terminal) writeLine(line []rune) {
586
+	for len(line) != 0 {
587
+		remainingOnLine := t.termWidth - t.cursorX
588
+		todo := len(line)
589
+		if todo > remainingOnLine {
590
+			todo = remainingOnLine
591
+		}
592
+		t.queue(line[:todo])
593
+		t.advanceCursor(visualLength(line[:todo]))
594
+		line = line[todo:]
595
+	}
596
+}
597
+
598
+// writeWithCRLF writes buf to w but replaces all occurrences of \n with \r\n.
599
+func writeWithCRLF(w io.Writer, buf []byte) (n int, err error) {
600
+	for len(buf) > 0 {
601
+		i := bytes.IndexByte(buf, '\n')
602
+		todo := len(buf)
603
+		if i >= 0 {
604
+			todo = i
605
+		}
606
+
607
+		var nn int
608
+		nn, err = w.Write(buf[:todo])
609
+		n += nn
610
+		if err != nil {
611
+			return n, err
612
+		}
613
+		buf = buf[todo:]
614
+
615
+		if i >= 0 {
616
+			if _, err = w.Write(crlf); err != nil {
617
+				return n, err
618
+			}
619
+			n += 1
620
+			buf = buf[1:]
621
+		}
622
+	}
623
+
624
+	return n, nil
625
+}
626
+
627
// Write writes buf to the underlying connection with every '\n' expanded by
// writeWithCRLF. If a prompt or user input is currently on screen it is
// cleared first and repainted after buf has been written, so output does not
// get interleaved with the line being edited.
//
// n is the count reported by writeWithCRLF for buf; err is the first error
// returned by any write to the connection.
func (t *Terminal) Write(buf []byte) (n int, err error) {
	t.lock.Lock()
	defer t.lock.Unlock()

	if t.cursorX == 0 && t.cursorY == 0 {
		// This is the easy case: there's nothing on the screen that we
		// have to move out of the way.
		return writeWithCRLF(t.c, buf)
	}

	// We have a prompt and possibly user input on the screen. We
	// have to clear it first.
	t.move(0 /* up */, 0 /* down */, t.cursorX /* left */, 0 /* right */)
	t.cursorX = 0
	t.clearLineToRight()

	// Walk up through any wrapped rows, clearing each one.
	for t.cursorY > 0 {
		t.move(1 /* up */, 0, 0, 0)
		t.cursorY--
		t.clearLineToRight()
	}

	// Flush the queued clearing sequences before the caller's data.
	if _, err = t.c.Write(t.outBuf); err != nil {
		return
	}
	t.outBuf = t.outBuf[:0]

	if n, err = writeWithCRLF(t.c, buf); err != nil {
		return
	}

	// Repaint the prompt and, when echo is on, the line being edited.
	t.writeLine(t.prompt)
	if t.echo {
		t.writeLine(t.line)
	}

	t.moveCursorToPos(t.pos)

	if _, err = t.c.Write(t.outBuf); err != nil {
		return
	}
	t.outBuf = t.outBuf[:0]
	return
}
671
+
672
+// ReadPassword temporarily changes the prompt and reads a password, without
673
+// echo, from the terminal.
674
+func (t *Terminal) ReadPassword(prompt string) (line string, err error) {
675
+	t.lock.Lock()
676
+	defer t.lock.Unlock()
677
+
678
+	oldPrompt := t.prompt
679
+	t.prompt = []rune(prompt)
680
+	t.echo = false
681
+
682
+	line, err = t.readLine()
683
+
684
+	t.prompt = oldPrompt
685
+	t.echo = true
686
+
687
+	return
688
+}
689
+
690
+// ReadLine returns a line of input from the terminal.
691
+func (t *Terminal) ReadLine() (line string, err error) {
692
+	t.lock.Lock()
693
+	defer t.lock.Unlock()
694
+
695
+	return t.readLine()
696
+}
697
+
698
// readLine is the implementation behind ReadLine and ReadPassword. It prints
// the prompt if nothing is on screen, then alternates between decoding keys
// from buffered input and reading more bytes from the connection until
// handleKey reports a complete line.
//
// It returns io.EOF when keyCtrlD arrives on an empty line, any error from
// reading the connection, or ErrPasteIndicator together with a valid line
// when the line was tracked as consisting only of pasted data. The caller
// must hold t.lock; it is released only around the blocking Read.
func (t *Terminal) readLine() (line string, err error) {
	// t.lock must be held at this point

	if t.cursorX == 0 && t.cursorY == 0 {
		t.writeLine(t.prompt)
		t.c.Write(t.outBuf)
		t.outBuf = t.outBuf[:0]
	}

	lineIsPasted := t.pasteActive

	for {
		// First consume whatever is already buffered from earlier reads.
		rest := t.remainder
		lineOk := false
		for !lineOk {
			var key rune
			key, rest = bytesToKey(rest, t.pasteActive)
			if key == utf8.RuneError {
				// No complete key could be decoded from rest; go and
				// read more input.
				break
			}
			if !t.pasteActive {
				if key == keyCtrlD {
					if len(t.line) == 0 {
						return "", io.EOF
					}
				}
				if key == keyPasteStart {
					t.pasteActive = true
					if len(t.line) == 0 {
						// The paste starts on an empty line, so the
						// line may end up being entirely pasted.
						lineIsPasted = true
					}
					continue
				}
			} else if key == keyPasteEnd {
				t.pasteActive = false
				continue
			}
			if !t.pasteActive {
				// A key handled outside a paste means the line is not
				// purely pasted data.
				lineIsPasted = false
			}
			line, lineOk = t.handleKey(key)
		}
		// Keep any undecoded bytes at the front of inBuf for the next
		// round.
		if len(rest) > 0 {
			n := copy(t.inBuf[:], rest)
			t.remainder = t.inBuf[:n]
		} else {
			t.remainder = nil
		}
		// Flush whatever the key handling queued for display.
		t.c.Write(t.outBuf)
		t.outBuf = t.outBuf[:0]
		if lineOk {
			if t.echo {
				// Only echoed lines are recorded in history.
				t.historyIndex = -1
				t.history.Add(line)
			}
			if lineIsPasted {
				err = ErrPasteIndicator
			}
			return
		}

		// t.remainder is a slice at the beginning of t.inBuf
		// containing a partial key sequence
		readBuf := t.inBuf[len(t.remainder):]
		var n int

		// Release the lock while blocked in Read.
		t.lock.Unlock()
		n, err = t.c.Read(readBuf)
		t.lock.Lock()

		if err != nil {
			return
		}

		t.remainder = t.inBuf[:n+len(t.remainder)]
	}
}
775
+
776
+// SetPrompt sets the prompt to be used when reading subsequent lines.
777
+func (t *Terminal) SetPrompt(prompt string) {
778
+	t.lock.Lock()
779
+	defer t.lock.Unlock()
780
+
781
+	t.prompt = []rune(prompt)
782
+}
783
+
784
// clearAndRepaintLinePlusNPrevious clears the row holding the start of the
// current input plus the numPrevLines rows below it, then redraws the prompt
// and the current line and puts the cursor back at t.pos. All output is
// queued rather than written directly.
func (t *Terminal) clearAndRepaintLinePlusNPrevious(numPrevLines int) {
	// Move cursor to column zero at the start of the line.
	t.move(t.cursorY, 0, t.cursorX, 0)
	t.cursorX, t.cursorY = 0, 0
	t.clearLineToRight()
	for t.cursorY < numPrevLines {
		// Move down a line
		t.move(0, 1, 0, 0)
		t.cursorY++
		t.clearLineToRight()
	}
	// Move back to beginning.
	t.move(t.cursorY, 0, 0, 0)
	t.cursorX, t.cursorY = 0, 0

	// Repaint the prompt followed by the line being edited.
	t.queue(t.prompt)
	t.advanceCursor(visualLength(t.prompt))
	t.writeLine(t.line)
	t.moveCursorToPos(t.pos)
}
804
+
805
// SetSize records the new terminal dimensions and, when the width changed
// while something is on screen, repaints the prompt and current line to match.
// A width of zero is treated as one. The returned error is the result of
// flushing the repaint output to the connection.
func (t *Terminal) SetSize(width, height int) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	if width == 0 {
		width = 1
	}

	oldWidth := t.termWidth
	t.termWidth, t.termHeight = width, height

	switch {
	case width == oldWidth:
		// If the width didn't change then nothing else needs to be
		// done.
		return nil
	case len(t.line) == 0 && t.cursorX == 0 && t.cursorY == 0:
		// If there is nothing on current line and no prompt printed,
		// just do nothing
		return nil
	case width < oldWidth:
		// Some terminals (e.g. xterm) will truncate lines that were
		// too long when shrinking. Others, (e.g. gnome-terminal) will
		// attempt to wrap them. For the former, repainting t.maxLine
		// works great, but that behaviour goes badly wrong in the case
		// of the latter because they have doubled every full line.

		// We assume that we are working on a terminal that wraps lines
		// and adjust the cursor position based on every previous line
		// wrapping and turning into two. This causes the prompt on
		// xterms to move upwards, which isn't great, but it avoids a
		// huge mess with gnome-terminal.
		if t.cursorX >= t.termWidth {
			t.cursorX = t.termWidth - 1
		}
		t.cursorY *= 2
		t.clearAndRepaintLinePlusNPrevious(t.maxLine * 2)
	case width > oldWidth:
		// If the terminal expands then our position calculations will
		// be wrong in the future because we think the cursor is
		// |t.pos| chars into the string, but there will be a gap at
		// the end of any wrapped line.
		//
		// But the position will actually be correct until we move, so
		// we can move back to the beginning and repaint everything.
		t.clearAndRepaintLinePlusNPrevious(t.maxLine)
	}

	// Flush the queued repaint output.
	_, err := t.c.Write(t.outBuf)
	t.outBuf = t.outBuf[:0]
	return err
}
857
+
858
// pasteIndicatorError is the concrete type of ErrPasteIndicator.
type pasteIndicatorError struct{}

// Error implements the error interface with a fixed message.
func (pasteIndicatorError) Error() string {
	const msg = "terminal: ErrPasteIndicator not correctly handled"
	return msg
}

// ErrPasteIndicator may be returned from ReadLine as the error, in addition
// to valid line data. It signals that bracketed paste mode is enabled and that
// the returned line consists only of pasted data, which programs may wish to
// interpret more literally than typed data.
var ErrPasteIndicator = pasteIndicatorError{}
869
+
870
+// SetBracketedPasteMode requests that the terminal bracket paste operations
871
+// with markers. Not all terminals support this but, if it is supported, then
872
+// enabling this mode will stop any autocomplete callback from running due to
873
+// pastes. Additionally, any lines that are completely pasted will be returned
874
+// from ReadLine with the error set to ErrPasteIndicator.
875
+func (t *Terminal) SetBracketedPasteMode(on bool) {
876
+	if on {
877
+		io.WriteString(t.c, "\x1b[?2004h")
878
+	} else {
879
+		io.WriteString(t.c, "\x1b[?2004l")
880
+	}
881
+}
882
+
883
// stRingBuffer is a ring buffer of strings. The zero value is an empty ring
// that allocates its storage on the first call to Add.
type stRingBuffer struct {
	// entries contains max elements.
	entries []string
	max     int
	// head contains the index of the element most recently added to the ring.
	head int
	// size contains the number of elements in the ring.
	size int
}

// Add stores a as the most recent entry, overwriting the oldest entry once
// the ring is full.
func (s *stRingBuffer) Add(a string) {
	if s.entries == nil {
		const defaultNumEntries = 100
		s.entries = make([]string, defaultNumEntries)
		s.max = defaultNumEntries
	}

	s.head = (s.head + 1) % s.max
	s.entries[s.head] = a
	if s.size < s.max {
		s.size++
	}
}

// NthPreviousEntry returns the value passed to the nth previous call to Add.
// If n is zero then the immediately prior value is returned, if one, then the
// next most recent, and so on. If such an element doesn't exist (including
// when n is negative) then ok is false.
func (s *stRingBuffer) NthPreviousEntry(n int) (value string, ok bool) {
	// A negative n would index past head: into a slot that was never
	// requested, or out of range entirely on an empty ring.
	if n < 0 || n >= s.size {
		return "", false
	}
	index := s.head - n
	if index < 0 {
		index += s.max
	}
	return s.entries[index], true
}
922
+
923
// readPasswordLine consumes reader one byte at a time until it sees '\n' or
// the reader fails. The returned slice excludes the '\n', and every '\r'
// encountered is dropped. Hitting io.EOF after at least one byte has been
// collected is reported as success; otherwise the reader's error is returned
// alongside whatever was collected.
func readPasswordLine(reader io.Reader) ([]byte, error) {
	var (
		one  [1]byte
		line []byte
	)

	for {
		got, err := reader.Read(one[:])
		if got <= 0 {
			if err == nil {
				// Nothing read and no error: try again.
				continue
			}
			if err == io.EOF && len(line) > 0 {
				return line, nil
			}
			return line, err
		}

		if b := one[0]; b == '\n' {
			return line, nil
		} else if b != '\r' {
			// '\r' is skipped so Windows line endings do not leak into
			// the password.
			line = append(line, b)
		}
	}
}
0 951
new file mode 100644
... ...
@@ -0,0 +1,119 @@
0
+// Copyright 2011 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build darwin dragonfly freebsd linux,!appengine netbsd openbsd
5
+
6
+// Package terminal provides support functions for dealing with terminals, as
7
+// commonly found on UNIX systems.
8
+//
9
+// Putting a terminal into raw mode is the most common requirement:
10
+//
11
+// 	oldState, err := terminal.MakeRaw(0)
12
+// 	if err != nil {
13
+// 	        panic(err)
14
+// 	}
15
+// 	defer terminal.Restore(0, oldState)
16
+package terminal // import "golang.org/x/crypto/ssh/terminal"
17
+
18
+import (
19
+	"syscall"
20
+	"unsafe"
21
+)
22
+
23
// State contains the state of a terminal. It wraps the termios settings
// captured by MakeRaw or GetState so that Restore can write them back.
type State struct {
	termios syscall.Termios
}
27
+
28
+// IsTerminal returns true if the given file descriptor is a terminal.
29
+func IsTerminal(fd int) bool {
30
+	var termios syscall.Termios
31
+	_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlReadTermios, uintptr(unsafe.Pointer(&termios)), 0, 0, 0)
32
+	return err == 0
33
+}
34
+
35
+// MakeRaw put the terminal connected to the given file descriptor into raw
36
+// mode and returns the previous state of the terminal so that it can be
37
+// restored.
38
+func MakeRaw(fd int) (*State, error) {
39
+	var oldState State
40
+	if _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlReadTermios, uintptr(unsafe.Pointer(&oldState.termios)), 0, 0, 0); err != 0 {
41
+		return nil, err
42
+	}
43
+
44
+	newState := oldState.termios
45
+	// This attempts to replicate the behaviour documented for cfmakeraw in
46
+	// the termios(3) manpage.
47
+	newState.Iflag &^= syscall.IGNBRK | syscall.BRKINT | syscall.PARMRK | syscall.ISTRIP | syscall.INLCR | syscall.IGNCR | syscall.ICRNL | syscall.IXON
48
+	newState.Oflag &^= syscall.OPOST
49
+	newState.Lflag &^= syscall.ECHO | syscall.ECHONL | syscall.ICANON | syscall.ISIG | syscall.IEXTEN
50
+	newState.Cflag &^= syscall.CSIZE | syscall.PARENB
51
+	newState.Cflag |= syscall.CS8
52
+	if _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlWriteTermios, uintptr(unsafe.Pointer(&newState)), 0, 0, 0); err != 0 {
53
+		return nil, err
54
+	}
55
+
56
+	return &oldState, nil
57
+}
58
+
59
+// GetState returns the current state of a terminal which may be useful to
60
+// restore the terminal after a signal.
61
+func GetState(fd int) (*State, error) {
62
+	var oldState State
63
+	if _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlReadTermios, uintptr(unsafe.Pointer(&oldState.termios)), 0, 0, 0); err != 0 {
64
+		return nil, err
65
+	}
66
+
67
+	return &oldState, nil
68
+}
69
+
70
+// Restore restores the terminal connected to the given file descriptor to a
71
+// previous state.
72
+func Restore(fd int, state *State) error {
73
+	if _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlWriteTermios, uintptr(unsafe.Pointer(&state.termios)), 0, 0, 0); err != 0 {
74
+		return err
75
+	}
76
+	return nil
77
+}
78
+
79
// GetSize returns the dimensions of the given terminal as reported by the
// TIOCGWINSZ ioctl. On failure it returns -1, -1 and the errno.
func GetSize(fd int) (width, height int, err error) {
	// The ioctl fills four 16-bit values; element 0 is used as the height
	// and element 1 as the width. The remaining two are not used.
	var winsize [4]uint16

	_, _, errno := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), uintptr(syscall.TIOCGWINSZ), uintptr(unsafe.Pointer(&winsize)), 0, 0, 0)
	if errno != 0 {
		return -1, -1, errno
	}
	width, height = int(winsize[1]), int(winsize[0])
	return width, height, nil
}
88
+
89
// passwordReader adapts a raw file descriptor to io.Reader; the integer value
// is the descriptor that Read reads from.
type passwordReader int

// Read reads from the descriptor into buf using syscall.Read.
func (r passwordReader) Read(buf []byte) (int, error) {
	fd := int(r)
	n, err := syscall.Read(fd, buf)
	return n, err
}
95
+
96
+// ReadPassword reads a line of input from a terminal without local echo.  This
97
+// is commonly used for inputting passwords and other sensitive data. The slice
98
+// returned does not include the \n.
99
+func ReadPassword(fd int) ([]byte, error) {
100
+	var oldState syscall.Termios
101
+	if _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlReadTermios, uintptr(unsafe.Pointer(&oldState)), 0, 0, 0); err != 0 {
102
+		return nil, err
103
+	}
104
+
105
+	newState := oldState
106
+	newState.Lflag &^= syscall.ECHO
107
+	newState.Lflag |= syscall.ICANON | syscall.ISIG
108
+	newState.Iflag |= syscall.ICRNL
109
+	if _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlWriteTermios, uintptr(unsafe.Pointer(&newState)), 0, 0, 0); err != 0 {
110
+		return nil, err
111
+	}
112
+
113
+	defer func() {
114
+		syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlWriteTermios, uintptr(unsafe.Pointer(&oldState)), 0, 0, 0)
115
+	}()
116
+
117
+	return readPasswordLine(passwordReader(fd))
118
+}
0 119
new file mode 100644
... ...
@@ -0,0 +1,12 @@
0
+// Copyright 2013 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build darwin dragonfly freebsd netbsd openbsd
5
+
6
+package terminal
7
+
8
+import "golang.org/x/sys/unix"
9
+
10
+const ioctlReadTermios = unix.TIOCGETA
11
+const ioctlWriteTermios = unix.TIOCSETA
0 12
new file mode 100644
... ...
@@ -0,0 +1,10 @@
0
+// Copyright 2013 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+package terminal
5
+
6
+import "golang.org/x/sys/unix"
7
+
8
+const ioctlReadTermios = unix.TCGETS
9
+const ioctlWriteTermios = unix.TCSETS
0 10
new file mode 100644
... ...
@@ -0,0 +1,58 @@
0
+// Copyright 2016 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// Package terminal provides support functions for dealing with terminals, as
5
+// commonly found on UNIX systems.
6
+//
7
+// Putting a terminal into raw mode is the most common requirement:
8
+//
9
+// 	oldState, err := terminal.MakeRaw(0)
10
+// 	if err != nil {
11
+// 	        panic(err)
12
+// 	}
13
+// 	defer terminal.Restore(0, oldState)
14
+package terminal
15
+
16
+import (
17
+	"fmt"
18
+	"runtime"
19
+)
20
+
21
// State contains the state of a terminal. In this file it carries no data.
type State struct{}
22
+
23
// IsTerminal returns true if the given file descriptor is a terminal.
// This implementation does not inspect fd and always reports false.
func IsTerminal(fd int) bool {
	return false
}
27
+
28
+// MakeRaw put the terminal connected to the given file descriptor into raw
29
+// mode and returns the previous state of the terminal so that it can be
30
+// restored.
31
+func MakeRaw(fd int) (*State, error) {
32
+	return nil, fmt.Errorf("terminal: MakeRaw not implemented on %s/%s", runtime.GOOS, runtime.GOARCH)
33
+}
34
+
35
+// GetState returns the current state of a terminal which may be useful to
36
+// restore the terminal after a signal.
37
+func GetState(fd int) (*State, error) {
38
+	return nil, fmt.Errorf("terminal: GetState not implemented on %s/%s", runtime.GOOS, runtime.GOARCH)
39
+}
40
+
41
+// Restore restores the terminal connected to the given file descriptor to a
42
+// previous state.
43
+func Restore(fd int, state *State) error {
44
+	return fmt.Errorf("terminal: Restore not implemented on %s/%s", runtime.GOOS, runtime.GOARCH)
45
+}
46
+
47
// GetSize is meant to return the dimensions of the given terminal. This
// implementation is a stub: it always returns 0, 0 and a "not implemented"
// error naming the running GOOS/GOARCH.
func GetSize(fd int) (width, height int, err error) {
	err = fmt.Errorf("terminal: GetSize not implemented on %s/%s", runtime.GOOS, runtime.GOARCH)
	return 0, 0, err
}
51
+
52
// ReadPassword is meant to read a line of input from a terminal without local
// echo. This implementation is a stub: it always returns a nil slice and a
// "not implemented" error naming the running GOOS/GOARCH.
func ReadPassword(fd int) ([]byte, error) {
	err := fmt.Errorf("terminal: ReadPassword not implemented on %s/%s", runtime.GOOS, runtime.GOARCH)
	return nil, err
}
0 58
new file mode 100644
... ...
@@ -0,0 +1,128 @@
0
+// Copyright 2015 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build solaris
5
+
6
+package terminal // import "golang.org/x/crypto/ssh/terminal"
7
+
8
+import (
9
+	"golang.org/x/sys/unix"
10
+	"io"
11
+	"syscall"
12
+)
13
+
14
// State contains the state of a terminal. It holds the termios pointer
// obtained by MakeRaw or GetState, which Restore passes back to the ioctl.
type State struct {
	state *unix.Termios
}
18
+
19
+// IsTerminal returns true if the given file descriptor is a terminal.
20
+func IsTerminal(fd int) bool {
21
+	_, err := unix.IoctlGetTermio(fd, unix.TCGETA)
22
+	return err == nil
23
+}
24
+
25
// ReadPassword reads a line of input from a terminal without local echo.  This
// is commonly used for inputting passwords and other sensitive data. The slice
// returned does not include the \n.
//
// The previous terminal settings are written back when the function returns.
// io.EOF is returned if the first read yields no data.
func ReadPassword(fd int) ([]byte, error) {
	// see also: http://src.illumos.org/source/xref/illumos-gate/usr/src/lib/libast/common/uwin/getpass.c
	val, err := unix.IoctlGetTermios(fd, unix.TCGETS)
	if err != nil {
		return nil, err
	}
	// Work on copies so the saved settings are not modified.
	oldState := *val

	// Echo off; canonical mode, signals and CR-to-NL translation on.
	newState := oldState
	newState.Lflag &^= syscall.ECHO
	newState.Lflag |= syscall.ICANON | syscall.ISIG
	newState.Iflag |= syscall.ICRNL
	err = unix.IoctlSetTermios(fd, unix.TCSETS, &newState)
	if err != nil {
		return nil, err
	}

	defer unix.IoctlSetTermios(fd, unix.TCSETS, &oldState)

	var buf [16]byte
	var ret []byte
	for {
		n, err := syscall.Read(fd, buf[:])
		if err != nil {
			return nil, err
		}
		if n == 0 {
			if len(ret) == 0 {
				return nil, io.EOF
			}
			break
		}
		// Drop a trailing newline from this chunk.
		if buf[n-1] == '\n' {
			n--
		}
		ret = append(ret, buf[:n]...)
		// A chunk that did not fill the buffer (after dropping the
		// newline) is taken as the end of the line.
		if n < len(buf) {
			break
		}
	}

	return ret, nil
}
71
+
72
+// MakeRaw puts the terminal connected to the given file descriptor into raw
73
+// mode and returns the previous state of the terminal so that it can be
74
+// restored.
75
+// see http://cr.illumos.org/~webrev/andy_js/1060/
76
+func MakeRaw(fd int) (*State, error) {
77
+	oldTermiosPtr, err := unix.IoctlGetTermios(fd, unix.TCGETS)
78
+	if err != nil {
79
+		return nil, err
80
+	}
81
+	oldTermios := *oldTermiosPtr
82
+
83
+	newTermios := oldTermios
84
+	newTermios.Iflag &^= syscall.IGNBRK | syscall.BRKINT | syscall.PARMRK | syscall.ISTRIP | syscall.INLCR | syscall.IGNCR | syscall.ICRNL | syscall.IXON
85
+	newTermios.Oflag &^= syscall.OPOST
86
+	newTermios.Lflag &^= syscall.ECHO | syscall.ECHONL | syscall.ICANON | syscall.ISIG | syscall.IEXTEN
87
+	newTermios.Cflag &^= syscall.CSIZE | syscall.PARENB
88
+	newTermios.Cflag |= syscall.CS8
89
+	newTermios.Cc[unix.VMIN] = 1
90
+	newTermios.Cc[unix.VTIME] = 0
91
+
92
+	if err := unix.IoctlSetTermios(fd, unix.TCSETS, &newTermios); err != nil {
93
+		return nil, err
94
+	}
95
+
96
+	return &State{
97
+		state: oldTermiosPtr,
98
+	}, nil
99
+}
100
+
101
+// Restore restores the terminal connected to the given file descriptor to a
102
+// previous state.
103
+func Restore(fd int, oldState *State) error {
104
+	return unix.IoctlSetTermios(fd, unix.TCSETS, oldState.state)
105
+}
106
+
107
+// GetState returns the current state of a terminal which may be useful to
108
+// restore the terminal after a signal.
109
+func GetState(fd int) (*State, error) {
110
+	oldTermiosPtr, err := unix.IoctlGetTermios(fd, unix.TCGETS)
111
+	if err != nil {
112
+		return nil, err
113
+	}
114
+
115
+	return &State{
116
+		state: oldTermiosPtr,
117
+	}, nil
118
+}
119
+
120
+// GetSize returns the dimensions of the given terminal.
121
+func GetSize(fd int) (width, height int, err error) {
122
+	ws, err := unix.IoctlGetWinsize(fd, unix.TIOCGWINSZ)
123
+	if err != nil {
124
+		return 0, 0, err
125
+	}
126
+	return int(ws.Col), int(ws.Row), nil
127
+}
0 128
new file mode 100644
... ...
@@ -0,0 +1,155 @@
0
+// Copyright 2011 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build windows
5
+
6
+// Package terminal provides support functions for dealing with terminals, as
7
+// commonly found on UNIX systems.
8
+//
9
+// Putting a terminal into raw mode is the most common requirement:
10
+//
11
+// 	oldState, err := terminal.MakeRaw(0)
12
+// 	if err != nil {
13
+// 	        panic(err)
14
+// 	}
15
+// 	defer terminal.Restore(0, oldState)
16
+package terminal
17
+
18
+import (
19
+	"syscall"
20
+	"unsafe"
21
+)
22
+
23
// Bit flags for the console mode word that this file reads with
// GetConsoleMode and writes with SetConsoleMode. Only enableEchoInput,
// enableProcessedInput, enableLineInput and enableProcessedOutput are used by
// the functions in this file.
const (
	enableLineInput       = 2
	enableEchoInput       = 4
	enableProcessedInput  = 1
	enableWindowInput     = 8
	enableMouseInput      = 16
	enableInsertMode      = 32
	enableQuickEditMode   = 64
	enableExtendedFlags   = 128
	enableAutoPosition    = 256
	enableProcessedOutput = 1
	enableWrapAtEolOutput = 2
)
36
+
37
// kernel32 is a lazily loaded handle to kernel32.dll, from which the console
// procedures below are resolved.
var kernel32 = syscall.NewLazyDLL("kernel32.dll")

// Console procedures called through syscall.Syscall by the functions in this
// file.
var (
	procGetConsoleMode             = kernel32.NewProc("GetConsoleMode")
	procSetConsoleMode             = kernel32.NewProc("SetConsoleMode")
	procGetConsoleScreenBufferInfo = kernel32.NewProc("GetConsoleScreenBufferInfo")
)
44
+
45
// Go declarations of the data passed to GetConsoleScreenBufferInfo. The
// procedure fills a consoleScreenBufferInfo through a raw pointer, so the
// field order and sizes here presumably mirror the corresponding Windows
// structures and must not be rearranged.
type (
	short int16
	word  uint16

	// coord is an x/y pair.
	coord struct {
		x short
		y short
	}
	// smallRect is a rectangle given by its four edges.
	smallRect struct {
		left   short
		top    short
		right  short
		bottom short
	}
	// consoleScreenBufferInfo is the structure filled in by
	// GetConsoleScreenBufferInfo.
	consoleScreenBufferInfo struct {
		size              coord
		cursorPosition    coord
		attributes        word
		window            smallRect
		maximumWindowSize coord
	}
)
67
+
68
// State contains the state of a terminal: the console mode word captured by
// MakeRaw or GetState, which Restore writes back.
type State struct {
	mode uint32
}
71
+
72
+// IsTerminal returns true if the given file descriptor is a terminal.
73
+func IsTerminal(fd int) bool {
74
+	var st uint32
75
+	r, _, e := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(fd), uintptr(unsafe.Pointer(&st)), 0)
76
+	return r != 0 && e == 0
77
+}
78
+
79
+// MakeRaw put the terminal connected to the given file descriptor into raw
80
+// mode and returns the previous state of the terminal so that it can be
81
+// restored.
82
+func MakeRaw(fd int) (*State, error) {
83
+	var st uint32
84
+	_, _, e := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(fd), uintptr(unsafe.Pointer(&st)), 0)
85
+	if e != 0 {
86
+		return nil, error(e)
87
+	}
88
+	raw := st &^ (enableEchoInput | enableProcessedInput | enableLineInput | enableProcessedOutput)
89
+	_, _, e = syscall.Syscall(procSetConsoleMode.Addr(), 2, uintptr(fd), uintptr(raw), 0)
90
+	if e != 0 {
91
+		return nil, error(e)
92
+	}
93
+	return &State{st}, nil
94
+}
95
+
96
+// GetState returns the current state of a terminal which may be useful to
97
+// restore the terminal after a signal.
98
+func GetState(fd int) (*State, error) {
99
+	var st uint32
100
+	_, _, e := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(fd), uintptr(unsafe.Pointer(&st)), 0)
101
+	if e != 0 {
102
+		return nil, error(e)
103
+	}
104
+	return &State{st}, nil
105
+}
106
+
107
+// Restore restores the terminal connected to the given file descriptor to a
108
+// previous state.
109
+func Restore(fd int, state *State) error {
110
+	_, _, err := syscall.Syscall(procSetConsoleMode.Addr(), 2, uintptr(fd), uintptr(state.mode), 0)
111
+	return err
112
+}
113
+
114
+// GetSize returns the dimensions of the given terminal.
115
+func GetSize(fd int) (width, height int, err error) {
116
+	var info consoleScreenBufferInfo
117
+	_, _, e := syscall.Syscall(procGetConsoleScreenBufferInfo.Addr(), 2, uintptr(fd), uintptr(unsafe.Pointer(&info)), 0)
118
+	if e != 0 {
119
+		return 0, 0, error(e)
120
+	}
121
+	return int(info.size.x), int(info.size.y), nil
122
+}
123
+
124
+// passwordReader is an io.Reader that reads from a specific Windows HANDLE.
125
+type passwordReader int
126
+
127
+func (r passwordReader) Read(buf []byte) (int, error) {
128
+	return syscall.Read(syscall.Handle(r), buf)
129
+}
130
+
131
+// ReadPassword reads a line of input from a terminal without local echo.  This
132
+// is commonly used for inputting passwords and other sensitive data. The slice
133
+// returned does not include the \n.
134
+func ReadPassword(fd int) ([]byte, error) {
135
+	var st uint32
136
+	_, _, e := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(fd), uintptr(unsafe.Pointer(&st)), 0)
137
+	if e != 0 {
138
+		return nil, error(e)
139
+	}
140
+	old := st
141
+
142
+	st &^= (enableEchoInput)
143
+	st |= (enableProcessedInput | enableLineInput | enableProcessedOutput)
144
+	_, _, e = syscall.Syscall(procSetConsoleMode.Addr(), 2, uintptr(fd), uintptr(st), 0)
145
+	if e != 0 {
146
+		return nil, error(e)
147
+	}
148
+
149
+	defer func() {
150
+		syscall.Syscall(procSetConsoleMode.Addr(), 2, uintptr(fd), uintptr(old), 0)
151
+	}()
152
+
153
+	return readPasswordLine(passwordReader(fd))
154
+}