Update Golang 1.13.7, golang.org/x/crypto (CVE-2020-0601, CVE-2020-7919)
| ... | ... |
@@ -1,7 +1,7 @@ |
| 1 | 1 |
# syntax=docker/dockerfile:1.1.3-experimental |
| 2 | 2 |
|
| 3 | 3 |
ARG CROSS="false" |
| 4 |
-ARG GO_VERSION=1.13.6 |
|
| 4 |
+ARG GO_VERSION=1.13.7 |
|
| 5 | 5 |
ARG DEBIAN_FRONTEND=noninteractive |
| 6 | 6 |
ARG VPNKIT_DIGEST=e508a17cfacc8fd39261d5b4e397df2b953690da577e2c987a47630cd0c42f8e |
| 7 | 7 |
ARG DOCKER_BUILDTAGS="apparmor seccomp selinux" |
| ... | ... |
@@ -165,7 +165,7 @@ FROM microsoft/windowsservercore |
| 165 | 165 |
# Use PowerShell as the default shell |
| 166 | 166 |
SHELL ["powershell", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"] |
| 167 | 167 |
|
| 168 |
-ARG GO_VERSION=1.13.6 |
|
| 168 |
+ARG GO_VERSION=1.13.7 |
|
| 169 | 169 |
ARG GOTESTSUM_COMMIT=v0.3.5 |
| 170 | 170 |
|
| 171 | 171 |
# Environment variable notes: |
| ... | ... |
@@ -134,7 +134,7 @@ github.com/golang/protobuf aa810b61a9c79d51363740d207bb |
| 134 | 134 |
github.com/cloudflare/cfssl 5d63dbd981b5c408effbb58c442d54761ff94fbd # 1.3.2 |
| 135 | 135 |
github.com/fernet/fernet-go 9eac43b88a5efb8651d24de9b68e87567e029736 |
| 136 | 136 |
github.com/google/certificate-transparency-go 37a384cd035e722ea46e55029093e26687138edf # v1.0.20 |
| 137 |
-golang.org/x/crypto 88737f569e3a9c7ab309cdc09a07fe7fc87233c3 |
|
| 137 |
+golang.org/x/crypto 69ecbb4d6d5dab05e49161c6e77ea40a030884e1 |
|
| 138 | 138 |
golang.org/x/time fbb02b2291d28baffd63558aa44b4b56f178d650 |
| 139 | 139 |
github.com/hashicorp/go-memdb cb9a474f84cc5e41b273b20c6927680b2a8776ad |
| 140 | 140 |
github.com/hashicorp/go-immutable-radix 826af9ccf0feeee615d546d69b11f8e98da8c8f1 git://github.com/tonistiigi/go-immutable-radix.git |
| ... | ... |
@@ -470,7 +470,8 @@ func (s *String) ReadASN1GeneralizedTime(out *time.Time) bool {
|
| 470 | 470 |
// It reports whether the read was successful. |
| 471 | 471 |
func (s *String) ReadASN1BitString(out *encoding_asn1.BitString) bool {
|
| 472 | 472 |
var bytes String |
| 473 |
- if !s.ReadASN1(&bytes, asn1.BIT_STRING) || len(bytes) == 0 {
|
|
| 473 |
+ if !s.ReadASN1(&bytes, asn1.BIT_STRING) || len(bytes) == 0 || |
|
| 474 |
+ len(bytes)*8/8 != len(bytes) {
|
|
| 474 | 475 |
return false |
| 475 | 476 |
} |
| 476 | 477 |
|
| ... | ... |
@@ -740,7 +741,7 @@ func (s *String) readASN1(out *String, outTag *asn1.Tag, skipHeader bool) bool {
|
| 740 | 740 |
length = headerLen + len32 |
| 741 | 741 |
} |
| 742 | 742 |
|
| 743 |
- if uint32(int(length)) != length || !s.ReadBytes((*[]byte)(out), int(length)) {
|
|
| 743 |
+ if int(length) < 0 || !s.ReadBytes((*[]byte)(out), int(length)) {
|
|
| 744 | 744 |
return false |
| 745 | 745 |
} |
| 746 | 746 |
if skipHeader && !out.Skip(int(headerLen)) {
|
| ... | ... |
@@ -24,7 +24,7 @@ type String []byte |
| 24 | 24 |
// read advances a String by n bytes and returns them. If less than n bytes |
| 25 | 25 |
// remain, it returns nil. |
| 26 | 26 |
func (s *String) read(n int) []byte {
|
| 27 |
- if len(*s) < n {
|
|
| 27 |
+ if len(*s) < n || n < 0 {
|
|
| 28 | 28 |
return nil |
| 29 | 29 |
} |
| 30 | 30 |
v := (*s)[:n] |
| ... | ... |
@@ -105,11 +105,6 @@ func (s *String) readLengthPrefixed(lenLen int, outChild *String) bool {
|
| 105 | 105 |
length = length << 8 |
| 106 | 106 |
length = length | uint32(b) |
| 107 | 107 |
} |
| 108 |
- if int(length) < 0 {
|
|
| 109 |
- // This currently cannot overflow because we read uint24 at most, but check |
|
| 110 |
- // anyway in case that changes in the future. |
|
| 111 |
- return false |
|
| 112 |
- } |
|
| 113 | 108 |
v := s.read(int(length)) |
| 114 | 109 |
if v == nil {
|
| 115 | 110 |
return false |
| 116 | 111 |
deleted file mode 100644 |
| ... | ... |
@@ -1,8 +0,0 @@ |
| 1 |
-// Copyright 2012 The Go Authors. All rights reserved. |
|
| 2 |
-// Use of this source code is governed by a BSD-style |
|
| 3 |
-// license that can be found in the LICENSE file. |
|
| 4 |
- |
|
| 5 |
-// This code was translated into a form compatible with 6a from the public |
|
| 6 |
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html |
|
| 7 |
- |
|
| 8 |
-#define REDMASK51 0x0007FFFFFFFFFFFF |
| 9 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,20 +0,0 @@ |
| 1 |
-// Copyright 2012 The Go Authors. All rights reserved. |
|
| 2 |
-// Use of this source code is governed by a BSD-style |
|
| 3 |
-// license that can be found in the LICENSE file. |
|
| 4 |
- |
|
| 5 |
-// This code was translated into a form compatible with 6a from the public |
|
| 6 |
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html |
|
| 7 |
- |
|
| 8 |
-// +build amd64,!gccgo,!appengine |
|
| 9 |
- |
|
| 10 |
-// These constants cannot be encoded in non-MOVQ immediates. |
|
| 11 |
-// We access them directly from memory instead. |
|
| 12 |
- |
|
| 13 |
-DATA ·_121666_213(SB)/8, $996687872 |
|
| 14 |
-GLOBL ·_121666_213(SB), 8, $8 |
|
| 15 |
- |
|
| 16 |
-DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA |
|
| 17 |
-GLOBL ·_2P0(SB), 8, $8 |
|
| 18 |
- |
|
| 19 |
-DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE |
|
| 20 |
-GLOBL ·_2P1234(SB), 8, $8 |
| 21 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,65 +0,0 @@ |
| 1 |
-// Copyright 2012 The Go Authors. All rights reserved. |
|
| 2 |
-// Use of this source code is governed by a BSD-style |
|
| 3 |
-// license that can be found in the LICENSE file. |
|
| 4 |
- |
|
| 5 |
-// +build amd64,!gccgo,!appengine |
|
| 6 |
- |
|
| 7 |
-// func cswap(inout *[4][5]uint64, v uint64) |
|
| 8 |
-TEXT ·cswap(SB),7,$0 |
|
| 9 |
- MOVQ inout+0(FP),DI |
|
| 10 |
- MOVQ v+8(FP),SI |
|
| 11 |
- |
|
| 12 |
- SUBQ $1, SI |
|
| 13 |
- NOTQ SI |
|
| 14 |
- MOVQ SI, X15 |
|
| 15 |
- PSHUFD $0x44, X15, X15 |
|
| 16 |
- |
|
| 17 |
- MOVOU 0(DI), X0 |
|
| 18 |
- MOVOU 16(DI), X2 |
|
| 19 |
- MOVOU 32(DI), X4 |
|
| 20 |
- MOVOU 48(DI), X6 |
|
| 21 |
- MOVOU 64(DI), X8 |
|
| 22 |
- MOVOU 80(DI), X1 |
|
| 23 |
- MOVOU 96(DI), X3 |
|
| 24 |
- MOVOU 112(DI), X5 |
|
| 25 |
- MOVOU 128(DI), X7 |
|
| 26 |
- MOVOU 144(DI), X9 |
|
| 27 |
- |
|
| 28 |
- MOVO X1, X10 |
|
| 29 |
- MOVO X3, X11 |
|
| 30 |
- MOVO X5, X12 |
|
| 31 |
- MOVO X7, X13 |
|
| 32 |
- MOVO X9, X14 |
|
| 33 |
- |
|
| 34 |
- PXOR X0, X10 |
|
| 35 |
- PXOR X2, X11 |
|
| 36 |
- PXOR X4, X12 |
|
| 37 |
- PXOR X6, X13 |
|
| 38 |
- PXOR X8, X14 |
|
| 39 |
- PAND X15, X10 |
|
| 40 |
- PAND X15, X11 |
|
| 41 |
- PAND X15, X12 |
|
| 42 |
- PAND X15, X13 |
|
| 43 |
- PAND X15, X14 |
|
| 44 |
- PXOR X10, X0 |
|
| 45 |
- PXOR X10, X1 |
|
| 46 |
- PXOR X11, X2 |
|
| 47 |
- PXOR X11, X3 |
|
| 48 |
- PXOR X12, X4 |
|
| 49 |
- PXOR X12, X5 |
|
| 50 |
- PXOR X13, X6 |
|
| 51 |
- PXOR X13, X7 |
|
| 52 |
- PXOR X14, X8 |
|
| 53 |
- PXOR X14, X9 |
|
| 54 |
- |
|
| 55 |
- MOVOU X0, 0(DI) |
|
| 56 |
- MOVOU X2, 16(DI) |
|
| 57 |
- MOVOU X4, 32(DI) |
|
| 58 |
- MOVOU X6, 48(DI) |
|
| 59 |
- MOVOU X8, 64(DI) |
|
| 60 |
- MOVOU X1, 80(DI) |
|
| 61 |
- MOVOU X3, 96(DI) |
|
| 62 |
- MOVOU X5, 112(DI) |
|
| 63 |
- MOVOU X7, 128(DI) |
|
| 64 |
- MOVOU X9, 144(DI) |
|
| 65 |
- RET |
| 66 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,834 +0,0 @@ |
| 1 |
-// Copyright 2013 The Go Authors. All rights reserved. |
|
| 2 |
-// Use of this source code is governed by a BSD-style |
|
| 3 |
-// license that can be found in the LICENSE file. |
|
| 4 |
- |
|
| 5 |
-// We have an implementation in amd64 assembly so this code is only run on |
|
| 6 |
-// non-amd64 platforms. The amd64 assembly does not support gccgo. |
|
| 7 |
-// +build !amd64 gccgo appengine |
|
| 8 |
- |
|
| 9 |
-package curve25519 |
|
| 10 |
- |
|
| 11 |
-import ( |
|
| 12 |
- "encoding/binary" |
|
| 13 |
-) |
|
| 14 |
- |
|
| 15 |
-// This code is a port of the public domain, "ref10" implementation of |
|
| 16 |
-// curve25519 from SUPERCOP 20130419 by D. J. Bernstein. |
|
| 17 |
- |
|
| 18 |
-// fieldElement represents an element of the field GF(2^255 - 19). An element |
|
| 19 |
-// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 |
|
| 20 |
-// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on |
|
| 21 |
-// context. |
|
| 22 |
-type fieldElement [10]int32 |
|
| 23 |
- |
|
| 24 |
-func feZero(fe *fieldElement) {
|
|
| 25 |
- for i := range fe {
|
|
| 26 |
- fe[i] = 0 |
|
| 27 |
- } |
|
| 28 |
-} |
|
| 29 |
- |
|
| 30 |
-func feOne(fe *fieldElement) {
|
|
| 31 |
- feZero(fe) |
|
| 32 |
- fe[0] = 1 |
|
| 33 |
-} |
|
| 34 |
- |
|
| 35 |
-func feAdd(dst, a, b *fieldElement) {
|
|
| 36 |
- for i := range dst {
|
|
| 37 |
- dst[i] = a[i] + b[i] |
|
| 38 |
- } |
|
| 39 |
-} |
|
| 40 |
- |
|
| 41 |
-func feSub(dst, a, b *fieldElement) {
|
|
| 42 |
- for i := range dst {
|
|
| 43 |
- dst[i] = a[i] - b[i] |
|
| 44 |
- } |
|
| 45 |
-} |
|
| 46 |
- |
|
| 47 |
-func feCopy(dst, src *fieldElement) {
|
|
| 48 |
- for i := range dst {
|
|
| 49 |
- dst[i] = src[i] |
|
| 50 |
- } |
|
| 51 |
-} |
|
| 52 |
- |
|
| 53 |
-// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0. |
|
| 54 |
-// |
|
| 55 |
-// Preconditions: b in {0,1}.
|
|
| 56 |
-func feCSwap(f, g *fieldElement, b int32) {
|
|
| 57 |
- b = -b |
|
| 58 |
- for i := range f {
|
|
| 59 |
- t := b & (f[i] ^ g[i]) |
|
| 60 |
- f[i] ^= t |
|
| 61 |
- g[i] ^= t |
|
| 62 |
- } |
|
| 63 |
-} |
|
| 64 |
- |
|
| 65 |
-// load3 reads a 24-bit, little-endian value from in. |
|
| 66 |
-func load3(in []byte) int64 {
|
|
| 67 |
- var r int64 |
|
| 68 |
- r = int64(in[0]) |
|
| 69 |
- r |= int64(in[1]) << 8 |
|
| 70 |
- r |= int64(in[2]) << 16 |
|
| 71 |
- return r |
|
| 72 |
-} |
|
| 73 |
- |
|
| 74 |
-// load4 reads a 32-bit, little-endian value from in. |
|
| 75 |
-func load4(in []byte) int64 {
|
|
| 76 |
- return int64(binary.LittleEndian.Uint32(in)) |
|
| 77 |
-} |
|
| 78 |
- |
|
| 79 |
-func feFromBytes(dst *fieldElement, src *[32]byte) {
|
|
| 80 |
- h0 := load4(src[:]) |
|
| 81 |
- h1 := load3(src[4:]) << 6 |
|
| 82 |
- h2 := load3(src[7:]) << 5 |
|
| 83 |
- h3 := load3(src[10:]) << 3 |
|
| 84 |
- h4 := load3(src[13:]) << 2 |
|
| 85 |
- h5 := load4(src[16:]) |
|
| 86 |
- h6 := load3(src[20:]) << 7 |
|
| 87 |
- h7 := load3(src[23:]) << 5 |
|
| 88 |
- h8 := load3(src[26:]) << 4 |
|
| 89 |
- h9 := (load3(src[29:]) & 0x7fffff) << 2 |
|
| 90 |
- |
|
| 91 |
- var carry [10]int64 |
|
| 92 |
- carry[9] = (h9 + 1<<24) >> 25 |
|
| 93 |
- h0 += carry[9] * 19 |
|
| 94 |
- h9 -= carry[9] << 25 |
|
| 95 |
- carry[1] = (h1 + 1<<24) >> 25 |
|
| 96 |
- h2 += carry[1] |
|
| 97 |
- h1 -= carry[1] << 25 |
|
| 98 |
- carry[3] = (h3 + 1<<24) >> 25 |
|
| 99 |
- h4 += carry[3] |
|
| 100 |
- h3 -= carry[3] << 25 |
|
| 101 |
- carry[5] = (h5 + 1<<24) >> 25 |
|
| 102 |
- h6 += carry[5] |
|
| 103 |
- h5 -= carry[5] << 25 |
|
| 104 |
- carry[7] = (h7 + 1<<24) >> 25 |
|
| 105 |
- h8 += carry[7] |
|
| 106 |
- h7 -= carry[7] << 25 |
|
| 107 |
- |
|
| 108 |
- carry[0] = (h0 + 1<<25) >> 26 |
|
| 109 |
- h1 += carry[0] |
|
| 110 |
- h0 -= carry[0] << 26 |
|
| 111 |
- carry[2] = (h2 + 1<<25) >> 26 |
|
| 112 |
- h3 += carry[2] |
|
| 113 |
- h2 -= carry[2] << 26 |
|
| 114 |
- carry[4] = (h4 + 1<<25) >> 26 |
|
| 115 |
- h5 += carry[4] |
|
| 116 |
- h4 -= carry[4] << 26 |
|
| 117 |
- carry[6] = (h6 + 1<<25) >> 26 |
|
| 118 |
- h7 += carry[6] |
|
| 119 |
- h6 -= carry[6] << 26 |
|
| 120 |
- carry[8] = (h8 + 1<<25) >> 26 |
|
| 121 |
- h9 += carry[8] |
|
| 122 |
- h8 -= carry[8] << 26 |
|
| 123 |
- |
|
| 124 |
- dst[0] = int32(h0) |
|
| 125 |
- dst[1] = int32(h1) |
|
| 126 |
- dst[2] = int32(h2) |
|
| 127 |
- dst[3] = int32(h3) |
|
| 128 |
- dst[4] = int32(h4) |
|
| 129 |
- dst[5] = int32(h5) |
|
| 130 |
- dst[6] = int32(h6) |
|
| 131 |
- dst[7] = int32(h7) |
|
| 132 |
- dst[8] = int32(h8) |
|
| 133 |
- dst[9] = int32(h9) |
|
| 134 |
-} |
|
| 135 |
- |
|
| 136 |
-// feToBytes marshals h to s. |
|
| 137 |
-// Preconditions: |
|
| 138 |
-// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. |
|
| 139 |
-// |
|
| 140 |
-// Write p=2^255-19; q=floor(h/p). |
|
| 141 |
-// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). |
|
| 142 |
-// |
|
| 143 |
-// Proof: |
|
| 144 |
-// Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. |
|
| 145 |
-// Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4. |
|
| 146 |
-// |
|
| 147 |
-// Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). |
|
| 148 |
-// Then 0<y<1. |
|
| 149 |
-// |
|
| 150 |
-// Write r=h-pq. |
|
| 151 |
-// Have 0<=r<=p-1=2^255-20. |
|
| 152 |
-// Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. |
|
| 153 |
-// |
|
| 154 |
-// Write x=r+19(2^-255)r+y. |
|
| 155 |
-// Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. |
|
| 156 |
-// |
|
| 157 |
-// Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) |
|
| 158 |
-// so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. |
|
| 159 |
-func feToBytes(s *[32]byte, h *fieldElement) {
|
|
| 160 |
- var carry [10]int32 |
|
| 161 |
- |
|
| 162 |
- q := (19*h[9] + (1 << 24)) >> 25 |
|
| 163 |
- q = (h[0] + q) >> 26 |
|
| 164 |
- q = (h[1] + q) >> 25 |
|
| 165 |
- q = (h[2] + q) >> 26 |
|
| 166 |
- q = (h[3] + q) >> 25 |
|
| 167 |
- q = (h[4] + q) >> 26 |
|
| 168 |
- q = (h[5] + q) >> 25 |
|
| 169 |
- q = (h[6] + q) >> 26 |
|
| 170 |
- q = (h[7] + q) >> 25 |
|
| 171 |
- q = (h[8] + q) >> 26 |
|
| 172 |
- q = (h[9] + q) >> 25 |
|
| 173 |
- |
|
| 174 |
- // Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. |
|
| 175 |
- h[0] += 19 * q |
|
| 176 |
- // Goal: Output h-2^255 q, which is between 0 and 2^255-20. |
|
| 177 |
- |
|
| 178 |
- carry[0] = h[0] >> 26 |
|
| 179 |
- h[1] += carry[0] |
|
| 180 |
- h[0] -= carry[0] << 26 |
|
| 181 |
- carry[1] = h[1] >> 25 |
|
| 182 |
- h[2] += carry[1] |
|
| 183 |
- h[1] -= carry[1] << 25 |
|
| 184 |
- carry[2] = h[2] >> 26 |
|
| 185 |
- h[3] += carry[2] |
|
| 186 |
- h[2] -= carry[2] << 26 |
|
| 187 |
- carry[3] = h[3] >> 25 |
|
| 188 |
- h[4] += carry[3] |
|
| 189 |
- h[3] -= carry[3] << 25 |
|
| 190 |
- carry[4] = h[4] >> 26 |
|
| 191 |
- h[5] += carry[4] |
|
| 192 |
- h[4] -= carry[4] << 26 |
|
| 193 |
- carry[5] = h[5] >> 25 |
|
| 194 |
- h[6] += carry[5] |
|
| 195 |
- h[5] -= carry[5] << 25 |
|
| 196 |
- carry[6] = h[6] >> 26 |
|
| 197 |
- h[7] += carry[6] |
|
| 198 |
- h[6] -= carry[6] << 26 |
|
| 199 |
- carry[7] = h[7] >> 25 |
|
| 200 |
- h[8] += carry[7] |
|
| 201 |
- h[7] -= carry[7] << 25 |
|
| 202 |
- carry[8] = h[8] >> 26 |
|
| 203 |
- h[9] += carry[8] |
|
| 204 |
- h[8] -= carry[8] << 26 |
|
| 205 |
- carry[9] = h[9] >> 25 |
|
| 206 |
- h[9] -= carry[9] << 25 |
|
| 207 |
- // h10 = carry9 |
|
| 208 |
- |
|
| 209 |
- // Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. |
|
| 210 |
- // Have h[0]+...+2^230 h[9] between 0 and 2^255-1; |
|
| 211 |
- // evidently 2^255 h10-2^255 q = 0. |
|
| 212 |
- // Goal: Output h[0]+...+2^230 h[9]. |
|
| 213 |
- |
|
| 214 |
- s[0] = byte(h[0] >> 0) |
|
| 215 |
- s[1] = byte(h[0] >> 8) |
|
| 216 |
- s[2] = byte(h[0] >> 16) |
|
| 217 |
- s[3] = byte((h[0] >> 24) | (h[1] << 2)) |
|
| 218 |
- s[4] = byte(h[1] >> 6) |
|
| 219 |
- s[5] = byte(h[1] >> 14) |
|
| 220 |
- s[6] = byte((h[1] >> 22) | (h[2] << 3)) |
|
| 221 |
- s[7] = byte(h[2] >> 5) |
|
| 222 |
- s[8] = byte(h[2] >> 13) |
|
| 223 |
- s[9] = byte((h[2] >> 21) | (h[3] << 5)) |
|
| 224 |
- s[10] = byte(h[3] >> 3) |
|
| 225 |
- s[11] = byte(h[3] >> 11) |
|
| 226 |
- s[12] = byte((h[3] >> 19) | (h[4] << 6)) |
|
| 227 |
- s[13] = byte(h[4] >> 2) |
|
| 228 |
- s[14] = byte(h[4] >> 10) |
|
| 229 |
- s[15] = byte(h[4] >> 18) |
|
| 230 |
- s[16] = byte(h[5] >> 0) |
|
| 231 |
- s[17] = byte(h[5] >> 8) |
|
| 232 |
- s[18] = byte(h[5] >> 16) |
|
| 233 |
- s[19] = byte((h[5] >> 24) | (h[6] << 1)) |
|
| 234 |
- s[20] = byte(h[6] >> 7) |
|
| 235 |
- s[21] = byte(h[6] >> 15) |
|
| 236 |
- s[22] = byte((h[6] >> 23) | (h[7] << 3)) |
|
| 237 |
- s[23] = byte(h[7] >> 5) |
|
| 238 |
- s[24] = byte(h[7] >> 13) |
|
| 239 |
- s[25] = byte((h[7] >> 21) | (h[8] << 4)) |
|
| 240 |
- s[26] = byte(h[8] >> 4) |
|
| 241 |
- s[27] = byte(h[8] >> 12) |
|
| 242 |
- s[28] = byte((h[8] >> 20) | (h[9] << 6)) |
|
| 243 |
- s[29] = byte(h[9] >> 2) |
|
| 244 |
- s[30] = byte(h[9] >> 10) |
|
| 245 |
- s[31] = byte(h[9] >> 18) |
|
| 246 |
-} |
|
| 247 |
- |
|
| 248 |
-// feMul calculates h = f * g |
|
| 249 |
-// Can overlap h with f or g. |
|
| 250 |
-// |
|
| 251 |
-// Preconditions: |
|
| 252 |
-// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |
|
| 253 |
-// |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |
|
| 254 |
-// |
|
| 255 |
-// Postconditions: |
|
| 256 |
-// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. |
|
| 257 |
-// |
|
| 258 |
-// Notes on implementation strategy: |
|
| 259 |
-// |
|
| 260 |
-// Using schoolbook multiplication. |
|
| 261 |
-// Karatsuba would save a little in some cost models. |
|
| 262 |
-// |
|
| 263 |
-// Most multiplications by 2 and 19 are 32-bit precomputations; |
|
| 264 |
-// cheaper than 64-bit postcomputations. |
|
| 265 |
-// |
|
| 266 |
-// There is one remaining multiplication by 19 in the carry chain; |
|
| 267 |
-// one *19 precomputation can be merged into this, |
|
| 268 |
-// but the resulting data flow is considerably less clean. |
|
| 269 |
-// |
|
| 270 |
-// There are 12 carries below. |
|
| 271 |
-// 10 of them are 2-way parallelizable and vectorizable. |
|
| 272 |
-// Can get away with 11 carries, but then data flow is much deeper. |
|
| 273 |
-// |
|
| 274 |
-// With tighter constraints on inputs can squeeze carries into int32. |
|
| 275 |
-func feMul(h, f, g *fieldElement) {
|
|
| 276 |
- f0 := f[0] |
|
| 277 |
- f1 := f[1] |
|
| 278 |
- f2 := f[2] |
|
| 279 |
- f3 := f[3] |
|
| 280 |
- f4 := f[4] |
|
| 281 |
- f5 := f[5] |
|
| 282 |
- f6 := f[6] |
|
| 283 |
- f7 := f[7] |
|
| 284 |
- f8 := f[8] |
|
| 285 |
- f9 := f[9] |
|
| 286 |
- g0 := g[0] |
|
| 287 |
- g1 := g[1] |
|
| 288 |
- g2 := g[2] |
|
| 289 |
- g3 := g[3] |
|
| 290 |
- g4 := g[4] |
|
| 291 |
- g5 := g[5] |
|
| 292 |
- g6 := g[6] |
|
| 293 |
- g7 := g[7] |
|
| 294 |
- g8 := g[8] |
|
| 295 |
- g9 := g[9] |
|
| 296 |
- g1_19 := 19 * g1 // 1.4*2^29 |
|
| 297 |
- g2_19 := 19 * g2 // 1.4*2^30; still ok |
|
| 298 |
- g3_19 := 19 * g3 |
|
| 299 |
- g4_19 := 19 * g4 |
|
| 300 |
- g5_19 := 19 * g5 |
|
| 301 |
- g6_19 := 19 * g6 |
|
| 302 |
- g7_19 := 19 * g7 |
|
| 303 |
- g8_19 := 19 * g8 |
|
| 304 |
- g9_19 := 19 * g9 |
|
| 305 |
- f1_2 := 2 * f1 |
|
| 306 |
- f3_2 := 2 * f3 |
|
| 307 |
- f5_2 := 2 * f5 |
|
| 308 |
- f7_2 := 2 * f7 |
|
| 309 |
- f9_2 := 2 * f9 |
|
| 310 |
- f0g0 := int64(f0) * int64(g0) |
|
| 311 |
- f0g1 := int64(f0) * int64(g1) |
|
| 312 |
- f0g2 := int64(f0) * int64(g2) |
|
| 313 |
- f0g3 := int64(f0) * int64(g3) |
|
| 314 |
- f0g4 := int64(f0) * int64(g4) |
|
| 315 |
- f0g5 := int64(f0) * int64(g5) |
|
| 316 |
- f0g6 := int64(f0) * int64(g6) |
|
| 317 |
- f0g7 := int64(f0) * int64(g7) |
|
| 318 |
- f0g8 := int64(f0) * int64(g8) |
|
| 319 |
- f0g9 := int64(f0) * int64(g9) |
|
| 320 |
- f1g0 := int64(f1) * int64(g0) |
|
| 321 |
- f1g1_2 := int64(f1_2) * int64(g1) |
|
| 322 |
- f1g2 := int64(f1) * int64(g2) |
|
| 323 |
- f1g3_2 := int64(f1_2) * int64(g3) |
|
| 324 |
- f1g4 := int64(f1) * int64(g4) |
|
| 325 |
- f1g5_2 := int64(f1_2) * int64(g5) |
|
| 326 |
- f1g6 := int64(f1) * int64(g6) |
|
| 327 |
- f1g7_2 := int64(f1_2) * int64(g7) |
|
| 328 |
- f1g8 := int64(f1) * int64(g8) |
|
| 329 |
- f1g9_38 := int64(f1_2) * int64(g9_19) |
|
| 330 |
- f2g0 := int64(f2) * int64(g0) |
|
| 331 |
- f2g1 := int64(f2) * int64(g1) |
|
| 332 |
- f2g2 := int64(f2) * int64(g2) |
|
| 333 |
- f2g3 := int64(f2) * int64(g3) |
|
| 334 |
- f2g4 := int64(f2) * int64(g4) |
|
| 335 |
- f2g5 := int64(f2) * int64(g5) |
|
| 336 |
- f2g6 := int64(f2) * int64(g6) |
|
| 337 |
- f2g7 := int64(f2) * int64(g7) |
|
| 338 |
- f2g8_19 := int64(f2) * int64(g8_19) |
|
| 339 |
- f2g9_19 := int64(f2) * int64(g9_19) |
|
| 340 |
- f3g0 := int64(f3) * int64(g0) |
|
| 341 |
- f3g1_2 := int64(f3_2) * int64(g1) |
|
| 342 |
- f3g2 := int64(f3) * int64(g2) |
|
| 343 |
- f3g3_2 := int64(f3_2) * int64(g3) |
|
| 344 |
- f3g4 := int64(f3) * int64(g4) |
|
| 345 |
- f3g5_2 := int64(f3_2) * int64(g5) |
|
| 346 |
- f3g6 := int64(f3) * int64(g6) |
|
| 347 |
- f3g7_38 := int64(f3_2) * int64(g7_19) |
|
| 348 |
- f3g8_19 := int64(f3) * int64(g8_19) |
|
| 349 |
- f3g9_38 := int64(f3_2) * int64(g9_19) |
|
| 350 |
- f4g0 := int64(f4) * int64(g0) |
|
| 351 |
- f4g1 := int64(f4) * int64(g1) |
|
| 352 |
- f4g2 := int64(f4) * int64(g2) |
|
| 353 |
- f4g3 := int64(f4) * int64(g3) |
|
| 354 |
- f4g4 := int64(f4) * int64(g4) |
|
| 355 |
- f4g5 := int64(f4) * int64(g5) |
|
| 356 |
- f4g6_19 := int64(f4) * int64(g6_19) |
|
| 357 |
- f4g7_19 := int64(f4) * int64(g7_19) |
|
| 358 |
- f4g8_19 := int64(f4) * int64(g8_19) |
|
| 359 |
- f4g9_19 := int64(f4) * int64(g9_19) |
|
| 360 |
- f5g0 := int64(f5) * int64(g0) |
|
| 361 |
- f5g1_2 := int64(f5_2) * int64(g1) |
|
| 362 |
- f5g2 := int64(f5) * int64(g2) |
|
| 363 |
- f5g3_2 := int64(f5_2) * int64(g3) |
|
| 364 |
- f5g4 := int64(f5) * int64(g4) |
|
| 365 |
- f5g5_38 := int64(f5_2) * int64(g5_19) |
|
| 366 |
- f5g6_19 := int64(f5) * int64(g6_19) |
|
| 367 |
- f5g7_38 := int64(f5_2) * int64(g7_19) |
|
| 368 |
- f5g8_19 := int64(f5) * int64(g8_19) |
|
| 369 |
- f5g9_38 := int64(f5_2) * int64(g9_19) |
|
| 370 |
- f6g0 := int64(f6) * int64(g0) |
|
| 371 |
- f6g1 := int64(f6) * int64(g1) |
|
| 372 |
- f6g2 := int64(f6) * int64(g2) |
|
| 373 |
- f6g3 := int64(f6) * int64(g3) |
|
| 374 |
- f6g4_19 := int64(f6) * int64(g4_19) |
|
| 375 |
- f6g5_19 := int64(f6) * int64(g5_19) |
|
| 376 |
- f6g6_19 := int64(f6) * int64(g6_19) |
|
| 377 |
- f6g7_19 := int64(f6) * int64(g7_19) |
|
| 378 |
- f6g8_19 := int64(f6) * int64(g8_19) |
|
| 379 |
- f6g9_19 := int64(f6) * int64(g9_19) |
|
| 380 |
- f7g0 := int64(f7) * int64(g0) |
|
| 381 |
- f7g1_2 := int64(f7_2) * int64(g1) |
|
| 382 |
- f7g2 := int64(f7) * int64(g2) |
|
| 383 |
- f7g3_38 := int64(f7_2) * int64(g3_19) |
|
| 384 |
- f7g4_19 := int64(f7) * int64(g4_19) |
|
| 385 |
- f7g5_38 := int64(f7_2) * int64(g5_19) |
|
| 386 |
- f7g6_19 := int64(f7) * int64(g6_19) |
|
| 387 |
- f7g7_38 := int64(f7_2) * int64(g7_19) |
|
| 388 |
- f7g8_19 := int64(f7) * int64(g8_19) |
|
| 389 |
- f7g9_38 := int64(f7_2) * int64(g9_19) |
|
| 390 |
- f8g0 := int64(f8) * int64(g0) |
|
| 391 |
- f8g1 := int64(f8) * int64(g1) |
|
| 392 |
- f8g2_19 := int64(f8) * int64(g2_19) |
|
| 393 |
- f8g3_19 := int64(f8) * int64(g3_19) |
|
| 394 |
- f8g4_19 := int64(f8) * int64(g4_19) |
|
| 395 |
- f8g5_19 := int64(f8) * int64(g5_19) |
|
| 396 |
- f8g6_19 := int64(f8) * int64(g6_19) |
|
| 397 |
- f8g7_19 := int64(f8) * int64(g7_19) |
|
| 398 |
- f8g8_19 := int64(f8) * int64(g8_19) |
|
| 399 |
- f8g9_19 := int64(f8) * int64(g9_19) |
|
| 400 |
- f9g0 := int64(f9) * int64(g0) |
|
| 401 |
- f9g1_38 := int64(f9_2) * int64(g1_19) |
|
| 402 |
- f9g2_19 := int64(f9) * int64(g2_19) |
|
| 403 |
- f9g3_38 := int64(f9_2) * int64(g3_19) |
|
| 404 |
- f9g4_19 := int64(f9) * int64(g4_19) |
|
| 405 |
- f9g5_38 := int64(f9_2) * int64(g5_19) |
|
| 406 |
- f9g6_19 := int64(f9) * int64(g6_19) |
|
| 407 |
- f9g7_38 := int64(f9_2) * int64(g7_19) |
|
| 408 |
- f9g8_19 := int64(f9) * int64(g8_19) |
|
| 409 |
- f9g9_38 := int64(f9_2) * int64(g9_19) |
|
| 410 |
- h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38 |
|
| 411 |
- h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19 |
|
| 412 |
- h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38 |
|
| 413 |
- h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19 |
|
| 414 |
- h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38 |
|
| 415 |
- h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19 |
|
| 416 |
- h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38 |
|
| 417 |
- h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19 |
|
| 418 |
- h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38 |
|
| 419 |
- h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 |
|
| 420 |
- var carry [10]int64 |
|
| 421 |
- |
|
| 422 |
- // |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38)) |
|
| 423 |
- // i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8 |
|
| 424 |
- // |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19)) |
|
| 425 |
- // i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9 |
|
| 426 |
- |
|
| 427 |
- carry[0] = (h0 + (1 << 25)) >> 26 |
|
| 428 |
- h1 += carry[0] |
|
| 429 |
- h0 -= carry[0] << 26 |
|
| 430 |
- carry[4] = (h4 + (1 << 25)) >> 26 |
|
| 431 |
- h5 += carry[4] |
|
| 432 |
- h4 -= carry[4] << 26 |
|
| 433 |
- // |h0| <= 2^25 |
|
| 434 |
- // |h4| <= 2^25 |
|
| 435 |
- // |h1| <= 1.51*2^58 |
|
| 436 |
- // |h5| <= 1.51*2^58 |
|
| 437 |
- |
|
| 438 |
- carry[1] = (h1 + (1 << 24)) >> 25 |
|
| 439 |
- h2 += carry[1] |
|
| 440 |
- h1 -= carry[1] << 25 |
|
| 441 |
- carry[5] = (h5 + (1 << 24)) >> 25 |
|
| 442 |
- h6 += carry[5] |
|
| 443 |
- h5 -= carry[5] << 25 |
|
| 444 |
- // |h1| <= 2^24; from now on fits into int32 |
|
| 445 |
- // |h5| <= 2^24; from now on fits into int32 |
|
| 446 |
- // |h2| <= 1.21*2^59 |
|
| 447 |
- // |h6| <= 1.21*2^59 |
|
| 448 |
- |
|
| 449 |
- carry[2] = (h2 + (1 << 25)) >> 26 |
|
| 450 |
- h3 += carry[2] |
|
| 451 |
- h2 -= carry[2] << 26 |
|
| 452 |
- carry[6] = (h6 + (1 << 25)) >> 26 |
|
| 453 |
- h7 += carry[6] |
|
| 454 |
- h6 -= carry[6] << 26 |
|
| 455 |
- // |h2| <= 2^25; from now on fits into int32 unchanged |
|
| 456 |
- // |h6| <= 2^25; from now on fits into int32 unchanged |
|
| 457 |
- // |h3| <= 1.51*2^58 |
|
| 458 |
- // |h7| <= 1.51*2^58 |
|
| 459 |
- |
|
| 460 |
- carry[3] = (h3 + (1 << 24)) >> 25 |
|
| 461 |
- h4 += carry[3] |
|
| 462 |
- h3 -= carry[3] << 25 |
|
| 463 |
- carry[7] = (h7 + (1 << 24)) >> 25 |
|
| 464 |
- h8 += carry[7] |
|
| 465 |
- h7 -= carry[7] << 25 |
|
| 466 |
- // |h3| <= 2^24; from now on fits into int32 unchanged |
|
| 467 |
- // |h7| <= 2^24; from now on fits into int32 unchanged |
|
| 468 |
- // |h4| <= 1.52*2^33 |
|
| 469 |
- // |h8| <= 1.52*2^33 |
|
| 470 |
- |
|
| 471 |
- carry[4] = (h4 + (1 << 25)) >> 26 |
|
| 472 |
- h5 += carry[4] |
|
| 473 |
- h4 -= carry[4] << 26 |
|
| 474 |
- carry[8] = (h8 + (1 << 25)) >> 26 |
|
| 475 |
- h9 += carry[8] |
|
| 476 |
- h8 -= carry[8] << 26 |
|
| 477 |
- // |h4| <= 2^25; from now on fits into int32 unchanged |
|
| 478 |
- // |h8| <= 2^25; from now on fits into int32 unchanged |
|
| 479 |
- // |h5| <= 1.01*2^24 |
|
| 480 |
- // |h9| <= 1.51*2^58 |
|
| 481 |
- |
|
| 482 |
- carry[9] = (h9 + (1 << 24)) >> 25 |
|
| 483 |
- h0 += carry[9] * 19 |
|
| 484 |
- h9 -= carry[9] << 25 |
|
| 485 |
- // |h9| <= 2^24; from now on fits into int32 unchanged |
|
| 486 |
- // |h0| <= 1.8*2^37 |
|
| 487 |
- |
|
| 488 |
- carry[0] = (h0 + (1 << 25)) >> 26 |
|
| 489 |
- h1 += carry[0] |
|
| 490 |
- h0 -= carry[0] << 26 |
|
| 491 |
- // |h0| <= 2^25; from now on fits into int32 unchanged |
|
| 492 |
- // |h1| <= 1.01*2^24 |
|
| 493 |
- |
|
| 494 |
- h[0] = int32(h0) |
|
| 495 |
- h[1] = int32(h1) |
|
| 496 |
- h[2] = int32(h2) |
|
| 497 |
- h[3] = int32(h3) |
|
| 498 |
- h[4] = int32(h4) |
|
| 499 |
- h[5] = int32(h5) |
|
| 500 |
- h[6] = int32(h6) |
|
| 501 |
- h[7] = int32(h7) |
|
| 502 |
- h[8] = int32(h8) |
|
| 503 |
- h[9] = int32(h9) |
|
| 504 |
-} |
|
| 505 |
- |
|
| 506 |
-// feSquare calculates h = f*f. Can overlap h with f. |
|
| 507 |
-// |
|
| 508 |
-// Preconditions: |
|
| 509 |
-// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |
|
| 510 |
-// |
|
| 511 |
-// Postconditions: |
|
| 512 |
-// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. |
|
| 513 |
-func feSquare(h, f *fieldElement) {
|
|
| 514 |
- f0 := f[0] |
|
| 515 |
- f1 := f[1] |
|
| 516 |
- f2 := f[2] |
|
| 517 |
- f3 := f[3] |
|
| 518 |
- f4 := f[4] |
|
| 519 |
- f5 := f[5] |
|
| 520 |
- f6 := f[6] |
|
| 521 |
- f7 := f[7] |
|
| 522 |
- f8 := f[8] |
|
| 523 |
- f9 := f[9] |
|
| 524 |
- f0_2 := 2 * f0 |
|
| 525 |
- f1_2 := 2 * f1 |
|
| 526 |
- f2_2 := 2 * f2 |
|
| 527 |
- f3_2 := 2 * f3 |
|
| 528 |
- f4_2 := 2 * f4 |
|
| 529 |
- f5_2 := 2 * f5 |
|
| 530 |
- f6_2 := 2 * f6 |
|
| 531 |
- f7_2 := 2 * f7 |
|
| 532 |
- f5_38 := 38 * f5 // 1.31*2^30 |
|
| 533 |
- f6_19 := 19 * f6 // 1.31*2^30 |
|
| 534 |
- f7_38 := 38 * f7 // 1.31*2^30 |
|
| 535 |
- f8_19 := 19 * f8 // 1.31*2^30 |
|
| 536 |
- f9_38 := 38 * f9 // 1.31*2^30 |
|
| 537 |
- f0f0 := int64(f0) * int64(f0) |
|
| 538 |
- f0f1_2 := int64(f0_2) * int64(f1) |
|
| 539 |
- f0f2_2 := int64(f0_2) * int64(f2) |
|
| 540 |
- f0f3_2 := int64(f0_2) * int64(f3) |
|
| 541 |
- f0f4_2 := int64(f0_2) * int64(f4) |
|
| 542 |
- f0f5_2 := int64(f0_2) * int64(f5) |
|
| 543 |
- f0f6_2 := int64(f0_2) * int64(f6) |
|
| 544 |
- f0f7_2 := int64(f0_2) * int64(f7) |
|
| 545 |
- f0f8_2 := int64(f0_2) * int64(f8) |
|
| 546 |
- f0f9_2 := int64(f0_2) * int64(f9) |
|
| 547 |
- f1f1_2 := int64(f1_2) * int64(f1) |
|
| 548 |
- f1f2_2 := int64(f1_2) * int64(f2) |
|
| 549 |
- f1f3_4 := int64(f1_2) * int64(f3_2) |
|
| 550 |
- f1f4_2 := int64(f1_2) * int64(f4) |
|
| 551 |
- f1f5_4 := int64(f1_2) * int64(f5_2) |
|
| 552 |
- f1f6_2 := int64(f1_2) * int64(f6) |
|
| 553 |
- f1f7_4 := int64(f1_2) * int64(f7_2) |
|
| 554 |
- f1f8_2 := int64(f1_2) * int64(f8) |
|
| 555 |
- f1f9_76 := int64(f1_2) * int64(f9_38) |
|
| 556 |
- f2f2 := int64(f2) * int64(f2) |
|
| 557 |
- f2f3_2 := int64(f2_2) * int64(f3) |
|
| 558 |
- f2f4_2 := int64(f2_2) * int64(f4) |
|
| 559 |
- f2f5_2 := int64(f2_2) * int64(f5) |
|
| 560 |
- f2f6_2 := int64(f2_2) * int64(f6) |
|
| 561 |
- f2f7_2 := int64(f2_2) * int64(f7) |
|
| 562 |
- f2f8_38 := int64(f2_2) * int64(f8_19) |
|
| 563 |
- f2f9_38 := int64(f2) * int64(f9_38) |
|
| 564 |
- f3f3_2 := int64(f3_2) * int64(f3) |
|
| 565 |
- f3f4_2 := int64(f3_2) * int64(f4) |
|
| 566 |
- f3f5_4 := int64(f3_2) * int64(f5_2) |
|
| 567 |
- f3f6_2 := int64(f3_2) * int64(f6) |
|
| 568 |
- f3f7_76 := int64(f3_2) * int64(f7_38) |
|
| 569 |
- f3f8_38 := int64(f3_2) * int64(f8_19) |
|
| 570 |
- f3f9_76 := int64(f3_2) * int64(f9_38) |
|
| 571 |
- f4f4 := int64(f4) * int64(f4) |
|
| 572 |
- f4f5_2 := int64(f4_2) * int64(f5) |
|
| 573 |
- f4f6_38 := int64(f4_2) * int64(f6_19) |
|
| 574 |
- f4f7_38 := int64(f4) * int64(f7_38) |
|
| 575 |
- f4f8_38 := int64(f4_2) * int64(f8_19) |
|
| 576 |
- f4f9_38 := int64(f4) * int64(f9_38) |
|
| 577 |
- f5f5_38 := int64(f5) * int64(f5_38) |
|
| 578 |
- f5f6_38 := int64(f5_2) * int64(f6_19) |
|
| 579 |
- f5f7_76 := int64(f5_2) * int64(f7_38) |
|
| 580 |
- f5f8_38 := int64(f5_2) * int64(f8_19) |
|
| 581 |
- f5f9_76 := int64(f5_2) * int64(f9_38) |
|
| 582 |
- f6f6_19 := int64(f6) * int64(f6_19) |
|
| 583 |
- f6f7_38 := int64(f6) * int64(f7_38) |
|
| 584 |
- f6f8_38 := int64(f6_2) * int64(f8_19) |
|
| 585 |
- f6f9_38 := int64(f6) * int64(f9_38) |
|
| 586 |
- f7f7_38 := int64(f7) * int64(f7_38) |
|
| 587 |
- f7f8_38 := int64(f7_2) * int64(f8_19) |
|
| 588 |
- f7f9_76 := int64(f7_2) * int64(f9_38) |
|
| 589 |
- f8f8_19 := int64(f8) * int64(f8_19) |
|
| 590 |
- f8f9_38 := int64(f8) * int64(f9_38) |
|
| 591 |
- f9f9_38 := int64(f9) * int64(f9_38) |
|
| 592 |
- h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38 |
|
| 593 |
- h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38 |
|
| 594 |
- h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19 |
|
| 595 |
- h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38 |
|
| 596 |
- h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38 |
|
| 597 |
- h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38 |
|
| 598 |
- h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19 |
|
| 599 |
- h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38 |
|
| 600 |
- h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38 |
|
| 601 |
- h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2 |
|
| 602 |
- var carry [10]int64 |
|
| 603 |
- |
|
| 604 |
- carry[0] = (h0 + (1 << 25)) >> 26 |
|
| 605 |
- h1 += carry[0] |
|
| 606 |
- h0 -= carry[0] << 26 |
|
| 607 |
- carry[4] = (h4 + (1 << 25)) >> 26 |
|
| 608 |
- h5 += carry[4] |
|
| 609 |
- h4 -= carry[4] << 26 |
|
| 610 |
- |
|
| 611 |
- carry[1] = (h1 + (1 << 24)) >> 25 |
|
| 612 |
- h2 += carry[1] |
|
| 613 |
- h1 -= carry[1] << 25 |
|
| 614 |
- carry[5] = (h5 + (1 << 24)) >> 25 |
|
| 615 |
- h6 += carry[5] |
|
| 616 |
- h5 -= carry[5] << 25 |
|
| 617 |
- |
|
| 618 |
- carry[2] = (h2 + (1 << 25)) >> 26 |
|
| 619 |
- h3 += carry[2] |
|
| 620 |
- h2 -= carry[2] << 26 |
|
| 621 |
- carry[6] = (h6 + (1 << 25)) >> 26 |
|
| 622 |
- h7 += carry[6] |
|
| 623 |
- h6 -= carry[6] << 26 |
|
| 624 |
- |
|
| 625 |
- carry[3] = (h3 + (1 << 24)) >> 25 |
|
| 626 |
- h4 += carry[3] |
|
| 627 |
- h3 -= carry[3] << 25 |
|
| 628 |
- carry[7] = (h7 + (1 << 24)) >> 25 |
|
| 629 |
- h8 += carry[7] |
|
| 630 |
- h7 -= carry[7] << 25 |
|
| 631 |
- |
|
| 632 |
- carry[4] = (h4 + (1 << 25)) >> 26 |
|
| 633 |
- h5 += carry[4] |
|
| 634 |
- h4 -= carry[4] << 26 |
|
| 635 |
- carry[8] = (h8 + (1 << 25)) >> 26 |
|
| 636 |
- h9 += carry[8] |
|
| 637 |
- h8 -= carry[8] << 26 |
|
| 638 |
- |
|
| 639 |
- carry[9] = (h9 + (1 << 24)) >> 25 |
|
| 640 |
- h0 += carry[9] * 19 |
|
| 641 |
- h9 -= carry[9] << 25 |
|
| 642 |
- |
|
| 643 |
- carry[0] = (h0 + (1 << 25)) >> 26 |
|
| 644 |
- h1 += carry[0] |
|
| 645 |
- h0 -= carry[0] << 26 |
|
| 646 |
- |
|
| 647 |
- h[0] = int32(h0) |
|
| 648 |
- h[1] = int32(h1) |
|
| 649 |
- h[2] = int32(h2) |
|
| 650 |
- h[3] = int32(h3) |
|
| 651 |
- h[4] = int32(h4) |
|
| 652 |
- h[5] = int32(h5) |
|
| 653 |
- h[6] = int32(h6) |
|
| 654 |
- h[7] = int32(h7) |
|
| 655 |
- h[8] = int32(h8) |
|
| 656 |
- h[9] = int32(h9) |
|
| 657 |
-} |
|
| 658 |
- |
|
| 659 |
-// feMul121666 calculates h = f * 121666. Can overlap h with f. |
|
| 660 |
-// |
|
| 661 |
-// Preconditions: |
|
| 662 |
-// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. |
|
| 663 |
-// |
|
| 664 |
-// Postconditions: |
|
| 665 |
-// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. |
|
| 666 |
-func feMul121666(h, f *fieldElement) {
|
|
| 667 |
- h0 := int64(f[0]) * 121666 |
|
| 668 |
- h1 := int64(f[1]) * 121666 |
|
| 669 |
- h2 := int64(f[2]) * 121666 |
|
| 670 |
- h3 := int64(f[3]) * 121666 |
|
| 671 |
- h4 := int64(f[4]) * 121666 |
|
| 672 |
- h5 := int64(f[5]) * 121666 |
|
| 673 |
- h6 := int64(f[6]) * 121666 |
|
| 674 |
- h7 := int64(f[7]) * 121666 |
|
| 675 |
- h8 := int64(f[8]) * 121666 |
|
| 676 |
- h9 := int64(f[9]) * 121666 |
|
| 677 |
- var carry [10]int64 |
|
| 678 |
- |
|
| 679 |
- carry[9] = (h9 + (1 << 24)) >> 25 |
|
| 680 |
- h0 += carry[9] * 19 |
|
| 681 |
- h9 -= carry[9] << 25 |
|
| 682 |
- carry[1] = (h1 + (1 << 24)) >> 25 |
|
| 683 |
- h2 += carry[1] |
|
| 684 |
- h1 -= carry[1] << 25 |
|
| 685 |
- carry[3] = (h3 + (1 << 24)) >> 25 |
|
| 686 |
- h4 += carry[3] |
|
| 687 |
- h3 -= carry[3] << 25 |
|
| 688 |
- carry[5] = (h5 + (1 << 24)) >> 25 |
|
| 689 |
- h6 += carry[5] |
|
| 690 |
- h5 -= carry[5] << 25 |
|
| 691 |
- carry[7] = (h7 + (1 << 24)) >> 25 |
|
| 692 |
- h8 += carry[7] |
|
| 693 |
- h7 -= carry[7] << 25 |
|
| 694 |
- |
|
| 695 |
- carry[0] = (h0 + (1 << 25)) >> 26 |
|
| 696 |
- h1 += carry[0] |
|
| 697 |
- h0 -= carry[0] << 26 |
|
| 698 |
- carry[2] = (h2 + (1 << 25)) >> 26 |
|
| 699 |
- h3 += carry[2] |
|
| 700 |
- h2 -= carry[2] << 26 |
|
| 701 |
- carry[4] = (h4 + (1 << 25)) >> 26 |
|
| 702 |
- h5 += carry[4] |
|
| 703 |
- h4 -= carry[4] << 26 |
|
| 704 |
- carry[6] = (h6 + (1 << 25)) >> 26 |
|
| 705 |
- h7 += carry[6] |
|
| 706 |
- h6 -= carry[6] << 26 |
|
| 707 |
- carry[8] = (h8 + (1 << 25)) >> 26 |
|
| 708 |
- h9 += carry[8] |
|
| 709 |
- h8 -= carry[8] << 26 |
|
| 710 |
- |
|
| 711 |
- h[0] = int32(h0) |
|
| 712 |
- h[1] = int32(h1) |
|
| 713 |
- h[2] = int32(h2) |
|
| 714 |
- h[3] = int32(h3) |
|
| 715 |
- h[4] = int32(h4) |
|
| 716 |
- h[5] = int32(h5) |
|
| 717 |
- h[6] = int32(h6) |
|
| 718 |
- h[7] = int32(h7) |
|
| 719 |
- h[8] = int32(h8) |
|
| 720 |
- h[9] = int32(h9) |
|
| 721 |
-} |
|
| 722 |
- |
|
| 723 |
-// feInvert sets out = z^-1. |
|
| 724 |
-func feInvert(out, z *fieldElement) {
|
|
| 725 |
- var t0, t1, t2, t3 fieldElement |
|
| 726 |
- var i int |
|
| 727 |
- |
|
| 728 |
- feSquare(&t0, z) |
|
| 729 |
- for i = 1; i < 1; i++ {
|
|
| 730 |
- feSquare(&t0, &t0) |
|
| 731 |
- } |
|
| 732 |
- feSquare(&t1, &t0) |
|
| 733 |
- for i = 1; i < 2; i++ {
|
|
| 734 |
- feSquare(&t1, &t1) |
|
| 735 |
- } |
|
| 736 |
- feMul(&t1, z, &t1) |
|
| 737 |
- feMul(&t0, &t0, &t1) |
|
| 738 |
- feSquare(&t2, &t0) |
|
| 739 |
- for i = 1; i < 1; i++ {
|
|
| 740 |
- feSquare(&t2, &t2) |
|
| 741 |
- } |
|
| 742 |
- feMul(&t1, &t1, &t2) |
|
| 743 |
- feSquare(&t2, &t1) |
|
| 744 |
- for i = 1; i < 5; i++ {
|
|
| 745 |
- feSquare(&t2, &t2) |
|
| 746 |
- } |
|
| 747 |
- feMul(&t1, &t2, &t1) |
|
| 748 |
- feSquare(&t2, &t1) |
|
| 749 |
- for i = 1; i < 10; i++ {
|
|
| 750 |
- feSquare(&t2, &t2) |
|
| 751 |
- } |
|
| 752 |
- feMul(&t2, &t2, &t1) |
|
| 753 |
- feSquare(&t3, &t2) |
|
| 754 |
- for i = 1; i < 20; i++ {
|
|
| 755 |
- feSquare(&t3, &t3) |
|
| 756 |
- } |
|
| 757 |
- feMul(&t2, &t3, &t2) |
|
| 758 |
- feSquare(&t2, &t2) |
|
| 759 |
- for i = 1; i < 10; i++ {
|
|
| 760 |
- feSquare(&t2, &t2) |
|
| 761 |
- } |
|
| 762 |
- feMul(&t1, &t2, &t1) |
|
| 763 |
- feSquare(&t2, &t1) |
|
| 764 |
- for i = 1; i < 50; i++ {
|
|
| 765 |
- feSquare(&t2, &t2) |
|
| 766 |
- } |
|
| 767 |
- feMul(&t2, &t2, &t1) |
|
| 768 |
- feSquare(&t3, &t2) |
|
| 769 |
- for i = 1; i < 100; i++ {
|
|
| 770 |
- feSquare(&t3, &t3) |
|
| 771 |
- } |
|
| 772 |
- feMul(&t2, &t3, &t2) |
|
| 773 |
- feSquare(&t2, &t2) |
|
| 774 |
- for i = 1; i < 50; i++ {
|
|
| 775 |
- feSquare(&t2, &t2) |
|
| 776 |
- } |
|
| 777 |
- feMul(&t1, &t2, &t1) |
|
| 778 |
- feSquare(&t1, &t1) |
|
| 779 |
- for i = 1; i < 5; i++ {
|
|
| 780 |
- feSquare(&t1, &t1) |
|
| 781 |
- } |
|
| 782 |
- feMul(out, &t1, &t0) |
|
| 783 |
-} |
|
| 784 |
- |
|
| 785 |
-func scalarMult(out, in, base *[32]byte) {
|
|
| 786 |
- var e [32]byte |
|
| 787 |
- |
|
| 788 |
- copy(e[:], in[:]) |
|
| 789 |
- e[0] &= 248 |
|
| 790 |
- e[31] &= 127 |
|
| 791 |
- e[31] |= 64 |
|
| 792 |
- |
|
| 793 |
- var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement |
|
| 794 |
- feFromBytes(&x1, base) |
|
| 795 |
- feOne(&x2) |
|
| 796 |
- feCopy(&x3, &x1) |
|
| 797 |
- feOne(&z3) |
|
| 798 |
- |
|
| 799 |
- swap := int32(0) |
|
| 800 |
- for pos := 254; pos >= 0; pos-- {
|
|
| 801 |
- b := e[pos/8] >> uint(pos&7) |
|
| 802 |
- b &= 1 |
|
| 803 |
- swap ^= int32(b) |
|
| 804 |
- feCSwap(&x2, &x3, swap) |
|
| 805 |
- feCSwap(&z2, &z3, swap) |
|
| 806 |
- swap = int32(b) |
|
| 807 |
- |
|
| 808 |
- feSub(&tmp0, &x3, &z3) |
|
| 809 |
- feSub(&tmp1, &x2, &z2) |
|
| 810 |
- feAdd(&x2, &x2, &z2) |
|
| 811 |
- feAdd(&z2, &x3, &z3) |
|
| 812 |
- feMul(&z3, &tmp0, &x2) |
|
| 813 |
- feMul(&z2, &z2, &tmp1) |
|
| 814 |
- feSquare(&tmp0, &tmp1) |
|
| 815 |
- feSquare(&tmp1, &x2) |
|
| 816 |
- feAdd(&x3, &z3, &z2) |
|
| 817 |
- feSub(&z2, &z3, &z2) |
|
| 818 |
- feMul(&x2, &tmp1, &tmp0) |
|
| 819 |
- feSub(&tmp1, &tmp1, &tmp0) |
|
| 820 |
- feSquare(&z2, &z2) |
|
| 821 |
- feMul121666(&z3, &tmp1) |
|
| 822 |
- feSquare(&x3, &x3) |
|
| 823 |
- feAdd(&tmp0, &tmp0, &z3) |
|
| 824 |
- feMul(&z3, &x1, &z2) |
|
| 825 |
- feMul(&z2, &tmp1, &tmp0) |
|
| 826 |
- } |
|
| 827 |
- |
|
| 828 |
- feCSwap(&x2, &x3, swap) |
|
| 829 |
- feCSwap(&z2, &z3, swap) |
|
| 830 |
- |
|
| 831 |
- feInvert(&z2, &z2) |
|
| 832 |
- feMul(&x2, &x2, &z2) |
|
| 833 |
- feToBytes(out, &x2) |
|
| 834 |
-} |
| 835 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,23 +0,0 @@ |
| 1 |
-// Copyright 2012 The Go Authors. All rights reserved. |
|
| 2 |
-// Use of this source code is governed by a BSD-style |
|
| 3 |
-// license that can be found in the LICENSE file. |
|
| 4 |
- |
|
| 5 |
-// Package curve25519 provides an implementation of scalar multiplication on |
|
| 6 |
-// the elliptic curve known as curve25519. See https://cr.yp.to/ecdh.html |
|
| 7 |
-package curve25519 // import "golang.org/x/crypto/curve25519" |
|
| 8 |
- |
|
| 9 |
-// basePoint is the x coordinate of the generator of the curve. |
|
| 10 |
-var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
|
|
| 11 |
- |
|
| 12 |
-// ScalarMult sets dst to the product in*base where dst and base are the x |
|
| 13 |
-// coordinates of group points and all values are in little-endian form. |
|
| 14 |
-func ScalarMult(dst, in, base *[32]byte) {
|
|
| 15 |
- scalarMult(dst, in, base) |
|
| 16 |
-} |
|
| 17 |
- |
|
| 18 |
-// ScalarBaseMult sets dst to the product in*base where dst and base are the x |
|
| 19 |
-// coordinates of group points, base is the standard generator and all values |
|
| 20 |
-// are in little-endian form. |
|
| 21 |
-func ScalarBaseMult(dst, in *[32]byte) {
|
|
| 22 |
- ScalarMult(dst, in, &basePoint) |
|
| 23 |
-} |
| 24 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,73 +0,0 @@ |
| 1 |
-// Copyright 2012 The Go Authors. All rights reserved. |
|
| 2 |
-// Use of this source code is governed by a BSD-style |
|
| 3 |
-// license that can be found in the LICENSE file. |
|
| 4 |
- |
|
| 5 |
-// This code was translated into a form compatible with 6a from the public |
|
| 6 |
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html |
|
| 7 |
- |
|
| 8 |
-// +build amd64,!gccgo,!appengine |
|
| 9 |
- |
|
| 10 |
-#include "const_amd64.h" |
|
| 11 |
- |
|
| 12 |
-// func freeze(inout *[5]uint64) |
|
| 13 |
-TEXT ·freeze(SB),7,$0-8 |
|
| 14 |
- MOVQ inout+0(FP), DI |
|
| 15 |
- |
|
| 16 |
- MOVQ 0(DI),SI |
|
| 17 |
- MOVQ 8(DI),DX |
|
| 18 |
- MOVQ 16(DI),CX |
|
| 19 |
- MOVQ 24(DI),R8 |
|
| 20 |
- MOVQ 32(DI),R9 |
|
| 21 |
- MOVQ $REDMASK51,AX |
|
| 22 |
- MOVQ AX,R10 |
|
| 23 |
- SUBQ $18,R10 |
|
| 24 |
- MOVQ $3,R11 |
|
| 25 |
-REDUCELOOP: |
|
| 26 |
- MOVQ SI,R12 |
|
| 27 |
- SHRQ $51,R12 |
|
| 28 |
- ANDQ AX,SI |
|
| 29 |
- ADDQ R12,DX |
|
| 30 |
- MOVQ DX,R12 |
|
| 31 |
- SHRQ $51,R12 |
|
| 32 |
- ANDQ AX,DX |
|
| 33 |
- ADDQ R12,CX |
|
| 34 |
- MOVQ CX,R12 |
|
| 35 |
- SHRQ $51,R12 |
|
| 36 |
- ANDQ AX,CX |
|
| 37 |
- ADDQ R12,R8 |
|
| 38 |
- MOVQ R8,R12 |
|
| 39 |
- SHRQ $51,R12 |
|
| 40 |
- ANDQ AX,R8 |
|
| 41 |
- ADDQ R12,R9 |
|
| 42 |
- MOVQ R9,R12 |
|
| 43 |
- SHRQ $51,R12 |
|
| 44 |
- ANDQ AX,R9 |
|
| 45 |
- IMUL3Q $19,R12,R12 |
|
| 46 |
- ADDQ R12,SI |
|
| 47 |
- SUBQ $1,R11 |
|
| 48 |
- JA REDUCELOOP |
|
| 49 |
- MOVQ $1,R12 |
|
| 50 |
- CMPQ R10,SI |
|
| 51 |
- CMOVQLT R11,R12 |
|
| 52 |
- CMPQ AX,DX |
|
| 53 |
- CMOVQNE R11,R12 |
|
| 54 |
- CMPQ AX,CX |
|
| 55 |
- CMOVQNE R11,R12 |
|
| 56 |
- CMPQ AX,R8 |
|
| 57 |
- CMOVQNE R11,R12 |
|
| 58 |
- CMPQ AX,R9 |
|
| 59 |
- CMOVQNE R11,R12 |
|
| 60 |
- NEGQ R12 |
|
| 61 |
- ANDQ R12,AX |
|
| 62 |
- ANDQ R12,R10 |
|
| 63 |
- SUBQ R10,SI |
|
| 64 |
- SUBQ AX,DX |
|
| 65 |
- SUBQ AX,CX |
|
| 66 |
- SUBQ AX,R8 |
|
| 67 |
- SUBQ AX,R9 |
|
| 68 |
- MOVQ SI,0(DI) |
|
| 69 |
- MOVQ DX,8(DI) |
|
| 70 |
- MOVQ CX,16(DI) |
|
| 71 |
- MOVQ R8,24(DI) |
|
| 72 |
- MOVQ R9,32(DI) |
|
| 73 |
- RET |
| 74 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,1377 +0,0 @@ |
| 1 |
-// Copyright 2012 The Go Authors. All rights reserved. |
|
| 2 |
-// Use of this source code is governed by a BSD-style |
|
| 3 |
-// license that can be found in the LICENSE file. |
|
| 4 |
- |
|
| 5 |
-// This code was translated into a form compatible with 6a from the public |
|
| 6 |
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html |
|
| 7 |
- |
|
| 8 |
-// +build amd64,!gccgo,!appengine |
|
| 9 |
- |
|
| 10 |
-#include "const_amd64.h" |
|
| 11 |
- |
|
| 12 |
-// func ladderstep(inout *[5][5]uint64) |
|
| 13 |
-TEXT ·ladderstep(SB),0,$296-8 |
|
| 14 |
- MOVQ inout+0(FP),DI |
|
| 15 |
- |
|
| 16 |
- MOVQ 40(DI),SI |
|
| 17 |
- MOVQ 48(DI),DX |
|
| 18 |
- MOVQ 56(DI),CX |
|
| 19 |
- MOVQ 64(DI),R8 |
|
| 20 |
- MOVQ 72(DI),R9 |
|
| 21 |
- MOVQ SI,AX |
|
| 22 |
- MOVQ DX,R10 |
|
| 23 |
- MOVQ CX,R11 |
|
| 24 |
- MOVQ R8,R12 |
|
| 25 |
- MOVQ R9,R13 |
|
| 26 |
- ADDQ ·_2P0(SB),AX |
|
| 27 |
- ADDQ ·_2P1234(SB),R10 |
|
| 28 |
- ADDQ ·_2P1234(SB),R11 |
|
| 29 |
- ADDQ ·_2P1234(SB),R12 |
|
| 30 |
- ADDQ ·_2P1234(SB),R13 |
|
| 31 |
- ADDQ 80(DI),SI |
|
| 32 |
- ADDQ 88(DI),DX |
|
| 33 |
- ADDQ 96(DI),CX |
|
| 34 |
- ADDQ 104(DI),R8 |
|
| 35 |
- ADDQ 112(DI),R9 |
|
| 36 |
- SUBQ 80(DI),AX |
|
| 37 |
- SUBQ 88(DI),R10 |
|
| 38 |
- SUBQ 96(DI),R11 |
|
| 39 |
- SUBQ 104(DI),R12 |
|
| 40 |
- SUBQ 112(DI),R13 |
|
| 41 |
- MOVQ SI,0(SP) |
|
| 42 |
- MOVQ DX,8(SP) |
|
| 43 |
- MOVQ CX,16(SP) |
|
| 44 |
- MOVQ R8,24(SP) |
|
| 45 |
- MOVQ R9,32(SP) |
|
| 46 |
- MOVQ AX,40(SP) |
|
| 47 |
- MOVQ R10,48(SP) |
|
| 48 |
- MOVQ R11,56(SP) |
|
| 49 |
- MOVQ R12,64(SP) |
|
| 50 |
- MOVQ R13,72(SP) |
|
| 51 |
- MOVQ 40(SP),AX |
|
| 52 |
- MULQ 40(SP) |
|
| 53 |
- MOVQ AX,SI |
|
| 54 |
- MOVQ DX,CX |
|
| 55 |
- MOVQ 40(SP),AX |
|
| 56 |
- SHLQ $1,AX |
|
| 57 |
- MULQ 48(SP) |
|
| 58 |
- MOVQ AX,R8 |
|
| 59 |
- MOVQ DX,R9 |
|
| 60 |
- MOVQ 40(SP),AX |
|
| 61 |
- SHLQ $1,AX |
|
| 62 |
- MULQ 56(SP) |
|
| 63 |
- MOVQ AX,R10 |
|
| 64 |
- MOVQ DX,R11 |
|
| 65 |
- MOVQ 40(SP),AX |
|
| 66 |
- SHLQ $1,AX |
|
| 67 |
- MULQ 64(SP) |
|
| 68 |
- MOVQ AX,R12 |
|
| 69 |
- MOVQ DX,R13 |
|
| 70 |
- MOVQ 40(SP),AX |
|
| 71 |
- SHLQ $1,AX |
|
| 72 |
- MULQ 72(SP) |
|
| 73 |
- MOVQ AX,R14 |
|
| 74 |
- MOVQ DX,R15 |
|
| 75 |
- MOVQ 48(SP),AX |
|
| 76 |
- MULQ 48(SP) |
|
| 77 |
- ADDQ AX,R10 |
|
| 78 |
- ADCQ DX,R11 |
|
| 79 |
- MOVQ 48(SP),AX |
|
| 80 |
- SHLQ $1,AX |
|
| 81 |
- MULQ 56(SP) |
|
| 82 |
- ADDQ AX,R12 |
|
| 83 |
- ADCQ DX,R13 |
|
| 84 |
- MOVQ 48(SP),AX |
|
| 85 |
- SHLQ $1,AX |
|
| 86 |
- MULQ 64(SP) |
|
| 87 |
- ADDQ AX,R14 |
|
| 88 |
- ADCQ DX,R15 |
|
| 89 |
- MOVQ 48(SP),DX |
|
| 90 |
- IMUL3Q $38,DX,AX |
|
| 91 |
- MULQ 72(SP) |
|
| 92 |
- ADDQ AX,SI |
|
| 93 |
- ADCQ DX,CX |
|
| 94 |
- MOVQ 56(SP),AX |
|
| 95 |
- MULQ 56(SP) |
|
| 96 |
- ADDQ AX,R14 |
|
| 97 |
- ADCQ DX,R15 |
|
| 98 |
- MOVQ 56(SP),DX |
|
| 99 |
- IMUL3Q $38,DX,AX |
|
| 100 |
- MULQ 64(SP) |
|
| 101 |
- ADDQ AX,SI |
|
| 102 |
- ADCQ DX,CX |
|
| 103 |
- MOVQ 56(SP),DX |
|
| 104 |
- IMUL3Q $38,DX,AX |
|
| 105 |
- MULQ 72(SP) |
|
| 106 |
- ADDQ AX,R8 |
|
| 107 |
- ADCQ DX,R9 |
|
| 108 |
- MOVQ 64(SP),DX |
|
| 109 |
- IMUL3Q $19,DX,AX |
|
| 110 |
- MULQ 64(SP) |
|
| 111 |
- ADDQ AX,R8 |
|
| 112 |
- ADCQ DX,R9 |
|
| 113 |
- MOVQ 64(SP),DX |
|
| 114 |
- IMUL3Q $38,DX,AX |
|
| 115 |
- MULQ 72(SP) |
|
| 116 |
- ADDQ AX,R10 |
|
| 117 |
- ADCQ DX,R11 |
|
| 118 |
- MOVQ 72(SP),DX |
|
| 119 |
- IMUL3Q $19,DX,AX |
|
| 120 |
- MULQ 72(SP) |
|
| 121 |
- ADDQ AX,R12 |
|
| 122 |
- ADCQ DX,R13 |
|
| 123 |
- MOVQ $REDMASK51,DX |
|
| 124 |
- SHLQ $13,CX:SI |
|
| 125 |
- ANDQ DX,SI |
|
| 126 |
- SHLQ $13,R9:R8 |
|
| 127 |
- ANDQ DX,R8 |
|
| 128 |
- ADDQ CX,R8 |
|
| 129 |
- SHLQ $13,R11:R10 |
|
| 130 |
- ANDQ DX,R10 |
|
| 131 |
- ADDQ R9,R10 |
|
| 132 |
- SHLQ $13,R13:R12 |
|
| 133 |
- ANDQ DX,R12 |
|
| 134 |
- ADDQ R11,R12 |
|
| 135 |
- SHLQ $13,R15:R14 |
|
| 136 |
- ANDQ DX,R14 |
|
| 137 |
- ADDQ R13,R14 |
|
| 138 |
- IMUL3Q $19,R15,CX |
|
| 139 |
- ADDQ CX,SI |
|
| 140 |
- MOVQ SI,CX |
|
| 141 |
- SHRQ $51,CX |
|
| 142 |
- ADDQ R8,CX |
|
| 143 |
- ANDQ DX,SI |
|
| 144 |
- MOVQ CX,R8 |
|
| 145 |
- SHRQ $51,CX |
|
| 146 |
- ADDQ R10,CX |
|
| 147 |
- ANDQ DX,R8 |
|
| 148 |
- MOVQ CX,R9 |
|
| 149 |
- SHRQ $51,CX |
|
| 150 |
- ADDQ R12,CX |
|
| 151 |
- ANDQ DX,R9 |
|
| 152 |
- MOVQ CX,AX |
|
| 153 |
- SHRQ $51,CX |
|
| 154 |
- ADDQ R14,CX |
|
| 155 |
- ANDQ DX,AX |
|
| 156 |
- MOVQ CX,R10 |
|
| 157 |
- SHRQ $51,CX |
|
| 158 |
- IMUL3Q $19,CX,CX |
|
| 159 |
- ADDQ CX,SI |
|
| 160 |
- ANDQ DX,R10 |
|
| 161 |
- MOVQ SI,80(SP) |
|
| 162 |
- MOVQ R8,88(SP) |
|
| 163 |
- MOVQ R9,96(SP) |
|
| 164 |
- MOVQ AX,104(SP) |
|
| 165 |
- MOVQ R10,112(SP) |
|
| 166 |
- MOVQ 0(SP),AX |
|
| 167 |
- MULQ 0(SP) |
|
| 168 |
- MOVQ AX,SI |
|
| 169 |
- MOVQ DX,CX |
|
| 170 |
- MOVQ 0(SP),AX |
|
| 171 |
- SHLQ $1,AX |
|
| 172 |
- MULQ 8(SP) |
|
| 173 |
- MOVQ AX,R8 |
|
| 174 |
- MOVQ DX,R9 |
|
| 175 |
- MOVQ 0(SP),AX |
|
| 176 |
- SHLQ $1,AX |
|
| 177 |
- MULQ 16(SP) |
|
| 178 |
- MOVQ AX,R10 |
|
| 179 |
- MOVQ DX,R11 |
|
| 180 |
- MOVQ 0(SP),AX |
|
| 181 |
- SHLQ $1,AX |
|
| 182 |
- MULQ 24(SP) |
|
| 183 |
- MOVQ AX,R12 |
|
| 184 |
- MOVQ DX,R13 |
|
| 185 |
- MOVQ 0(SP),AX |
|
| 186 |
- SHLQ $1,AX |
|
| 187 |
- MULQ 32(SP) |
|
| 188 |
- MOVQ AX,R14 |
|
| 189 |
- MOVQ DX,R15 |
|
| 190 |
- MOVQ 8(SP),AX |
|
| 191 |
- MULQ 8(SP) |
|
| 192 |
- ADDQ AX,R10 |
|
| 193 |
- ADCQ DX,R11 |
|
| 194 |
- MOVQ 8(SP),AX |
|
| 195 |
- SHLQ $1,AX |
|
| 196 |
- MULQ 16(SP) |
|
| 197 |
- ADDQ AX,R12 |
|
| 198 |
- ADCQ DX,R13 |
|
| 199 |
- MOVQ 8(SP),AX |
|
| 200 |
- SHLQ $1,AX |
|
| 201 |
- MULQ 24(SP) |
|
| 202 |
- ADDQ AX,R14 |
|
| 203 |
- ADCQ DX,R15 |
|
| 204 |
- MOVQ 8(SP),DX |
|
| 205 |
- IMUL3Q $38,DX,AX |
|
| 206 |
- MULQ 32(SP) |
|
| 207 |
- ADDQ AX,SI |
|
| 208 |
- ADCQ DX,CX |
|
| 209 |
- MOVQ 16(SP),AX |
|
| 210 |
- MULQ 16(SP) |
|
| 211 |
- ADDQ AX,R14 |
|
| 212 |
- ADCQ DX,R15 |
|
| 213 |
- MOVQ 16(SP),DX |
|
| 214 |
- IMUL3Q $38,DX,AX |
|
| 215 |
- MULQ 24(SP) |
|
| 216 |
- ADDQ AX,SI |
|
| 217 |
- ADCQ DX,CX |
|
| 218 |
- MOVQ 16(SP),DX |
|
| 219 |
- IMUL3Q $38,DX,AX |
|
| 220 |
- MULQ 32(SP) |
|
| 221 |
- ADDQ AX,R8 |
|
| 222 |
- ADCQ DX,R9 |
|
| 223 |
- MOVQ 24(SP),DX |
|
| 224 |
- IMUL3Q $19,DX,AX |
|
| 225 |
- MULQ 24(SP) |
|
| 226 |
- ADDQ AX,R8 |
|
| 227 |
- ADCQ DX,R9 |
|
| 228 |
- MOVQ 24(SP),DX |
|
| 229 |
- IMUL3Q $38,DX,AX |
|
| 230 |
- MULQ 32(SP) |
|
| 231 |
- ADDQ AX,R10 |
|
| 232 |
- ADCQ DX,R11 |
|
| 233 |
- MOVQ 32(SP),DX |
|
| 234 |
- IMUL3Q $19,DX,AX |
|
| 235 |
- MULQ 32(SP) |
|
| 236 |
- ADDQ AX,R12 |
|
| 237 |
- ADCQ DX,R13 |
|
| 238 |
- MOVQ $REDMASK51,DX |
|
| 239 |
- SHLQ $13,CX:SI |
|
| 240 |
- ANDQ DX,SI |
|
| 241 |
- SHLQ $13,R9:R8 |
|
| 242 |
- ANDQ DX,R8 |
|
| 243 |
- ADDQ CX,R8 |
|
| 244 |
- SHLQ $13,R11:R10 |
|
| 245 |
- ANDQ DX,R10 |
|
| 246 |
- ADDQ R9,R10 |
|
| 247 |
- SHLQ $13,R13:R12 |
|
| 248 |
- ANDQ DX,R12 |
|
| 249 |
- ADDQ R11,R12 |
|
| 250 |
- SHLQ $13,R15:R14 |
|
| 251 |
- ANDQ DX,R14 |
|
| 252 |
- ADDQ R13,R14 |
|
| 253 |
- IMUL3Q $19,R15,CX |
|
| 254 |
- ADDQ CX,SI |
|
| 255 |
- MOVQ SI,CX |
|
| 256 |
- SHRQ $51,CX |
|
| 257 |
- ADDQ R8,CX |
|
| 258 |
- ANDQ DX,SI |
|
| 259 |
- MOVQ CX,R8 |
|
| 260 |
- SHRQ $51,CX |
|
| 261 |
- ADDQ R10,CX |
|
| 262 |
- ANDQ DX,R8 |
|
| 263 |
- MOVQ CX,R9 |
|
| 264 |
- SHRQ $51,CX |
|
| 265 |
- ADDQ R12,CX |
|
| 266 |
- ANDQ DX,R9 |
|
| 267 |
- MOVQ CX,AX |
|
| 268 |
- SHRQ $51,CX |
|
| 269 |
- ADDQ R14,CX |
|
| 270 |
- ANDQ DX,AX |
|
| 271 |
- MOVQ CX,R10 |
|
| 272 |
- SHRQ $51,CX |
|
| 273 |
- IMUL3Q $19,CX,CX |
|
| 274 |
- ADDQ CX,SI |
|
| 275 |
- ANDQ DX,R10 |
|
| 276 |
- MOVQ SI,120(SP) |
|
| 277 |
- MOVQ R8,128(SP) |
|
| 278 |
- MOVQ R9,136(SP) |
|
| 279 |
- MOVQ AX,144(SP) |
|
| 280 |
- MOVQ R10,152(SP) |
|
| 281 |
- MOVQ SI,SI |
|
| 282 |
- MOVQ R8,DX |
|
| 283 |
- MOVQ R9,CX |
|
| 284 |
- MOVQ AX,R8 |
|
| 285 |
- MOVQ R10,R9 |
|
| 286 |
- ADDQ ·_2P0(SB),SI |
|
| 287 |
- ADDQ ·_2P1234(SB),DX |
|
| 288 |
- ADDQ ·_2P1234(SB),CX |
|
| 289 |
- ADDQ ·_2P1234(SB),R8 |
|
| 290 |
- ADDQ ·_2P1234(SB),R9 |
|
| 291 |
- SUBQ 80(SP),SI |
|
| 292 |
- SUBQ 88(SP),DX |
|
| 293 |
- SUBQ 96(SP),CX |
|
| 294 |
- SUBQ 104(SP),R8 |
|
| 295 |
- SUBQ 112(SP),R9 |
|
| 296 |
- MOVQ SI,160(SP) |
|
| 297 |
- MOVQ DX,168(SP) |
|
| 298 |
- MOVQ CX,176(SP) |
|
| 299 |
- MOVQ R8,184(SP) |
|
| 300 |
- MOVQ R9,192(SP) |
|
| 301 |
- MOVQ 120(DI),SI |
|
| 302 |
- MOVQ 128(DI),DX |
|
| 303 |
- MOVQ 136(DI),CX |
|
| 304 |
- MOVQ 144(DI),R8 |
|
| 305 |
- MOVQ 152(DI),R9 |
|
| 306 |
- MOVQ SI,AX |
|
| 307 |
- MOVQ DX,R10 |
|
| 308 |
- MOVQ CX,R11 |
|
| 309 |
- MOVQ R8,R12 |
|
| 310 |
- MOVQ R9,R13 |
|
| 311 |
- ADDQ ·_2P0(SB),AX |
|
| 312 |
- ADDQ ·_2P1234(SB),R10 |
|
| 313 |
- ADDQ ·_2P1234(SB),R11 |
|
| 314 |
- ADDQ ·_2P1234(SB),R12 |
|
| 315 |
- ADDQ ·_2P1234(SB),R13 |
|
| 316 |
- ADDQ 160(DI),SI |
|
| 317 |
- ADDQ 168(DI),DX |
|
| 318 |
- ADDQ 176(DI),CX |
|
| 319 |
- ADDQ 184(DI),R8 |
|
| 320 |
- ADDQ 192(DI),R9 |
|
| 321 |
- SUBQ 160(DI),AX |
|
| 322 |
- SUBQ 168(DI),R10 |
|
| 323 |
- SUBQ 176(DI),R11 |
|
| 324 |
- SUBQ 184(DI),R12 |
|
| 325 |
- SUBQ 192(DI),R13 |
|
| 326 |
- MOVQ SI,200(SP) |
|
| 327 |
- MOVQ DX,208(SP) |
|
| 328 |
- MOVQ CX,216(SP) |
|
| 329 |
- MOVQ R8,224(SP) |
|
| 330 |
- MOVQ R9,232(SP) |
|
| 331 |
- MOVQ AX,240(SP) |
|
| 332 |
- MOVQ R10,248(SP) |
|
| 333 |
- MOVQ R11,256(SP) |
|
| 334 |
- MOVQ R12,264(SP) |
|
| 335 |
- MOVQ R13,272(SP) |
|
| 336 |
- MOVQ 224(SP),SI |
|
| 337 |
- IMUL3Q $19,SI,AX |
|
| 338 |
- MOVQ AX,280(SP) |
|
| 339 |
- MULQ 56(SP) |
|
| 340 |
- MOVQ AX,SI |
|
| 341 |
- MOVQ DX,CX |
|
| 342 |
- MOVQ 232(SP),DX |
|
| 343 |
- IMUL3Q $19,DX,AX |
|
| 344 |
- MOVQ AX,288(SP) |
|
| 345 |
- MULQ 48(SP) |
|
| 346 |
- ADDQ AX,SI |
|
| 347 |
- ADCQ DX,CX |
|
| 348 |
- MOVQ 200(SP),AX |
|
| 349 |
- MULQ 40(SP) |
|
| 350 |
- ADDQ AX,SI |
|
| 351 |
- ADCQ DX,CX |
|
| 352 |
- MOVQ 200(SP),AX |
|
| 353 |
- MULQ 48(SP) |
|
| 354 |
- MOVQ AX,R8 |
|
| 355 |
- MOVQ DX,R9 |
|
| 356 |
- MOVQ 200(SP),AX |
|
| 357 |
- MULQ 56(SP) |
|
| 358 |
- MOVQ AX,R10 |
|
| 359 |
- MOVQ DX,R11 |
|
| 360 |
- MOVQ 200(SP),AX |
|
| 361 |
- MULQ 64(SP) |
|
| 362 |
- MOVQ AX,R12 |
|
| 363 |
- MOVQ DX,R13 |
|
| 364 |
- MOVQ 200(SP),AX |
|
| 365 |
- MULQ 72(SP) |
|
| 366 |
- MOVQ AX,R14 |
|
| 367 |
- MOVQ DX,R15 |
|
| 368 |
- MOVQ 208(SP),AX |
|
| 369 |
- MULQ 40(SP) |
|
| 370 |
- ADDQ AX,R8 |
|
| 371 |
- ADCQ DX,R9 |
|
| 372 |
- MOVQ 208(SP),AX |
|
| 373 |
- MULQ 48(SP) |
|
| 374 |
- ADDQ AX,R10 |
|
| 375 |
- ADCQ DX,R11 |
|
| 376 |
- MOVQ 208(SP),AX |
|
| 377 |
- MULQ 56(SP) |
|
| 378 |
- ADDQ AX,R12 |
|
| 379 |
- ADCQ DX,R13 |
|
| 380 |
- MOVQ 208(SP),AX |
|
| 381 |
- MULQ 64(SP) |
|
| 382 |
- ADDQ AX,R14 |
|
| 383 |
- ADCQ DX,R15 |
|
| 384 |
- MOVQ 208(SP),DX |
|
| 385 |
- IMUL3Q $19,DX,AX |
|
| 386 |
- MULQ 72(SP) |
|
| 387 |
- ADDQ AX,SI |
|
| 388 |
- ADCQ DX,CX |
|
| 389 |
- MOVQ 216(SP),AX |
|
| 390 |
- MULQ 40(SP) |
|
| 391 |
- ADDQ AX,R10 |
|
| 392 |
- ADCQ DX,R11 |
|
| 393 |
- MOVQ 216(SP),AX |
|
| 394 |
- MULQ 48(SP) |
|
| 395 |
- ADDQ AX,R12 |
|
| 396 |
- ADCQ DX,R13 |
|
| 397 |
- MOVQ 216(SP),AX |
|
| 398 |
- MULQ 56(SP) |
|
| 399 |
- ADDQ AX,R14 |
|
| 400 |
- ADCQ DX,R15 |
|
| 401 |
- MOVQ 216(SP),DX |
|
| 402 |
- IMUL3Q $19,DX,AX |
|
| 403 |
- MULQ 64(SP) |
|
| 404 |
- ADDQ AX,SI |
|
| 405 |
- ADCQ DX,CX |
|
| 406 |
- MOVQ 216(SP),DX |
|
| 407 |
- IMUL3Q $19,DX,AX |
|
| 408 |
- MULQ 72(SP) |
|
| 409 |
- ADDQ AX,R8 |
|
| 410 |
- ADCQ DX,R9 |
|
| 411 |
- MOVQ 224(SP),AX |
|
| 412 |
- MULQ 40(SP) |
|
| 413 |
- ADDQ AX,R12 |
|
| 414 |
- ADCQ DX,R13 |
|
| 415 |
- MOVQ 224(SP),AX |
|
| 416 |
- MULQ 48(SP) |
|
| 417 |
- ADDQ AX,R14 |
|
| 418 |
- ADCQ DX,R15 |
|
| 419 |
- MOVQ 280(SP),AX |
|
| 420 |
- MULQ 64(SP) |
|
| 421 |
- ADDQ AX,R8 |
|
| 422 |
- ADCQ DX,R9 |
|
| 423 |
- MOVQ 280(SP),AX |
|
| 424 |
- MULQ 72(SP) |
|
| 425 |
- ADDQ AX,R10 |
|
| 426 |
- ADCQ DX,R11 |
|
| 427 |
- MOVQ 232(SP),AX |
|
| 428 |
- MULQ 40(SP) |
|
| 429 |
- ADDQ AX,R14 |
|
| 430 |
- ADCQ DX,R15 |
|
| 431 |
- MOVQ 288(SP),AX |
|
| 432 |
- MULQ 56(SP) |
|
| 433 |
- ADDQ AX,R8 |
|
| 434 |
- ADCQ DX,R9 |
|
| 435 |
- MOVQ 288(SP),AX |
|
| 436 |
- MULQ 64(SP) |
|
| 437 |
- ADDQ AX,R10 |
|
| 438 |
- ADCQ DX,R11 |
|
| 439 |
- MOVQ 288(SP),AX |
|
| 440 |
- MULQ 72(SP) |
|
| 441 |
- ADDQ AX,R12 |
|
| 442 |
- ADCQ DX,R13 |
|
| 443 |
- MOVQ $REDMASK51,DX |
|
| 444 |
- SHLQ $13,CX:SI |
|
| 445 |
- ANDQ DX,SI |
|
| 446 |
- SHLQ $13,R9:R8 |
|
| 447 |
- ANDQ DX,R8 |
|
| 448 |
- ADDQ CX,R8 |
|
| 449 |
- SHLQ $13,R11:R10 |
|
| 450 |
- ANDQ DX,R10 |
|
| 451 |
- ADDQ R9,R10 |
|
| 452 |
- SHLQ $13,R13:R12 |
|
| 453 |
- ANDQ DX,R12 |
|
| 454 |
- ADDQ R11,R12 |
|
| 455 |
- SHLQ $13,R15:R14 |
|
| 456 |
- ANDQ DX,R14 |
|
| 457 |
- ADDQ R13,R14 |
|
| 458 |
- IMUL3Q $19,R15,CX |
|
| 459 |
- ADDQ CX,SI |
|
| 460 |
- MOVQ SI,CX |
|
| 461 |
- SHRQ $51,CX |
|
| 462 |
- ADDQ R8,CX |
|
| 463 |
- MOVQ CX,R8 |
|
| 464 |
- SHRQ $51,CX |
|
| 465 |
- ANDQ DX,SI |
|
| 466 |
- ADDQ R10,CX |
|
| 467 |
- MOVQ CX,R9 |
|
| 468 |
- SHRQ $51,CX |
|
| 469 |
- ANDQ DX,R8 |
|
| 470 |
- ADDQ R12,CX |
|
| 471 |
- MOVQ CX,AX |
|
| 472 |
- SHRQ $51,CX |
|
| 473 |
- ANDQ DX,R9 |
|
| 474 |
- ADDQ R14,CX |
|
| 475 |
- MOVQ CX,R10 |
|
| 476 |
- SHRQ $51,CX |
|
| 477 |
- ANDQ DX,AX |
|
| 478 |
- IMUL3Q $19,CX,CX |
|
| 479 |
- ADDQ CX,SI |
|
| 480 |
- ANDQ DX,R10 |
|
| 481 |
- MOVQ SI,40(SP) |
|
| 482 |
- MOVQ R8,48(SP) |
|
| 483 |
- MOVQ R9,56(SP) |
|
| 484 |
- MOVQ AX,64(SP) |
|
| 485 |
- MOVQ R10,72(SP) |
|
| 486 |
- MOVQ 264(SP),SI |
|
| 487 |
- IMUL3Q $19,SI,AX |
|
| 488 |
- MOVQ AX,200(SP) |
|
| 489 |
- MULQ 16(SP) |
|
| 490 |
- MOVQ AX,SI |
|
| 491 |
- MOVQ DX,CX |
|
| 492 |
- MOVQ 272(SP),DX |
|
| 493 |
- IMUL3Q $19,DX,AX |
|
| 494 |
- MOVQ AX,208(SP) |
|
| 495 |
- MULQ 8(SP) |
|
| 496 |
- ADDQ AX,SI |
|
| 497 |
- ADCQ DX,CX |
|
| 498 |
- MOVQ 240(SP),AX |
|
| 499 |
- MULQ 0(SP) |
|
| 500 |
- ADDQ AX,SI |
|
| 501 |
- ADCQ DX,CX |
|
| 502 |
- MOVQ 240(SP),AX |
|
| 503 |
- MULQ 8(SP) |
|
| 504 |
- MOVQ AX,R8 |
|
| 505 |
- MOVQ DX,R9 |
|
| 506 |
- MOVQ 240(SP),AX |
|
| 507 |
- MULQ 16(SP) |
|
| 508 |
- MOVQ AX,R10 |
|
| 509 |
- MOVQ DX,R11 |
|
| 510 |
- MOVQ 240(SP),AX |
|
| 511 |
- MULQ 24(SP) |
|
| 512 |
- MOVQ AX,R12 |
|
| 513 |
- MOVQ DX,R13 |
|
| 514 |
- MOVQ 240(SP),AX |
|
| 515 |
- MULQ 32(SP) |
|
| 516 |
- MOVQ AX,R14 |
|
| 517 |
- MOVQ DX,R15 |
|
| 518 |
- MOVQ 248(SP),AX |
|
| 519 |
- MULQ 0(SP) |
|
| 520 |
- ADDQ AX,R8 |
|
| 521 |
- ADCQ DX,R9 |
|
| 522 |
- MOVQ 248(SP),AX |
|
| 523 |
- MULQ 8(SP) |
|
| 524 |
- ADDQ AX,R10 |
|
| 525 |
- ADCQ DX,R11 |
|
| 526 |
- MOVQ 248(SP),AX |
|
| 527 |
- MULQ 16(SP) |
|
| 528 |
- ADDQ AX,R12 |
|
| 529 |
- ADCQ DX,R13 |
|
| 530 |
- MOVQ 248(SP),AX |
|
| 531 |
- MULQ 24(SP) |
|
| 532 |
- ADDQ AX,R14 |
|
| 533 |
- ADCQ DX,R15 |
|
| 534 |
- MOVQ 248(SP),DX |
|
| 535 |
- IMUL3Q $19,DX,AX |
|
| 536 |
- MULQ 32(SP) |
|
| 537 |
- ADDQ AX,SI |
|
| 538 |
- ADCQ DX,CX |
|
| 539 |
- MOVQ 256(SP),AX |
|
| 540 |
- MULQ 0(SP) |
|
| 541 |
- ADDQ AX,R10 |
|
| 542 |
- ADCQ DX,R11 |
|
| 543 |
- MOVQ 256(SP),AX |
|
| 544 |
- MULQ 8(SP) |
|
| 545 |
- ADDQ AX,R12 |
|
| 546 |
- ADCQ DX,R13 |
|
| 547 |
- MOVQ 256(SP),AX |
|
| 548 |
- MULQ 16(SP) |
|
| 549 |
- ADDQ AX,R14 |
|
| 550 |
- ADCQ DX,R15 |
|
| 551 |
- MOVQ 256(SP),DX |
|
| 552 |
- IMUL3Q $19,DX,AX |
|
| 553 |
- MULQ 24(SP) |
|
| 554 |
- ADDQ AX,SI |
|
| 555 |
- ADCQ DX,CX |
|
| 556 |
- MOVQ 256(SP),DX |
|
| 557 |
- IMUL3Q $19,DX,AX |
|
| 558 |
- MULQ 32(SP) |
|
| 559 |
- ADDQ AX,R8 |
|
| 560 |
- ADCQ DX,R9 |
|
| 561 |
- MOVQ 264(SP),AX |
|
| 562 |
- MULQ 0(SP) |
|
| 563 |
- ADDQ AX,R12 |
|
| 564 |
- ADCQ DX,R13 |
|
| 565 |
- MOVQ 264(SP),AX |
|
| 566 |
- MULQ 8(SP) |
|
| 567 |
- ADDQ AX,R14 |
|
| 568 |
- ADCQ DX,R15 |
|
| 569 |
- MOVQ 200(SP),AX |
|
| 570 |
- MULQ 24(SP) |
|
| 571 |
- ADDQ AX,R8 |
|
| 572 |
- ADCQ DX,R9 |
|
| 573 |
- MOVQ 200(SP),AX |
|
| 574 |
- MULQ 32(SP) |
|
| 575 |
- ADDQ AX,R10 |
|
| 576 |
- ADCQ DX,R11 |
|
| 577 |
- MOVQ 272(SP),AX |
|
| 578 |
- MULQ 0(SP) |
|
| 579 |
- ADDQ AX,R14 |
|
| 580 |
- ADCQ DX,R15 |
|
| 581 |
- MOVQ 208(SP),AX |
|
| 582 |
- MULQ 16(SP) |
|
| 583 |
- ADDQ AX,R8 |
|
| 584 |
- ADCQ DX,R9 |
|
| 585 |
- MOVQ 208(SP),AX |
|
| 586 |
- MULQ 24(SP) |
|
| 587 |
- ADDQ AX,R10 |
|
| 588 |
- ADCQ DX,R11 |
|
| 589 |
- MOVQ 208(SP),AX |
|
| 590 |
- MULQ 32(SP) |
|
| 591 |
- ADDQ AX,R12 |
|
| 592 |
- ADCQ DX,R13 |
|
| 593 |
- MOVQ $REDMASK51,DX |
|
| 594 |
- SHLQ $13,CX:SI |
|
| 595 |
- ANDQ DX,SI |
|
| 596 |
- SHLQ $13,R9:R8 |
|
| 597 |
- ANDQ DX,R8 |
|
| 598 |
- ADDQ CX,R8 |
|
| 599 |
- SHLQ $13,R11:R10 |
|
| 600 |
- ANDQ DX,R10 |
|
| 601 |
- ADDQ R9,R10 |
|
| 602 |
- SHLQ $13,R13:R12 |
|
| 603 |
- ANDQ DX,R12 |
|
| 604 |
- ADDQ R11,R12 |
|
| 605 |
- SHLQ $13,R15:R14 |
|
| 606 |
- ANDQ DX,R14 |
|
| 607 |
- ADDQ R13,R14 |
|
| 608 |
- IMUL3Q $19,R15,CX |
|
| 609 |
- ADDQ CX,SI |
|
| 610 |
- MOVQ SI,CX |
|
| 611 |
- SHRQ $51,CX |
|
| 612 |
- ADDQ R8,CX |
|
| 613 |
- MOVQ CX,R8 |
|
| 614 |
- SHRQ $51,CX |
|
| 615 |
- ANDQ DX,SI |
|
| 616 |
- ADDQ R10,CX |
|
| 617 |
- MOVQ CX,R9 |
|
| 618 |
- SHRQ $51,CX |
|
| 619 |
- ANDQ DX,R8 |
|
| 620 |
- ADDQ R12,CX |
|
| 621 |
- MOVQ CX,AX |
|
| 622 |
- SHRQ $51,CX |
|
| 623 |
- ANDQ DX,R9 |
|
| 624 |
- ADDQ R14,CX |
|
| 625 |
- MOVQ CX,R10 |
|
| 626 |
- SHRQ $51,CX |
|
| 627 |
- ANDQ DX,AX |
|
| 628 |
- IMUL3Q $19,CX,CX |
|
| 629 |
- ADDQ CX,SI |
|
| 630 |
- ANDQ DX,R10 |
|
| 631 |
- MOVQ SI,DX |
|
| 632 |
- MOVQ R8,CX |
|
| 633 |
- MOVQ R9,R11 |
|
| 634 |
- MOVQ AX,R12 |
|
| 635 |
- MOVQ R10,R13 |
|
| 636 |
- ADDQ ·_2P0(SB),DX |
|
| 637 |
- ADDQ ·_2P1234(SB),CX |
|
| 638 |
- ADDQ ·_2P1234(SB),R11 |
|
| 639 |
- ADDQ ·_2P1234(SB),R12 |
|
| 640 |
- ADDQ ·_2P1234(SB),R13 |
|
| 641 |
- ADDQ 40(SP),SI |
|
| 642 |
- ADDQ 48(SP),R8 |
|
| 643 |
- ADDQ 56(SP),R9 |
|
| 644 |
- ADDQ 64(SP),AX |
|
| 645 |
- ADDQ 72(SP),R10 |
|
| 646 |
- SUBQ 40(SP),DX |
|
| 647 |
- SUBQ 48(SP),CX |
|
| 648 |
- SUBQ 56(SP),R11 |
|
| 649 |
- SUBQ 64(SP),R12 |
|
| 650 |
- SUBQ 72(SP),R13 |
|
| 651 |
- MOVQ SI,120(DI) |
|
| 652 |
- MOVQ R8,128(DI) |
|
| 653 |
- MOVQ R9,136(DI) |
|
| 654 |
- MOVQ AX,144(DI) |
|
| 655 |
- MOVQ R10,152(DI) |
|
| 656 |
- MOVQ DX,160(DI) |
|
| 657 |
- MOVQ CX,168(DI) |
|
| 658 |
- MOVQ R11,176(DI) |
|
| 659 |
- MOVQ R12,184(DI) |
|
| 660 |
- MOVQ R13,192(DI) |
|
| 661 |
- MOVQ 120(DI),AX |
|
| 662 |
- MULQ 120(DI) |
|
| 663 |
- MOVQ AX,SI |
|
| 664 |
- MOVQ DX,CX |
|
| 665 |
- MOVQ 120(DI),AX |
|
| 666 |
- SHLQ $1,AX |
|
| 667 |
- MULQ 128(DI) |
|
| 668 |
- MOVQ AX,R8 |
|
| 669 |
- MOVQ DX,R9 |
|
| 670 |
- MOVQ 120(DI),AX |
|
| 671 |
- SHLQ $1,AX |
|
| 672 |
- MULQ 136(DI) |
|
| 673 |
- MOVQ AX,R10 |
|
| 674 |
- MOVQ DX,R11 |
|
| 675 |
- MOVQ 120(DI),AX |
|
| 676 |
- SHLQ $1,AX |
|
| 677 |
- MULQ 144(DI) |
|
| 678 |
- MOVQ AX,R12 |
|
| 679 |
- MOVQ DX,R13 |
|
| 680 |
- MOVQ 120(DI),AX |
|
| 681 |
- SHLQ $1,AX |
|
| 682 |
- MULQ 152(DI) |
|
| 683 |
- MOVQ AX,R14 |
|
| 684 |
- MOVQ DX,R15 |
|
| 685 |
- MOVQ 128(DI),AX |
|
| 686 |
- MULQ 128(DI) |
|
| 687 |
- ADDQ AX,R10 |
|
| 688 |
- ADCQ DX,R11 |
|
| 689 |
- MOVQ 128(DI),AX |
|
| 690 |
- SHLQ $1,AX |
|
| 691 |
- MULQ 136(DI) |
|
| 692 |
- ADDQ AX,R12 |
|
| 693 |
- ADCQ DX,R13 |
|
| 694 |
- MOVQ 128(DI),AX |
|
| 695 |
- SHLQ $1,AX |
|
| 696 |
- MULQ 144(DI) |
|
| 697 |
- ADDQ AX,R14 |
|
| 698 |
- ADCQ DX,R15 |
|
| 699 |
- MOVQ 128(DI),DX |
|
| 700 |
- IMUL3Q $38,DX,AX |
|
| 701 |
- MULQ 152(DI) |
|
| 702 |
- ADDQ AX,SI |
|
| 703 |
- ADCQ DX,CX |
|
| 704 |
- MOVQ 136(DI),AX |
|
| 705 |
- MULQ 136(DI) |
|
| 706 |
- ADDQ AX,R14 |
|
| 707 |
- ADCQ DX,R15 |
|
| 708 |
- MOVQ 136(DI),DX |
|
| 709 |
- IMUL3Q $38,DX,AX |
|
| 710 |
- MULQ 144(DI) |
|
| 711 |
- ADDQ AX,SI |
|
| 712 |
- ADCQ DX,CX |
|
| 713 |
- MOVQ 136(DI),DX |
|
| 714 |
- IMUL3Q $38,DX,AX |
|
| 715 |
- MULQ 152(DI) |
|
| 716 |
- ADDQ AX,R8 |
|
| 717 |
- ADCQ DX,R9 |
|
| 718 |
- MOVQ 144(DI),DX |
|
| 719 |
- IMUL3Q $19,DX,AX |
|
| 720 |
- MULQ 144(DI) |
|
| 721 |
- ADDQ AX,R8 |
|
| 722 |
- ADCQ DX,R9 |
|
| 723 |
- MOVQ 144(DI),DX |
|
| 724 |
- IMUL3Q $38,DX,AX |
|
| 725 |
- MULQ 152(DI) |
|
| 726 |
- ADDQ AX,R10 |
|
| 727 |
- ADCQ DX,R11 |
|
| 728 |
- MOVQ 152(DI),DX |
|
| 729 |
- IMUL3Q $19,DX,AX |
|
| 730 |
- MULQ 152(DI) |
|
| 731 |
- ADDQ AX,R12 |
|
| 732 |
- ADCQ DX,R13 |
|
| 733 |
- MOVQ $REDMASK51,DX |
|
| 734 |
- SHLQ $13,CX:SI |
|
| 735 |
- ANDQ DX,SI |
|
| 736 |
- SHLQ $13,R9:R8 |
|
| 737 |
- ANDQ DX,R8 |
|
| 738 |
- ADDQ CX,R8 |
|
| 739 |
- SHLQ $13,R11:R10 |
|
| 740 |
- ANDQ DX,R10 |
|
| 741 |
- ADDQ R9,R10 |
|
| 742 |
- SHLQ $13,R13:R12 |
|
| 743 |
- ANDQ DX,R12 |
|
| 744 |
- ADDQ R11,R12 |
|
| 745 |
- SHLQ $13,R15:R14 |
|
| 746 |
- ANDQ DX,R14 |
|
| 747 |
- ADDQ R13,R14 |
|
| 748 |
- IMUL3Q $19,R15,CX |
|
| 749 |
- ADDQ CX,SI |
|
| 750 |
- MOVQ SI,CX |
|
| 751 |
- SHRQ $51,CX |
|
| 752 |
- ADDQ R8,CX |
|
| 753 |
- ANDQ DX,SI |
|
| 754 |
- MOVQ CX,R8 |
|
| 755 |
- SHRQ $51,CX |
|
| 756 |
- ADDQ R10,CX |
|
| 757 |
- ANDQ DX,R8 |
|
| 758 |
- MOVQ CX,R9 |
|
| 759 |
- SHRQ $51,CX |
|
| 760 |
- ADDQ R12,CX |
|
| 761 |
- ANDQ DX,R9 |
|
| 762 |
- MOVQ CX,AX |
|
| 763 |
- SHRQ $51,CX |
|
| 764 |
- ADDQ R14,CX |
|
| 765 |
- ANDQ DX,AX |
|
| 766 |
- MOVQ CX,R10 |
|
| 767 |
- SHRQ $51,CX |
|
| 768 |
- IMUL3Q $19,CX,CX |
|
| 769 |
- ADDQ CX,SI |
|
| 770 |
- ANDQ DX,R10 |
|
| 771 |
- MOVQ SI,120(DI) |
|
| 772 |
- MOVQ R8,128(DI) |
|
| 773 |
- MOVQ R9,136(DI) |
|
| 774 |
- MOVQ AX,144(DI) |
|
| 775 |
- MOVQ R10,152(DI) |
|
| 776 |
- MOVQ 160(DI),AX |
|
| 777 |
- MULQ 160(DI) |
|
| 778 |
- MOVQ AX,SI |
|
| 779 |
- MOVQ DX,CX |
|
| 780 |
- MOVQ 160(DI),AX |
|
| 781 |
- SHLQ $1,AX |
|
| 782 |
- MULQ 168(DI) |
|
| 783 |
- MOVQ AX,R8 |
|
| 784 |
- MOVQ DX,R9 |
|
| 785 |
- MOVQ 160(DI),AX |
|
| 786 |
- SHLQ $1,AX |
|
| 787 |
- MULQ 176(DI) |
|
| 788 |
- MOVQ AX,R10 |
|
| 789 |
- MOVQ DX,R11 |
|
| 790 |
- MOVQ 160(DI),AX |
|
| 791 |
- SHLQ $1,AX |
|
| 792 |
- MULQ 184(DI) |
|
| 793 |
- MOVQ AX,R12 |
|
| 794 |
- MOVQ DX,R13 |
|
| 795 |
- MOVQ 160(DI),AX |
|
| 796 |
- SHLQ $1,AX |
|
| 797 |
- MULQ 192(DI) |
|
| 798 |
- MOVQ AX,R14 |
|
| 799 |
- MOVQ DX,R15 |
|
| 800 |
- MOVQ 168(DI),AX |
|
| 801 |
- MULQ 168(DI) |
|
| 802 |
- ADDQ AX,R10 |
|
| 803 |
- ADCQ DX,R11 |
|
| 804 |
- MOVQ 168(DI),AX |
|
| 805 |
- SHLQ $1,AX |
|
| 806 |
- MULQ 176(DI) |
|
| 807 |
- ADDQ AX,R12 |
|
| 808 |
- ADCQ DX,R13 |
|
| 809 |
- MOVQ 168(DI),AX |
|
| 810 |
- SHLQ $1,AX |
|
| 811 |
- MULQ 184(DI) |
|
| 812 |
- ADDQ AX,R14 |
|
| 813 |
- ADCQ DX,R15 |
|
| 814 |
- MOVQ 168(DI),DX |
|
| 815 |
- IMUL3Q $38,DX,AX |
|
| 816 |
- MULQ 192(DI) |
|
| 817 |
- ADDQ AX,SI |
|
| 818 |
- ADCQ DX,CX |
|
| 819 |
- MOVQ 176(DI),AX |
|
| 820 |
- MULQ 176(DI) |
|
| 821 |
- ADDQ AX,R14 |
|
| 822 |
- ADCQ DX,R15 |
|
| 823 |
- MOVQ 176(DI),DX |
|
| 824 |
- IMUL3Q $38,DX,AX |
|
| 825 |
- MULQ 184(DI) |
|
| 826 |
- ADDQ AX,SI |
|
| 827 |
- ADCQ DX,CX |
|
| 828 |
- MOVQ 176(DI),DX |
|
| 829 |
- IMUL3Q $38,DX,AX |
|
| 830 |
- MULQ 192(DI) |
|
| 831 |
- ADDQ AX,R8 |
|
| 832 |
- ADCQ DX,R9 |
|
| 833 |
- MOVQ 184(DI),DX |
|
| 834 |
- IMUL3Q $19,DX,AX |
|
| 835 |
- MULQ 184(DI) |
|
| 836 |
- ADDQ AX,R8 |
|
| 837 |
- ADCQ DX,R9 |
|
| 838 |
- MOVQ 184(DI),DX |
|
| 839 |
- IMUL3Q $38,DX,AX |
|
| 840 |
- MULQ 192(DI) |
|
| 841 |
- ADDQ AX,R10 |
|
| 842 |
- ADCQ DX,R11 |
|
| 843 |
- MOVQ 192(DI),DX |
|
| 844 |
- IMUL3Q $19,DX,AX |
|
| 845 |
- MULQ 192(DI) |
|
| 846 |
- ADDQ AX,R12 |
|
| 847 |
- ADCQ DX,R13 |
|
| 848 |
- MOVQ $REDMASK51,DX |
|
| 849 |
- SHLQ $13,CX:SI |
|
| 850 |
- ANDQ DX,SI |
|
| 851 |
- SHLQ $13,R9:R8 |
|
| 852 |
- ANDQ DX,R8 |
|
| 853 |
- ADDQ CX,R8 |
|
| 854 |
- SHLQ $13,R11:R10 |
|
| 855 |
- ANDQ DX,R10 |
|
| 856 |
- ADDQ R9,R10 |
|
| 857 |
- SHLQ $13,R13:R12 |
|
| 858 |
- ANDQ DX,R12 |
|
| 859 |
- ADDQ R11,R12 |
|
| 860 |
- SHLQ $13,R15:R14 |
|
| 861 |
- ANDQ DX,R14 |
|
| 862 |
- ADDQ R13,R14 |
|
| 863 |
- IMUL3Q $19,R15,CX |
|
| 864 |
- ADDQ CX,SI |
|
| 865 |
- MOVQ SI,CX |
|
| 866 |
- SHRQ $51,CX |
|
| 867 |
- ADDQ R8,CX |
|
| 868 |
- ANDQ DX,SI |
|
| 869 |
- MOVQ CX,R8 |
|
| 870 |
- SHRQ $51,CX |
|
| 871 |
- ADDQ R10,CX |
|
| 872 |
- ANDQ DX,R8 |
|
| 873 |
- MOVQ CX,R9 |
|
| 874 |
- SHRQ $51,CX |
|
| 875 |
- ADDQ R12,CX |
|
| 876 |
- ANDQ DX,R9 |
|
| 877 |
- MOVQ CX,AX |
|
| 878 |
- SHRQ $51,CX |
|
| 879 |
- ADDQ R14,CX |
|
| 880 |
- ANDQ DX,AX |
|
| 881 |
- MOVQ CX,R10 |
|
| 882 |
- SHRQ $51,CX |
|
| 883 |
- IMUL3Q $19,CX,CX |
|
| 884 |
- ADDQ CX,SI |
|
| 885 |
- ANDQ DX,R10 |
|
| 886 |
- MOVQ SI,160(DI) |
|
| 887 |
- MOVQ R8,168(DI) |
|
| 888 |
- MOVQ R9,176(DI) |
|
| 889 |
- MOVQ AX,184(DI) |
|
| 890 |
- MOVQ R10,192(DI) |
|
| 891 |
- MOVQ 184(DI),SI |
|
| 892 |
- IMUL3Q $19,SI,AX |
|
| 893 |
- MOVQ AX,0(SP) |
|
| 894 |
- MULQ 16(DI) |
|
| 895 |
- MOVQ AX,SI |
|
| 896 |
- MOVQ DX,CX |
|
| 897 |
- MOVQ 192(DI),DX |
|
| 898 |
- IMUL3Q $19,DX,AX |
|
| 899 |
- MOVQ AX,8(SP) |
|
| 900 |
- MULQ 8(DI) |
|
| 901 |
- ADDQ AX,SI |
|
| 902 |
- ADCQ DX,CX |
|
| 903 |
- MOVQ 160(DI),AX |
|
| 904 |
- MULQ 0(DI) |
|
| 905 |
- ADDQ AX,SI |
|
| 906 |
- ADCQ DX,CX |
|
| 907 |
- MOVQ 160(DI),AX |
|
| 908 |
- MULQ 8(DI) |
|
| 909 |
- MOVQ AX,R8 |
|
| 910 |
- MOVQ DX,R9 |
|
| 911 |
- MOVQ 160(DI),AX |
|
| 912 |
- MULQ 16(DI) |
|
| 913 |
- MOVQ AX,R10 |
|
| 914 |
- MOVQ DX,R11 |
|
| 915 |
- MOVQ 160(DI),AX |
|
| 916 |
- MULQ 24(DI) |
|
| 917 |
- MOVQ AX,R12 |
|
| 918 |
- MOVQ DX,R13 |
|
| 919 |
- MOVQ 160(DI),AX |
|
| 920 |
- MULQ 32(DI) |
|
| 921 |
- MOVQ AX,R14 |
|
| 922 |
- MOVQ DX,R15 |
|
| 923 |
- MOVQ 168(DI),AX |
|
| 924 |
- MULQ 0(DI) |
|
| 925 |
- ADDQ AX,R8 |
|
| 926 |
- ADCQ DX,R9 |
|
| 927 |
- MOVQ 168(DI),AX |
|
| 928 |
- MULQ 8(DI) |
|
| 929 |
- ADDQ AX,R10 |
|
| 930 |
- ADCQ DX,R11 |
|
| 931 |
- MOVQ 168(DI),AX |
|
| 932 |
- MULQ 16(DI) |
|
| 933 |
- ADDQ AX,R12 |
|
| 934 |
- ADCQ DX,R13 |
|
| 935 |
- MOVQ 168(DI),AX |
|
| 936 |
- MULQ 24(DI) |
|
| 937 |
- ADDQ AX,R14 |
|
| 938 |
- ADCQ DX,R15 |
|
| 939 |
- MOVQ 168(DI),DX |
|
| 940 |
- IMUL3Q $19,DX,AX |
|
| 941 |
- MULQ 32(DI) |
|
| 942 |
- ADDQ AX,SI |
|
| 943 |
- ADCQ DX,CX |
|
| 944 |
- MOVQ 176(DI),AX |
|
| 945 |
- MULQ 0(DI) |
|
| 946 |
- ADDQ AX,R10 |
|
| 947 |
- ADCQ DX,R11 |
|
| 948 |
- MOVQ 176(DI),AX |
|
| 949 |
- MULQ 8(DI) |
|
| 950 |
- ADDQ AX,R12 |
|
| 951 |
- ADCQ DX,R13 |
|
| 952 |
- MOVQ 176(DI),AX |
|
| 953 |
- MULQ 16(DI) |
|
| 954 |
- ADDQ AX,R14 |
|
| 955 |
- ADCQ DX,R15 |
|
| 956 |
- MOVQ 176(DI),DX |
|
| 957 |
- IMUL3Q $19,DX,AX |
|
| 958 |
- MULQ 24(DI) |
|
| 959 |
- ADDQ AX,SI |
|
| 960 |
- ADCQ DX,CX |
|
| 961 |
- MOVQ 176(DI),DX |
|
| 962 |
- IMUL3Q $19,DX,AX |
|
| 963 |
- MULQ 32(DI) |
|
| 964 |
- ADDQ AX,R8 |
|
| 965 |
- ADCQ DX,R9 |
|
| 966 |
- MOVQ 184(DI),AX |
|
| 967 |
- MULQ 0(DI) |
|
| 968 |
- ADDQ AX,R12 |
|
| 969 |
- ADCQ DX,R13 |
|
| 970 |
- MOVQ 184(DI),AX |
|
| 971 |
- MULQ 8(DI) |
|
| 972 |
- ADDQ AX,R14 |
|
| 973 |
- ADCQ DX,R15 |
|
| 974 |
- MOVQ 0(SP),AX |
|
| 975 |
- MULQ 24(DI) |
|
| 976 |
- ADDQ AX,R8 |
|
| 977 |
- ADCQ DX,R9 |
|
| 978 |
- MOVQ 0(SP),AX |
|
| 979 |
- MULQ 32(DI) |
|
| 980 |
- ADDQ AX,R10 |
|
| 981 |
- ADCQ DX,R11 |
|
| 982 |
- MOVQ 192(DI),AX |
|
| 983 |
- MULQ 0(DI) |
|
| 984 |
- ADDQ AX,R14 |
|
| 985 |
- ADCQ DX,R15 |
|
| 986 |
- MOVQ 8(SP),AX |
|
| 987 |
- MULQ 16(DI) |
|
| 988 |
- ADDQ AX,R8 |
|
| 989 |
- ADCQ DX,R9 |
|
| 990 |
- MOVQ 8(SP),AX |
|
| 991 |
- MULQ 24(DI) |
|
| 992 |
- ADDQ AX,R10 |
|
| 993 |
- ADCQ DX,R11 |
|
| 994 |
- MOVQ 8(SP),AX |
|
| 995 |
- MULQ 32(DI) |
|
| 996 |
- ADDQ AX,R12 |
|
| 997 |
- ADCQ DX,R13 |
|
| 998 |
- MOVQ $REDMASK51,DX |
|
| 999 |
- SHLQ $13,CX:SI |
|
| 1000 |
- ANDQ DX,SI |
|
| 1001 |
- SHLQ $13,R9:R8 |
|
| 1002 |
- ANDQ DX,R8 |
|
| 1003 |
- ADDQ CX,R8 |
|
| 1004 |
- SHLQ $13,R11:R10 |
|
| 1005 |
- ANDQ DX,R10 |
|
| 1006 |
- ADDQ R9,R10 |
|
| 1007 |
- SHLQ $13,R13:R12 |
|
| 1008 |
- ANDQ DX,R12 |
|
| 1009 |
- ADDQ R11,R12 |
|
| 1010 |
- SHLQ $13,R15:R14 |
|
| 1011 |
- ANDQ DX,R14 |
|
| 1012 |
- ADDQ R13,R14 |
|
| 1013 |
- IMUL3Q $19,R15,CX |
|
| 1014 |
- ADDQ CX,SI |
|
| 1015 |
- MOVQ SI,CX |
|
| 1016 |
- SHRQ $51,CX |
|
| 1017 |
- ADDQ R8,CX |
|
| 1018 |
- MOVQ CX,R8 |
|
| 1019 |
- SHRQ $51,CX |
|
| 1020 |
- ANDQ DX,SI |
|
| 1021 |
- ADDQ R10,CX |
|
| 1022 |
- MOVQ CX,R9 |
|
| 1023 |
- SHRQ $51,CX |
|
| 1024 |
- ANDQ DX,R8 |
|
| 1025 |
- ADDQ R12,CX |
|
| 1026 |
- MOVQ CX,AX |
|
| 1027 |
- SHRQ $51,CX |
|
| 1028 |
- ANDQ DX,R9 |
|
| 1029 |
- ADDQ R14,CX |
|
| 1030 |
- MOVQ CX,R10 |
|
| 1031 |
- SHRQ $51,CX |
|
| 1032 |
- ANDQ DX,AX |
|
| 1033 |
- IMUL3Q $19,CX,CX |
|
| 1034 |
- ADDQ CX,SI |
|
| 1035 |
- ANDQ DX,R10 |
|
| 1036 |
- MOVQ SI,160(DI) |
|
| 1037 |
- MOVQ R8,168(DI) |
|
| 1038 |
- MOVQ R9,176(DI) |
|
| 1039 |
- MOVQ AX,184(DI) |
|
| 1040 |
- MOVQ R10,192(DI) |
|
| 1041 |
- MOVQ 144(SP),SI |
|
| 1042 |
- IMUL3Q $19,SI,AX |
|
| 1043 |
- MOVQ AX,0(SP) |
|
| 1044 |
- MULQ 96(SP) |
|
| 1045 |
- MOVQ AX,SI |
|
| 1046 |
- MOVQ DX,CX |
|
| 1047 |
- MOVQ 152(SP),DX |
|
| 1048 |
- IMUL3Q $19,DX,AX |
|
| 1049 |
- MOVQ AX,8(SP) |
|
| 1050 |
- MULQ 88(SP) |
|
| 1051 |
- ADDQ AX,SI |
|
| 1052 |
- ADCQ DX,CX |
|
| 1053 |
- MOVQ 120(SP),AX |
|
| 1054 |
- MULQ 80(SP) |
|
| 1055 |
- ADDQ AX,SI |
|
| 1056 |
- ADCQ DX,CX |
|
| 1057 |
- MOVQ 120(SP),AX |
|
| 1058 |
- MULQ 88(SP) |
|
| 1059 |
- MOVQ AX,R8 |
|
| 1060 |
- MOVQ DX,R9 |
|
| 1061 |
- MOVQ 120(SP),AX |
|
| 1062 |
- MULQ 96(SP) |
|
| 1063 |
- MOVQ AX,R10 |
|
| 1064 |
- MOVQ DX,R11 |
|
| 1065 |
- MOVQ 120(SP),AX |
|
| 1066 |
- MULQ 104(SP) |
|
| 1067 |
- MOVQ AX,R12 |
|
| 1068 |
- MOVQ DX,R13 |
|
| 1069 |
- MOVQ 120(SP),AX |
|
| 1070 |
- MULQ 112(SP) |
|
| 1071 |
- MOVQ AX,R14 |
|
| 1072 |
- MOVQ DX,R15 |
|
| 1073 |
- MOVQ 128(SP),AX |
|
| 1074 |
- MULQ 80(SP) |
|
| 1075 |
- ADDQ AX,R8 |
|
| 1076 |
- ADCQ DX,R9 |
|
| 1077 |
- MOVQ 128(SP),AX |
|
| 1078 |
- MULQ 88(SP) |
|
| 1079 |
- ADDQ AX,R10 |
|
| 1080 |
- ADCQ DX,R11 |
|
| 1081 |
- MOVQ 128(SP),AX |
|
| 1082 |
- MULQ 96(SP) |
|
| 1083 |
- ADDQ AX,R12 |
|
| 1084 |
- ADCQ DX,R13 |
|
| 1085 |
- MOVQ 128(SP),AX |
|
| 1086 |
- MULQ 104(SP) |
|
| 1087 |
- ADDQ AX,R14 |
|
| 1088 |
- ADCQ DX,R15 |
|
| 1089 |
- MOVQ 128(SP),DX |
|
| 1090 |
- IMUL3Q $19,DX,AX |
|
| 1091 |
- MULQ 112(SP) |
|
| 1092 |
- ADDQ AX,SI |
|
| 1093 |
- ADCQ DX,CX |
|
| 1094 |
- MOVQ 136(SP),AX |
|
| 1095 |
- MULQ 80(SP) |
|
| 1096 |
- ADDQ AX,R10 |
|
| 1097 |
- ADCQ DX,R11 |
|
| 1098 |
- MOVQ 136(SP),AX |
|
| 1099 |
- MULQ 88(SP) |
|
| 1100 |
- ADDQ AX,R12 |
|
| 1101 |
- ADCQ DX,R13 |
|
| 1102 |
- MOVQ 136(SP),AX |
|
| 1103 |
- MULQ 96(SP) |
|
| 1104 |
- ADDQ AX,R14 |
|
| 1105 |
- ADCQ DX,R15 |
|
| 1106 |
- MOVQ 136(SP),DX |
|
| 1107 |
- IMUL3Q $19,DX,AX |
|
| 1108 |
- MULQ 104(SP) |
|
| 1109 |
- ADDQ AX,SI |
|
| 1110 |
- ADCQ DX,CX |
|
| 1111 |
- MOVQ 136(SP),DX |
|
| 1112 |
- IMUL3Q $19,DX,AX |
|
| 1113 |
- MULQ 112(SP) |
|
| 1114 |
- ADDQ AX,R8 |
|
| 1115 |
- ADCQ DX,R9 |
|
| 1116 |
- MOVQ 144(SP),AX |
|
| 1117 |
- MULQ 80(SP) |
|
| 1118 |
- ADDQ AX,R12 |
|
| 1119 |
- ADCQ DX,R13 |
|
| 1120 |
- MOVQ 144(SP),AX |
|
| 1121 |
- MULQ 88(SP) |
|
| 1122 |
- ADDQ AX,R14 |
|
| 1123 |
- ADCQ DX,R15 |
|
| 1124 |
- MOVQ 0(SP),AX |
|
| 1125 |
- MULQ 104(SP) |
|
| 1126 |
- ADDQ AX,R8 |
|
| 1127 |
- ADCQ DX,R9 |
|
| 1128 |
- MOVQ 0(SP),AX |
|
| 1129 |
- MULQ 112(SP) |
|
| 1130 |
- ADDQ AX,R10 |
|
| 1131 |
- ADCQ DX,R11 |
|
| 1132 |
- MOVQ 152(SP),AX |
|
| 1133 |
- MULQ 80(SP) |
|
| 1134 |
- ADDQ AX,R14 |
|
| 1135 |
- ADCQ DX,R15 |
|
| 1136 |
- MOVQ 8(SP),AX |
|
| 1137 |
- MULQ 96(SP) |
|
| 1138 |
- ADDQ AX,R8 |
|
| 1139 |
- ADCQ DX,R9 |
|
| 1140 |
- MOVQ 8(SP),AX |
|
| 1141 |
- MULQ 104(SP) |
|
| 1142 |
- ADDQ AX,R10 |
|
| 1143 |
- ADCQ DX,R11 |
|
| 1144 |
- MOVQ 8(SP),AX |
|
| 1145 |
- MULQ 112(SP) |
|
| 1146 |
- ADDQ AX,R12 |
|
| 1147 |
- ADCQ DX,R13 |
|
| 1148 |
- MOVQ $REDMASK51,DX |
|
| 1149 |
- SHLQ $13,CX:SI |
|
| 1150 |
- ANDQ DX,SI |
|
| 1151 |
- SHLQ $13,R9:R8 |
|
| 1152 |
- ANDQ DX,R8 |
|
| 1153 |
- ADDQ CX,R8 |
|
| 1154 |
- SHLQ $13,R11:R10 |
|
| 1155 |
- ANDQ DX,R10 |
|
| 1156 |
- ADDQ R9,R10 |
|
| 1157 |
- SHLQ $13,R13:R12 |
|
| 1158 |
- ANDQ DX,R12 |
|
| 1159 |
- ADDQ R11,R12 |
|
| 1160 |
- SHLQ $13,R15:R14 |
|
| 1161 |
- ANDQ DX,R14 |
|
| 1162 |
- ADDQ R13,R14 |
|
| 1163 |
- IMUL3Q $19,R15,CX |
|
| 1164 |
- ADDQ CX,SI |
|
| 1165 |
- MOVQ SI,CX |
|
| 1166 |
- SHRQ $51,CX |
|
| 1167 |
- ADDQ R8,CX |
|
| 1168 |
- MOVQ CX,R8 |
|
| 1169 |
- SHRQ $51,CX |
|
| 1170 |
- ANDQ DX,SI |
|
| 1171 |
- ADDQ R10,CX |
|
| 1172 |
- MOVQ CX,R9 |
|
| 1173 |
- SHRQ $51,CX |
|
| 1174 |
- ANDQ DX,R8 |
|
| 1175 |
- ADDQ R12,CX |
|
| 1176 |
- MOVQ CX,AX |
|
| 1177 |
- SHRQ $51,CX |
|
| 1178 |
- ANDQ DX,R9 |
|
| 1179 |
- ADDQ R14,CX |
|
| 1180 |
- MOVQ CX,R10 |
|
| 1181 |
- SHRQ $51,CX |
|
| 1182 |
- ANDQ DX,AX |
|
| 1183 |
- IMUL3Q $19,CX,CX |
|
| 1184 |
- ADDQ CX,SI |
|
| 1185 |
- ANDQ DX,R10 |
|
| 1186 |
- MOVQ SI,40(DI) |
|
| 1187 |
- MOVQ R8,48(DI) |
|
| 1188 |
- MOVQ R9,56(DI) |
|
| 1189 |
- MOVQ AX,64(DI) |
|
| 1190 |
- MOVQ R10,72(DI) |
|
| 1191 |
- MOVQ 160(SP),AX |
|
| 1192 |
- MULQ ·_121666_213(SB) |
|
| 1193 |
- SHRQ $13,AX |
|
| 1194 |
- MOVQ AX,SI |
|
| 1195 |
- MOVQ DX,CX |
|
| 1196 |
- MOVQ 168(SP),AX |
|
| 1197 |
- MULQ ·_121666_213(SB) |
|
| 1198 |
- SHRQ $13,AX |
|
| 1199 |
- ADDQ AX,CX |
|
| 1200 |
- MOVQ DX,R8 |
|
| 1201 |
- MOVQ 176(SP),AX |
|
| 1202 |
- MULQ ·_121666_213(SB) |
|
| 1203 |
- SHRQ $13,AX |
|
| 1204 |
- ADDQ AX,R8 |
|
| 1205 |
- MOVQ DX,R9 |
|
| 1206 |
- MOVQ 184(SP),AX |
|
| 1207 |
- MULQ ·_121666_213(SB) |
|
| 1208 |
- SHRQ $13,AX |
|
| 1209 |
- ADDQ AX,R9 |
|
| 1210 |
- MOVQ DX,R10 |
|
| 1211 |
- MOVQ 192(SP),AX |
|
| 1212 |
- MULQ ·_121666_213(SB) |
|
| 1213 |
- SHRQ $13,AX |
|
| 1214 |
- ADDQ AX,R10 |
|
| 1215 |
- IMUL3Q $19,DX,DX |
|
| 1216 |
- ADDQ DX,SI |
|
| 1217 |
- ADDQ 80(SP),SI |
|
| 1218 |
- ADDQ 88(SP),CX |
|
| 1219 |
- ADDQ 96(SP),R8 |
|
| 1220 |
- ADDQ 104(SP),R9 |
|
| 1221 |
- ADDQ 112(SP),R10 |
|
| 1222 |
- MOVQ SI,80(DI) |
|
| 1223 |
- MOVQ CX,88(DI) |
|
| 1224 |
- MOVQ R8,96(DI) |
|
| 1225 |
- MOVQ R9,104(DI) |
|
| 1226 |
- MOVQ R10,112(DI) |
|
| 1227 |
- MOVQ 104(DI),SI |
|
| 1228 |
- IMUL3Q $19,SI,AX |
|
| 1229 |
- MOVQ AX,0(SP) |
|
| 1230 |
- MULQ 176(SP) |
|
| 1231 |
- MOVQ AX,SI |
|
| 1232 |
- MOVQ DX,CX |
|
| 1233 |
- MOVQ 112(DI),DX |
|
| 1234 |
- IMUL3Q $19,DX,AX |
|
| 1235 |
- MOVQ AX,8(SP) |
|
| 1236 |
- MULQ 168(SP) |
|
| 1237 |
- ADDQ AX,SI |
|
| 1238 |
- ADCQ DX,CX |
|
| 1239 |
- MOVQ 80(DI),AX |
|
| 1240 |
- MULQ 160(SP) |
|
| 1241 |
- ADDQ AX,SI |
|
| 1242 |
- ADCQ DX,CX |
|
| 1243 |
- MOVQ 80(DI),AX |
|
| 1244 |
- MULQ 168(SP) |
|
| 1245 |
- MOVQ AX,R8 |
|
| 1246 |
- MOVQ DX,R9 |
|
| 1247 |
- MOVQ 80(DI),AX |
|
| 1248 |
- MULQ 176(SP) |
|
| 1249 |
- MOVQ AX,R10 |
|
| 1250 |
- MOVQ DX,R11 |
|
| 1251 |
- MOVQ 80(DI),AX |
|
| 1252 |
- MULQ 184(SP) |
|
| 1253 |
- MOVQ AX,R12 |
|
| 1254 |
- MOVQ DX,R13 |
|
| 1255 |
- MOVQ 80(DI),AX |
|
| 1256 |
- MULQ 192(SP) |
|
| 1257 |
- MOVQ AX,R14 |
|
| 1258 |
- MOVQ DX,R15 |
|
| 1259 |
- MOVQ 88(DI),AX |
|
| 1260 |
- MULQ 160(SP) |
|
| 1261 |
- ADDQ AX,R8 |
|
| 1262 |
- ADCQ DX,R9 |
|
| 1263 |
- MOVQ 88(DI),AX |
|
| 1264 |
- MULQ 168(SP) |
|
| 1265 |
- ADDQ AX,R10 |
|
| 1266 |
- ADCQ DX,R11 |
|
| 1267 |
- MOVQ 88(DI),AX |
|
| 1268 |
- MULQ 176(SP) |
|
| 1269 |
- ADDQ AX,R12 |
|
| 1270 |
- ADCQ DX,R13 |
|
| 1271 |
- MOVQ 88(DI),AX |
|
| 1272 |
- MULQ 184(SP) |
|
| 1273 |
- ADDQ AX,R14 |
|
| 1274 |
- ADCQ DX,R15 |
|
| 1275 |
- MOVQ 88(DI),DX |
|
| 1276 |
- IMUL3Q $19,DX,AX |
|
| 1277 |
- MULQ 192(SP) |
|
| 1278 |
- ADDQ AX,SI |
|
| 1279 |
- ADCQ DX,CX |
|
| 1280 |
- MOVQ 96(DI),AX |
|
| 1281 |
- MULQ 160(SP) |
|
| 1282 |
- ADDQ AX,R10 |
|
| 1283 |
- ADCQ DX,R11 |
|
| 1284 |
- MOVQ 96(DI),AX |
|
| 1285 |
- MULQ 168(SP) |
|
| 1286 |
- ADDQ AX,R12 |
|
| 1287 |
- ADCQ DX,R13 |
|
| 1288 |
- MOVQ 96(DI),AX |
|
| 1289 |
- MULQ 176(SP) |
|
| 1290 |
- ADDQ AX,R14 |
|
| 1291 |
- ADCQ DX,R15 |
|
| 1292 |
- MOVQ 96(DI),DX |
|
| 1293 |
- IMUL3Q $19,DX,AX |
|
| 1294 |
- MULQ 184(SP) |
|
| 1295 |
- ADDQ AX,SI |
|
| 1296 |
- ADCQ DX,CX |
|
| 1297 |
- MOVQ 96(DI),DX |
|
| 1298 |
- IMUL3Q $19,DX,AX |
|
| 1299 |
- MULQ 192(SP) |
|
| 1300 |
- ADDQ AX,R8 |
|
| 1301 |
- ADCQ DX,R9 |
|
| 1302 |
- MOVQ 104(DI),AX |
|
| 1303 |
- MULQ 160(SP) |
|
| 1304 |
- ADDQ AX,R12 |
|
| 1305 |
- ADCQ DX,R13 |
|
| 1306 |
- MOVQ 104(DI),AX |
|
| 1307 |
- MULQ 168(SP) |
|
| 1308 |
- ADDQ AX,R14 |
|
| 1309 |
- ADCQ DX,R15 |
|
| 1310 |
- MOVQ 0(SP),AX |
|
| 1311 |
- MULQ 184(SP) |
|
| 1312 |
- ADDQ AX,R8 |
|
| 1313 |
- ADCQ DX,R9 |
|
| 1314 |
- MOVQ 0(SP),AX |
|
| 1315 |
- MULQ 192(SP) |
|
| 1316 |
- ADDQ AX,R10 |
|
| 1317 |
- ADCQ DX,R11 |
|
| 1318 |
- MOVQ 112(DI),AX |
|
| 1319 |
- MULQ 160(SP) |
|
| 1320 |
- ADDQ AX,R14 |
|
| 1321 |
- ADCQ DX,R15 |
|
| 1322 |
- MOVQ 8(SP),AX |
|
| 1323 |
- MULQ 176(SP) |
|
| 1324 |
- ADDQ AX,R8 |
|
| 1325 |
- ADCQ DX,R9 |
|
| 1326 |
- MOVQ 8(SP),AX |
|
| 1327 |
- MULQ 184(SP) |
|
| 1328 |
- ADDQ AX,R10 |
|
| 1329 |
- ADCQ DX,R11 |
|
| 1330 |
- MOVQ 8(SP),AX |
|
| 1331 |
- MULQ 192(SP) |
|
| 1332 |
- ADDQ AX,R12 |
|
| 1333 |
- ADCQ DX,R13 |
|
| 1334 |
- MOVQ $REDMASK51,DX |
|
| 1335 |
- SHLQ $13,CX:SI |
|
| 1336 |
- ANDQ DX,SI |
|
| 1337 |
- SHLQ $13,R9:R8 |
|
| 1338 |
- ANDQ DX,R8 |
|
| 1339 |
- ADDQ CX,R8 |
|
| 1340 |
- SHLQ $13,R11:R10 |
|
| 1341 |
- ANDQ DX,R10 |
|
| 1342 |
- ADDQ R9,R10 |
|
| 1343 |
- SHLQ $13,R13:R12 |
|
| 1344 |
- ANDQ DX,R12 |
|
| 1345 |
- ADDQ R11,R12 |
|
| 1346 |
- SHLQ $13,R15:R14 |
|
| 1347 |
- ANDQ DX,R14 |
|
| 1348 |
- ADDQ R13,R14 |
|
| 1349 |
- IMUL3Q $19,R15,CX |
|
| 1350 |
- ADDQ CX,SI |
|
| 1351 |
- MOVQ SI,CX |
|
| 1352 |
- SHRQ $51,CX |
|
| 1353 |
- ADDQ R8,CX |
|
| 1354 |
- MOVQ CX,R8 |
|
| 1355 |
- SHRQ $51,CX |
|
| 1356 |
- ANDQ DX,SI |
|
| 1357 |
- ADDQ R10,CX |
|
| 1358 |
- MOVQ CX,R9 |
|
| 1359 |
- SHRQ $51,CX |
|
| 1360 |
- ANDQ DX,R8 |
|
| 1361 |
- ADDQ R12,CX |
|
| 1362 |
- MOVQ CX,AX |
|
| 1363 |
- SHRQ $51,CX |
|
| 1364 |
- ANDQ DX,R9 |
|
| 1365 |
- ADDQ R14,CX |
|
| 1366 |
- MOVQ CX,R10 |
|
| 1367 |
- SHRQ $51,CX |
|
| 1368 |
- ANDQ DX,AX |
|
| 1369 |
- IMUL3Q $19,CX,CX |
|
| 1370 |
- ADDQ CX,SI |
|
| 1371 |
- ANDQ DX,R10 |
|
| 1372 |
- MOVQ SI,80(DI) |
|
| 1373 |
- MOVQ R8,88(DI) |
|
| 1374 |
- MOVQ R9,96(DI) |
|
| 1375 |
- MOVQ AX,104(DI) |
|
| 1376 |
- MOVQ R10,112(DI) |
|
| 1377 |
- RET |
| 1378 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,240 +0,0 @@ |
| 1 |
-// Copyright 2012 The Go Authors. All rights reserved. |
|
| 2 |
-// Use of this source code is governed by a BSD-style |
|
| 3 |
-// license that can be found in the LICENSE file. |
|
| 4 |
- |
|
| 5 |
-// +build amd64,!gccgo,!appengine |
|
| 6 |
- |
|
| 7 |
-package curve25519 |
|
| 8 |
- |
|
| 9 |
-// These functions are implemented in the .s files. The names of the functions |
|
| 10 |
-// in the rest of the file are also taken from the SUPERCOP sources to help |
|
| 11 |
-// people following along. |
|
| 12 |
- |
|
| 13 |
-//go:noescape |
|
| 14 |
- |
|
| 15 |
-func cswap(inout *[5]uint64, v uint64) |
|
| 16 |
- |
|
| 17 |
-//go:noescape |
|
| 18 |
- |
|
| 19 |
-func ladderstep(inout *[5][5]uint64) |
|
| 20 |
- |
|
| 21 |
-//go:noescape |
|
| 22 |
- |
|
| 23 |
-func freeze(inout *[5]uint64) |
|
| 24 |
- |
|
| 25 |
-//go:noescape |
|
| 26 |
- |
|
| 27 |
-func mul(dest, a, b *[5]uint64) |
|
| 28 |
- |
|
| 29 |
-//go:noescape |
|
| 30 |
- |
|
| 31 |
-func square(out, in *[5]uint64) |
|
| 32 |
- |
|
| 33 |
-// mladder uses a Montgomery ladder to calculate (xr/zr) *= s. |
|
| 34 |
-func mladder(xr, zr *[5]uint64, s *[32]byte) {
|
|
| 35 |
- var work [5][5]uint64 |
|
| 36 |
- |
|
| 37 |
- work[0] = *xr |
|
| 38 |
- setint(&work[1], 1) |
|
| 39 |
- setint(&work[2], 0) |
|
| 40 |
- work[3] = *xr |
|
| 41 |
- setint(&work[4], 1) |
|
| 42 |
- |
|
| 43 |
- j := uint(6) |
|
| 44 |
- var prevbit byte |
|
| 45 |
- |
|
| 46 |
- for i := 31; i >= 0; i-- {
|
|
| 47 |
- for j < 8 {
|
|
| 48 |
- bit := ((*s)[i] >> j) & 1 |
|
| 49 |
- swap := bit ^ prevbit |
|
| 50 |
- prevbit = bit |
|
| 51 |
- cswap(&work[1], uint64(swap)) |
|
| 52 |
- ladderstep(&work) |
|
| 53 |
- j-- |
|
| 54 |
- } |
|
| 55 |
- j = 7 |
|
| 56 |
- } |
|
| 57 |
- |
|
| 58 |
- *xr = work[1] |
|
| 59 |
- *zr = work[2] |
|
| 60 |
-} |
|
| 61 |
- |
|
| 62 |
-func scalarMult(out, in, base *[32]byte) {
|
|
| 63 |
- var e [32]byte |
|
| 64 |
- copy(e[:], (*in)[:]) |
|
| 65 |
- e[0] &= 248 |
|
| 66 |
- e[31] &= 127 |
|
| 67 |
- e[31] |= 64 |
|
| 68 |
- |
|
| 69 |
- var t, z [5]uint64 |
|
| 70 |
- unpack(&t, base) |
|
| 71 |
- mladder(&t, &z, &e) |
|
| 72 |
- invert(&z, &z) |
|
| 73 |
- mul(&t, &t, &z) |
|
| 74 |
- pack(out, &t) |
|
| 75 |
-} |
|
| 76 |
- |
|
| 77 |
-func setint(r *[5]uint64, v uint64) {
|
|
| 78 |
- r[0] = v |
|
| 79 |
- r[1] = 0 |
|
| 80 |
- r[2] = 0 |
|
| 81 |
- r[3] = 0 |
|
| 82 |
- r[4] = 0 |
|
| 83 |
-} |
|
| 84 |
- |
|
| 85 |
-// unpack sets r = x where r consists of 5, 51-bit limbs in little-endian |
|
| 86 |
-// order. |
|
| 87 |
-func unpack(r *[5]uint64, x *[32]byte) {
|
|
| 88 |
- r[0] = uint64(x[0]) | |
|
| 89 |
- uint64(x[1])<<8 | |
|
| 90 |
- uint64(x[2])<<16 | |
|
| 91 |
- uint64(x[3])<<24 | |
|
| 92 |
- uint64(x[4])<<32 | |
|
| 93 |
- uint64(x[5])<<40 | |
|
| 94 |
- uint64(x[6]&7)<<48 |
|
| 95 |
- |
|
| 96 |
- r[1] = uint64(x[6])>>3 | |
|
| 97 |
- uint64(x[7])<<5 | |
|
| 98 |
- uint64(x[8])<<13 | |
|
| 99 |
- uint64(x[9])<<21 | |
|
| 100 |
- uint64(x[10])<<29 | |
|
| 101 |
- uint64(x[11])<<37 | |
|
| 102 |
- uint64(x[12]&63)<<45 |
|
| 103 |
- |
|
| 104 |
- r[2] = uint64(x[12])>>6 | |
|
| 105 |
- uint64(x[13])<<2 | |
|
| 106 |
- uint64(x[14])<<10 | |
|
| 107 |
- uint64(x[15])<<18 | |
|
| 108 |
- uint64(x[16])<<26 | |
|
| 109 |
- uint64(x[17])<<34 | |
|
| 110 |
- uint64(x[18])<<42 | |
|
| 111 |
- uint64(x[19]&1)<<50 |
|
| 112 |
- |
|
| 113 |
- r[3] = uint64(x[19])>>1 | |
|
| 114 |
- uint64(x[20])<<7 | |
|
| 115 |
- uint64(x[21])<<15 | |
|
| 116 |
- uint64(x[22])<<23 | |
|
| 117 |
- uint64(x[23])<<31 | |
|
| 118 |
- uint64(x[24])<<39 | |
|
| 119 |
- uint64(x[25]&15)<<47 |
|
| 120 |
- |
|
| 121 |
- r[4] = uint64(x[25])>>4 | |
|
| 122 |
- uint64(x[26])<<4 | |
|
| 123 |
- uint64(x[27])<<12 | |
|
| 124 |
- uint64(x[28])<<20 | |
|
| 125 |
- uint64(x[29])<<28 | |
|
| 126 |
- uint64(x[30])<<36 | |
|
| 127 |
- uint64(x[31]&127)<<44 |
|
| 128 |
-} |
|
| 129 |
- |
|
| 130 |
-// pack sets out = x where out is the usual, little-endian form of the 5, |
|
| 131 |
-// 51-bit limbs in x. |
|
| 132 |
-func pack(out *[32]byte, x *[5]uint64) {
|
|
| 133 |
- t := *x |
|
| 134 |
- freeze(&t) |
|
| 135 |
- |
|
| 136 |
- out[0] = byte(t[0]) |
|
| 137 |
- out[1] = byte(t[0] >> 8) |
|
| 138 |
- out[2] = byte(t[0] >> 16) |
|
| 139 |
- out[3] = byte(t[0] >> 24) |
|
| 140 |
- out[4] = byte(t[0] >> 32) |
|
| 141 |
- out[5] = byte(t[0] >> 40) |
|
| 142 |
- out[6] = byte(t[0] >> 48) |
|
| 143 |
- |
|
| 144 |
- out[6] ^= byte(t[1]<<3) & 0xf8 |
|
| 145 |
- out[7] = byte(t[1] >> 5) |
|
| 146 |
- out[8] = byte(t[1] >> 13) |
|
| 147 |
- out[9] = byte(t[1] >> 21) |
|
| 148 |
- out[10] = byte(t[1] >> 29) |
|
| 149 |
- out[11] = byte(t[1] >> 37) |
|
| 150 |
- out[12] = byte(t[1] >> 45) |
|
| 151 |
- |
|
| 152 |
- out[12] ^= byte(t[2]<<6) & 0xc0 |
|
| 153 |
- out[13] = byte(t[2] >> 2) |
|
| 154 |
- out[14] = byte(t[2] >> 10) |
|
| 155 |
- out[15] = byte(t[2] >> 18) |
|
| 156 |
- out[16] = byte(t[2] >> 26) |
|
| 157 |
- out[17] = byte(t[2] >> 34) |
|
| 158 |
- out[18] = byte(t[2] >> 42) |
|
| 159 |
- out[19] = byte(t[2] >> 50) |
|
| 160 |
- |
|
| 161 |
- out[19] ^= byte(t[3]<<1) & 0xfe |
|
| 162 |
- out[20] = byte(t[3] >> 7) |
|
| 163 |
- out[21] = byte(t[3] >> 15) |
|
| 164 |
- out[22] = byte(t[3] >> 23) |
|
| 165 |
- out[23] = byte(t[3] >> 31) |
|
| 166 |
- out[24] = byte(t[3] >> 39) |
|
| 167 |
- out[25] = byte(t[3] >> 47) |
|
| 168 |
- |
|
| 169 |
- out[25] ^= byte(t[4]<<4) & 0xf0 |
|
| 170 |
- out[26] = byte(t[4] >> 4) |
|
| 171 |
- out[27] = byte(t[4] >> 12) |
|
| 172 |
- out[28] = byte(t[4] >> 20) |
|
| 173 |
- out[29] = byte(t[4] >> 28) |
|
| 174 |
- out[30] = byte(t[4] >> 36) |
|
| 175 |
- out[31] = byte(t[4] >> 44) |
|
| 176 |
-} |
|
| 177 |
- |
|
| 178 |
-// invert calculates r = x^-1 mod p using Fermat's little theorem. |
|
| 179 |
-func invert(r *[5]uint64, x *[5]uint64) {
|
|
| 180 |
- var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64 |
|
| 181 |
- |
|
| 182 |
- square(&z2, x) /* 2 */ |
|
| 183 |
- square(&t, &z2) /* 4 */ |
|
| 184 |
- square(&t, &t) /* 8 */ |
|
| 185 |
- mul(&z9, &t, x) /* 9 */ |
|
| 186 |
- mul(&z11, &z9, &z2) /* 11 */ |
|
| 187 |
- square(&t, &z11) /* 22 */ |
|
| 188 |
- mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */ |
|
| 189 |
- |
|
| 190 |
- square(&t, &z2_5_0) /* 2^6 - 2^1 */ |
|
| 191 |
- for i := 1; i < 5; i++ { /* 2^20 - 2^10 */
|
|
| 192 |
- square(&t, &t) |
|
| 193 |
- } |
|
| 194 |
- mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */ |
|
| 195 |
- |
|
| 196 |
- square(&t, &z2_10_0) /* 2^11 - 2^1 */ |
|
| 197 |
- for i := 1; i < 10; i++ { /* 2^20 - 2^10 */
|
|
| 198 |
- square(&t, &t) |
|
| 199 |
- } |
|
| 200 |
- mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */ |
|
| 201 |
- |
|
| 202 |
- square(&t, &z2_20_0) /* 2^21 - 2^1 */ |
|
| 203 |
- for i := 1; i < 20; i++ { /* 2^40 - 2^20 */
|
|
| 204 |
- square(&t, &t) |
|
| 205 |
- } |
|
| 206 |
- mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */ |
|
| 207 |
- |
|
| 208 |
- square(&t, &t) /* 2^41 - 2^1 */ |
|
| 209 |
- for i := 1; i < 10; i++ { /* 2^50 - 2^10 */
|
|
| 210 |
- square(&t, &t) |
|
| 211 |
- } |
|
| 212 |
- mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */ |
|
| 213 |
- |
|
| 214 |
- square(&t, &z2_50_0) /* 2^51 - 2^1 */ |
|
| 215 |
- for i := 1; i < 50; i++ { /* 2^100 - 2^50 */
|
|
| 216 |
- square(&t, &t) |
|
| 217 |
- } |
|
| 218 |
- mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */ |
|
| 219 |
- |
|
| 220 |
- square(&t, &z2_100_0) /* 2^101 - 2^1 */ |
|
| 221 |
- for i := 1; i < 100; i++ { /* 2^200 - 2^100 */
|
|
| 222 |
- square(&t, &t) |
|
| 223 |
- } |
|
| 224 |
- mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */ |
|
| 225 |
- |
|
| 226 |
- square(&t, &t) /* 2^201 - 2^1 */ |
|
| 227 |
- for i := 1; i < 50; i++ { /* 2^250 - 2^50 */
|
|
| 228 |
- square(&t, &t) |
|
| 229 |
- } |
|
| 230 |
- mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */ |
|
| 231 |
- |
|
| 232 |
- square(&t, &t) /* 2^251 - 2^1 */ |
|
| 233 |
- square(&t, &t) /* 2^252 - 2^2 */ |
|
| 234 |
- square(&t, &t) /* 2^253 - 2^3 */ |
|
| 235 |
- |
|
| 236 |
- square(&t, &t) /* 2^254 - 2^4 */ |
|
| 237 |
- |
|
| 238 |
- square(&t, &t) /* 2^255 - 2^5 */ |
|
| 239 |
- mul(r, &t, &z11) /* 2^255 - 21 */ |
|
| 240 |
-} |
| 241 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,169 +0,0 @@ |
| 1 |
-// Copyright 2012 The Go Authors. All rights reserved. |
|
| 2 |
-// Use of this source code is governed by a BSD-style |
|
| 3 |
-// license that can be found in the LICENSE file. |
|
| 4 |
- |
|
| 5 |
-// This code was translated into a form compatible with 6a from the public |
|
| 6 |
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html |
|
| 7 |
- |
|
| 8 |
-// +build amd64,!gccgo,!appengine |
|
| 9 |
- |
|
| 10 |
-#include "const_amd64.h" |
|
| 11 |
- |
|
| 12 |
-// func mul(dest, a, b *[5]uint64) |
|
| 13 |
-TEXT ·mul(SB),0,$16-24 |
|
| 14 |
- MOVQ dest+0(FP), DI |
|
| 15 |
- MOVQ a+8(FP), SI |
|
| 16 |
- MOVQ b+16(FP), DX |
|
| 17 |
- |
|
| 18 |
- MOVQ DX,CX |
|
| 19 |
- MOVQ 24(SI),DX |
|
| 20 |
- IMUL3Q $19,DX,AX |
|
| 21 |
- MOVQ AX,0(SP) |
|
| 22 |
- MULQ 16(CX) |
|
| 23 |
- MOVQ AX,R8 |
|
| 24 |
- MOVQ DX,R9 |
|
| 25 |
- MOVQ 32(SI),DX |
|
| 26 |
- IMUL3Q $19,DX,AX |
|
| 27 |
- MOVQ AX,8(SP) |
|
| 28 |
- MULQ 8(CX) |
|
| 29 |
- ADDQ AX,R8 |
|
| 30 |
- ADCQ DX,R9 |
|
| 31 |
- MOVQ 0(SI),AX |
|
| 32 |
- MULQ 0(CX) |
|
| 33 |
- ADDQ AX,R8 |
|
| 34 |
- ADCQ DX,R9 |
|
| 35 |
- MOVQ 0(SI),AX |
|
| 36 |
- MULQ 8(CX) |
|
| 37 |
- MOVQ AX,R10 |
|
| 38 |
- MOVQ DX,R11 |
|
| 39 |
- MOVQ 0(SI),AX |
|
| 40 |
- MULQ 16(CX) |
|
| 41 |
- MOVQ AX,R12 |
|
| 42 |
- MOVQ DX,R13 |
|
| 43 |
- MOVQ 0(SI),AX |
|
| 44 |
- MULQ 24(CX) |
|
| 45 |
- MOVQ AX,R14 |
|
| 46 |
- MOVQ DX,R15 |
|
| 47 |
- MOVQ 0(SI),AX |
|
| 48 |
- MULQ 32(CX) |
|
| 49 |
- MOVQ AX,BX |
|
| 50 |
- MOVQ DX,BP |
|
| 51 |
- MOVQ 8(SI),AX |
|
| 52 |
- MULQ 0(CX) |
|
| 53 |
- ADDQ AX,R10 |
|
| 54 |
- ADCQ DX,R11 |
|
| 55 |
- MOVQ 8(SI),AX |
|
| 56 |
- MULQ 8(CX) |
|
| 57 |
- ADDQ AX,R12 |
|
| 58 |
- ADCQ DX,R13 |
|
| 59 |
- MOVQ 8(SI),AX |
|
| 60 |
- MULQ 16(CX) |
|
| 61 |
- ADDQ AX,R14 |
|
| 62 |
- ADCQ DX,R15 |
|
| 63 |
- MOVQ 8(SI),AX |
|
| 64 |
- MULQ 24(CX) |
|
| 65 |
- ADDQ AX,BX |
|
| 66 |
- ADCQ DX,BP |
|
| 67 |
- MOVQ 8(SI),DX |
|
| 68 |
- IMUL3Q $19,DX,AX |
|
| 69 |
- MULQ 32(CX) |
|
| 70 |
- ADDQ AX,R8 |
|
| 71 |
- ADCQ DX,R9 |
|
| 72 |
- MOVQ 16(SI),AX |
|
| 73 |
- MULQ 0(CX) |
|
| 74 |
- ADDQ AX,R12 |
|
| 75 |
- ADCQ DX,R13 |
|
| 76 |
- MOVQ 16(SI),AX |
|
| 77 |
- MULQ 8(CX) |
|
| 78 |
- ADDQ AX,R14 |
|
| 79 |
- ADCQ DX,R15 |
|
| 80 |
- MOVQ 16(SI),AX |
|
| 81 |
- MULQ 16(CX) |
|
| 82 |
- ADDQ AX,BX |
|
| 83 |
- ADCQ DX,BP |
|
| 84 |
- MOVQ 16(SI),DX |
|
| 85 |
- IMUL3Q $19,DX,AX |
|
| 86 |
- MULQ 24(CX) |
|
| 87 |
- ADDQ AX,R8 |
|
| 88 |
- ADCQ DX,R9 |
|
| 89 |
- MOVQ 16(SI),DX |
|
| 90 |
- IMUL3Q $19,DX,AX |
|
| 91 |
- MULQ 32(CX) |
|
| 92 |
- ADDQ AX,R10 |
|
| 93 |
- ADCQ DX,R11 |
|
| 94 |
- MOVQ 24(SI),AX |
|
| 95 |
- MULQ 0(CX) |
|
| 96 |
- ADDQ AX,R14 |
|
| 97 |
- ADCQ DX,R15 |
|
| 98 |
- MOVQ 24(SI),AX |
|
| 99 |
- MULQ 8(CX) |
|
| 100 |
- ADDQ AX,BX |
|
| 101 |
- ADCQ DX,BP |
|
| 102 |
- MOVQ 0(SP),AX |
|
| 103 |
- MULQ 24(CX) |
|
| 104 |
- ADDQ AX,R10 |
|
| 105 |
- ADCQ DX,R11 |
|
| 106 |
- MOVQ 0(SP),AX |
|
| 107 |
- MULQ 32(CX) |
|
| 108 |
- ADDQ AX,R12 |
|
| 109 |
- ADCQ DX,R13 |
|
| 110 |
- MOVQ 32(SI),AX |
|
| 111 |
- MULQ 0(CX) |
|
| 112 |
- ADDQ AX,BX |
|
| 113 |
- ADCQ DX,BP |
|
| 114 |
- MOVQ 8(SP),AX |
|
| 115 |
- MULQ 16(CX) |
|
| 116 |
- ADDQ AX,R10 |
|
| 117 |
- ADCQ DX,R11 |
|
| 118 |
- MOVQ 8(SP),AX |
|
| 119 |
- MULQ 24(CX) |
|
| 120 |
- ADDQ AX,R12 |
|
| 121 |
- ADCQ DX,R13 |
|
| 122 |
- MOVQ 8(SP),AX |
|
| 123 |
- MULQ 32(CX) |
|
| 124 |
- ADDQ AX,R14 |
|
| 125 |
- ADCQ DX,R15 |
|
| 126 |
- MOVQ $REDMASK51,SI |
|
| 127 |
- SHLQ $13,R9:R8 |
|
| 128 |
- ANDQ SI,R8 |
|
| 129 |
- SHLQ $13,R11:R10 |
|
| 130 |
- ANDQ SI,R10 |
|
| 131 |
- ADDQ R9,R10 |
|
| 132 |
- SHLQ $13,R13:R12 |
|
| 133 |
- ANDQ SI,R12 |
|
| 134 |
- ADDQ R11,R12 |
|
| 135 |
- SHLQ $13,R15:R14 |
|
| 136 |
- ANDQ SI,R14 |
|
| 137 |
- ADDQ R13,R14 |
|
| 138 |
- SHLQ $13,BP:BX |
|
| 139 |
- ANDQ SI,BX |
|
| 140 |
- ADDQ R15,BX |
|
| 141 |
- IMUL3Q $19,BP,DX |
|
| 142 |
- ADDQ DX,R8 |
|
| 143 |
- MOVQ R8,DX |
|
| 144 |
- SHRQ $51,DX |
|
| 145 |
- ADDQ R10,DX |
|
| 146 |
- MOVQ DX,CX |
|
| 147 |
- SHRQ $51,DX |
|
| 148 |
- ANDQ SI,R8 |
|
| 149 |
- ADDQ R12,DX |
|
| 150 |
- MOVQ DX,R9 |
|
| 151 |
- SHRQ $51,DX |
|
| 152 |
- ANDQ SI,CX |
|
| 153 |
- ADDQ R14,DX |
|
| 154 |
- MOVQ DX,AX |
|
| 155 |
- SHRQ $51,DX |
|
| 156 |
- ANDQ SI,R9 |
|
| 157 |
- ADDQ BX,DX |
|
| 158 |
- MOVQ DX,R10 |
|
| 159 |
- SHRQ $51,DX |
|
| 160 |
- ANDQ SI,AX |
|
| 161 |
- IMUL3Q $19,DX,DX |
|
| 162 |
- ADDQ DX,R8 |
|
| 163 |
- ANDQ SI,R10 |
|
| 164 |
- MOVQ R8,0(DI) |
|
| 165 |
- MOVQ CX,8(DI) |
|
| 166 |
- MOVQ R9,16(DI) |
|
| 167 |
- MOVQ AX,24(DI) |
|
| 168 |
- MOVQ R10,32(DI) |
|
| 169 |
- RET |
| 170 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,132 +0,0 @@ |
| 1 |
-// Copyright 2012 The Go Authors. All rights reserved. |
|
| 2 |
-// Use of this source code is governed by a BSD-style |
|
| 3 |
-// license that can be found in the LICENSE file. |
|
| 4 |
- |
|
| 5 |
-// This code was translated into a form compatible with 6a from the public |
|
| 6 |
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html |
|
| 7 |
- |
|
| 8 |
-// +build amd64,!gccgo,!appengine |
|
| 9 |
- |
|
| 10 |
-#include "const_amd64.h" |
|
| 11 |
- |
|
| 12 |
-// func square(out, in *[5]uint64) |
|
| 13 |
-TEXT ·square(SB),7,$0-16 |
|
| 14 |
- MOVQ out+0(FP), DI |
|
| 15 |
- MOVQ in+8(FP), SI |
|
| 16 |
- |
|
| 17 |
- MOVQ 0(SI),AX |
|
| 18 |
- MULQ 0(SI) |
|
| 19 |
- MOVQ AX,CX |
|
| 20 |
- MOVQ DX,R8 |
|
| 21 |
- MOVQ 0(SI),AX |
|
| 22 |
- SHLQ $1,AX |
|
| 23 |
- MULQ 8(SI) |
|
| 24 |
- MOVQ AX,R9 |
|
| 25 |
- MOVQ DX,R10 |
|
| 26 |
- MOVQ 0(SI),AX |
|
| 27 |
- SHLQ $1,AX |
|
| 28 |
- MULQ 16(SI) |
|
| 29 |
- MOVQ AX,R11 |
|
| 30 |
- MOVQ DX,R12 |
|
| 31 |
- MOVQ 0(SI),AX |
|
| 32 |
- SHLQ $1,AX |
|
| 33 |
- MULQ 24(SI) |
|
| 34 |
- MOVQ AX,R13 |
|
| 35 |
- MOVQ DX,R14 |
|
| 36 |
- MOVQ 0(SI),AX |
|
| 37 |
- SHLQ $1,AX |
|
| 38 |
- MULQ 32(SI) |
|
| 39 |
- MOVQ AX,R15 |
|
| 40 |
- MOVQ DX,BX |
|
| 41 |
- MOVQ 8(SI),AX |
|
| 42 |
- MULQ 8(SI) |
|
| 43 |
- ADDQ AX,R11 |
|
| 44 |
- ADCQ DX,R12 |
|
| 45 |
- MOVQ 8(SI),AX |
|
| 46 |
- SHLQ $1,AX |
|
| 47 |
- MULQ 16(SI) |
|
| 48 |
- ADDQ AX,R13 |
|
| 49 |
- ADCQ DX,R14 |
|
| 50 |
- MOVQ 8(SI),AX |
|
| 51 |
- SHLQ $1,AX |
|
| 52 |
- MULQ 24(SI) |
|
| 53 |
- ADDQ AX,R15 |
|
| 54 |
- ADCQ DX,BX |
|
| 55 |
- MOVQ 8(SI),DX |
|
| 56 |
- IMUL3Q $38,DX,AX |
|
| 57 |
- MULQ 32(SI) |
|
| 58 |
- ADDQ AX,CX |
|
| 59 |
- ADCQ DX,R8 |
|
| 60 |
- MOVQ 16(SI),AX |
|
| 61 |
- MULQ 16(SI) |
|
| 62 |
- ADDQ AX,R15 |
|
| 63 |
- ADCQ DX,BX |
|
| 64 |
- MOVQ 16(SI),DX |
|
| 65 |
- IMUL3Q $38,DX,AX |
|
| 66 |
- MULQ 24(SI) |
|
| 67 |
- ADDQ AX,CX |
|
| 68 |
- ADCQ DX,R8 |
|
| 69 |
- MOVQ 16(SI),DX |
|
| 70 |
- IMUL3Q $38,DX,AX |
|
| 71 |
- MULQ 32(SI) |
|
| 72 |
- ADDQ AX,R9 |
|
| 73 |
- ADCQ DX,R10 |
|
| 74 |
- MOVQ 24(SI),DX |
|
| 75 |
- IMUL3Q $19,DX,AX |
|
| 76 |
- MULQ 24(SI) |
|
| 77 |
- ADDQ AX,R9 |
|
| 78 |
- ADCQ DX,R10 |
|
| 79 |
- MOVQ 24(SI),DX |
|
| 80 |
- IMUL3Q $38,DX,AX |
|
| 81 |
- MULQ 32(SI) |
|
| 82 |
- ADDQ AX,R11 |
|
| 83 |
- ADCQ DX,R12 |
|
| 84 |
- MOVQ 32(SI),DX |
|
| 85 |
- IMUL3Q $19,DX,AX |
|
| 86 |
- MULQ 32(SI) |
|
| 87 |
- ADDQ AX,R13 |
|
| 88 |
- ADCQ DX,R14 |
|
| 89 |
- MOVQ $REDMASK51,SI |
|
| 90 |
- SHLQ $13,R8:CX |
|
| 91 |
- ANDQ SI,CX |
|
| 92 |
- SHLQ $13,R10:R9 |
|
| 93 |
- ANDQ SI,R9 |
|
| 94 |
- ADDQ R8,R9 |
|
| 95 |
- SHLQ $13,R12:R11 |
|
| 96 |
- ANDQ SI,R11 |
|
| 97 |
- ADDQ R10,R11 |
|
| 98 |
- SHLQ $13,R14:R13 |
|
| 99 |
- ANDQ SI,R13 |
|
| 100 |
- ADDQ R12,R13 |
|
| 101 |
- SHLQ $13,BX:R15 |
|
| 102 |
- ANDQ SI,R15 |
|
| 103 |
- ADDQ R14,R15 |
|
| 104 |
- IMUL3Q $19,BX,DX |
|
| 105 |
- ADDQ DX,CX |
|
| 106 |
- MOVQ CX,DX |
|
| 107 |
- SHRQ $51,DX |
|
| 108 |
- ADDQ R9,DX |
|
| 109 |
- ANDQ SI,CX |
|
| 110 |
- MOVQ DX,R8 |
|
| 111 |
- SHRQ $51,DX |
|
| 112 |
- ADDQ R11,DX |
|
| 113 |
- ANDQ SI,R8 |
|
| 114 |
- MOVQ DX,R9 |
|
| 115 |
- SHRQ $51,DX |
|
| 116 |
- ADDQ R13,DX |
|
| 117 |
- ANDQ SI,R9 |
|
| 118 |
- MOVQ DX,AX |
|
| 119 |
- SHRQ $51,DX |
|
| 120 |
- ADDQ R15,DX |
|
| 121 |
- ANDQ SI,AX |
|
| 122 |
- MOVQ DX,R10 |
|
| 123 |
- SHRQ $51,DX |
|
| 124 |
- IMUL3Q $19,DX,DX |
|
| 125 |
- ADDQ DX,CX |
|
| 126 |
- ANDQ SI,R10 |
|
| 127 |
- MOVQ CX,0(DI) |
|
| 128 |
- MOVQ R8,8(DI) |
|
| 129 |
- MOVQ R9,16(DI) |
|
| 130 |
- MOVQ AX,24(DI) |
|
| 131 |
- MOVQ R10,32(DI) |
|
| 132 |
- RET |
| ... | ... |
@@ -2,6 +2,11 @@ |
| 2 | 2 |
// Use of this source code is governed by a BSD-style |
| 3 | 3 |
// license that can be found in the LICENSE file. |
| 4 | 4 |
|
| 5 |
+// In Go 1.13, the ed25519 package was promoted to the standard library as |
|
| 6 |
+// crypto/ed25519, and this package became a wrapper for the standard library one. |
|
| 7 |
+// |
|
| 8 |
+// +build !go1.13 |
|
| 9 |
+ |
|
| 5 | 10 |
// Package ed25519 implements the Ed25519 signature algorithm. See |
| 6 | 11 |
// https://ed25519.cr.yp.to/. |
| 7 | 12 |
// |
| 8 | 13 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,73 @@ |
| 0 |
+// Copyright 2019 The Go Authors. All rights reserved. |
|
| 1 |
+// Use of this source code is governed by a BSD-style |
|
| 2 |
+// license that can be found in the LICENSE file. |
|
| 3 |
+ |
|
| 4 |
+// +build go1.13 |
|
| 5 |
+ |
|
| 6 |
+// Package ed25519 implements the Ed25519 signature algorithm. See |
|
| 7 |
+// https://ed25519.cr.yp.to/. |
|
| 8 |
+// |
|
| 9 |
+// These functions are also compatible with the “Ed25519” function defined in |
|
| 10 |
+// RFC 8032. However, unlike RFC 8032's formulation, this package's private key |
|
| 11 |
+// representation includes a public key suffix to make multiple signing |
|
| 12 |
+// operations with the same key more efficient. This package refers to the RFC |
|
| 13 |
+// 8032 private key as the “seed”. |
|
| 14 |
+// |
|
| 15 |
+// Beginning with Go 1.13, the functionality of this package was moved to the |
|
| 16 |
+// standard library as crypto/ed25519. This package only acts as a compatibility |
|
| 17 |
+// wrapper. |
|
| 18 |
+package ed25519 |
|
| 19 |
+ |
|
| 20 |
+import ( |
|
| 21 |
+ "crypto/ed25519" |
|
| 22 |
+ "io" |
|
| 23 |
+) |
|
| 24 |
+ |
|
| 25 |
+const ( |
|
| 26 |
+ // PublicKeySize is the size, in bytes, of public keys as used in this package. |
|
| 27 |
+ PublicKeySize = 32 |
|
| 28 |
+ // PrivateKeySize is the size, in bytes, of private keys as used in this package. |
|
| 29 |
+ PrivateKeySize = 64 |
|
| 30 |
+ // SignatureSize is the size, in bytes, of signatures generated and verified by this package. |
|
| 31 |
+ SignatureSize = 64 |
|
| 32 |
+ // SeedSize is the size, in bytes, of private key seeds. These are the private key representations used by RFC 8032. |
|
| 33 |
+ SeedSize = 32 |
|
| 34 |
+) |
|
| 35 |
+ |
|
| 36 |
+// PublicKey is the type of Ed25519 public keys. |
|
| 37 |
+// |
|
| 38 |
+// This type is an alias for crypto/ed25519's PublicKey type. |
|
| 39 |
+// See the crypto/ed25519 package for the methods on this type. |
|
| 40 |
+type PublicKey = ed25519.PublicKey |
|
| 41 |
+ |
|
| 42 |
+// PrivateKey is the type of Ed25519 private keys. It implements crypto.Signer. |
|
| 43 |
+// |
|
| 44 |
+// This type is an alias for crypto/ed25519's PrivateKey type. |
|
| 45 |
+// See the crypto/ed25519 package for the methods on this type. |
|
| 46 |
+type PrivateKey = ed25519.PrivateKey |
|
| 47 |
+ |
|
| 48 |
+// GenerateKey generates a public/private key pair using entropy from rand. |
|
| 49 |
+// If rand is nil, crypto/rand.Reader will be used. |
|
| 50 |
+func GenerateKey(rand io.Reader) (PublicKey, PrivateKey, error) {
|
|
| 51 |
+ return ed25519.GenerateKey(rand) |
|
| 52 |
+} |
|
| 53 |
+ |
|
| 54 |
+// NewKeyFromSeed calculates a private key from a seed. It will panic if |
|
| 55 |
+// len(seed) is not SeedSize. This function is provided for interoperability |
|
| 56 |
+// with RFC 8032. RFC 8032's private keys correspond to seeds in this |
|
| 57 |
+// package. |
|
| 58 |
+func NewKeyFromSeed(seed []byte) PrivateKey {
|
|
| 59 |
+ return ed25519.NewKeyFromSeed(seed) |
|
| 60 |
+} |
|
| 61 |
+ |
|
| 62 |
+// Sign signs the message with privateKey and returns a signature. It will |
|
| 63 |
+// panic if len(privateKey) is not PrivateKeySize. |
|
| 64 |
+func Sign(privateKey PrivateKey, message []byte) []byte {
|
|
| 65 |
+ return ed25519.Sign(privateKey, message) |
|
| 66 |
+} |
|
| 67 |
+ |
|
| 68 |
+// Verify reports whether sig is a valid signature of message by publicKey. It |
|
| 69 |
+// will panic if len(publicKey) is not PublicKeySize. |
|
| 70 |
+func Verify(publicKey PublicKey, message, sig []byte) bool {
|
|
| 71 |
+ return ed25519.Verify(publicKey, message, sig) |
|
| 72 |
+} |
| ... | ... |
@@ -252,6 +252,7 @@ func Decode(pfxData []byte, password string) (privateKey interface{}, certificat
|
| 252 | 252 |
case bag.Id.Equal(oidPKCS8ShroundedKeyBag): |
| 253 | 253 |
if privateKey != nil {
|
| 254 | 254 |
err = errors.New("pkcs12: expected exactly one key bag")
|
| 255 |
+ return nil, nil, err |
|
| 255 | 256 |
} |
| 256 | 257 |
|
| 257 | 258 |
if privateKey, err = decodePkcs8ShroudedKeyBag(bag.Value.Bytes, encodedPassword); err != nil {
|
| 258 | 259 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,39 @@ |
| 0 |
+// Copyright 2019 The Go Authors. All rights reserved. |
|
| 1 |
+// Use of this source code is governed by a BSD-style |
|
| 2 |
+// license that can be found in the LICENSE file. |
|
| 3 |
+ |
|
| 4 |
+// +build !go1.13 |
|
| 5 |
+ |
|
| 6 |
+package poly1305 |
|
| 7 |
+ |
|
| 8 |
+// Generic fallbacks for the math/bits intrinsics, copied from |
|
| 9 |
+// src/math/bits/bits.go. They were added in Go 1.12, but Add64 and Sub64 had |
|
| 10 |
+// variable time fallbacks until Go 1.13. |
|
| 11 |
+ |
|
| 12 |
+func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
|
|
| 13 |
+ sum = x + y + carry |
|
| 14 |
+ carryOut = ((x & y) | ((x | y) &^ sum)) >> 63 |
|
| 15 |
+ return |
|
| 16 |
+} |
|
| 17 |
+ |
|
| 18 |
+func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
|
|
| 19 |
+ diff = x - y - borrow |
|
| 20 |
+ borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 63 |
|
| 21 |
+ return |
|
| 22 |
+} |
|
| 23 |
+ |
|
| 24 |
+func bitsMul64(x, y uint64) (hi, lo uint64) {
|
|
| 25 |
+ const mask32 = 1<<32 - 1 |
|
| 26 |
+ x0 := x & mask32 |
|
| 27 |
+ x1 := x >> 32 |
|
| 28 |
+ y0 := y & mask32 |
|
| 29 |
+ y1 := y >> 32 |
|
| 30 |
+ w0 := x0 * y0 |
|
| 31 |
+ t := x1*y0 + w0>>32 |
|
| 32 |
+ w1 := t & mask32 |
|
| 33 |
+ w2 := t >> 32 |
|
| 34 |
+ w1 += x0 * y1 |
|
| 35 |
+ hi = x1*y1 + w2 + w1>>32 |
|
| 36 |
+ lo = x * y |
|
| 37 |
+ return |
|
| 38 |
+} |
| 0 | 39 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,21 @@ |
| 0 |
+// Copyright 2019 The Go Authors. All rights reserved. |
|
| 1 |
+// Use of this source code is governed by a BSD-style |
|
| 2 |
+// license that can be found in the LICENSE file. |
|
| 3 |
+ |
|
| 4 |
+// +build go1.13 |
|
| 5 |
+ |
|
| 6 |
+package poly1305 |
|
| 7 |
+ |
|
| 8 |
+import "math/bits" |
|
| 9 |
+ |
|
| 10 |
+func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
|
|
| 11 |
+ return bits.Add64(x, y, carry) |
|
| 12 |
+} |
|
| 13 |
+ |
|
| 14 |
+func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
|
|
| 15 |
+ return bits.Sub64(x, y, borrow) |
|
| 16 |
+} |
|
| 17 |
+ |
|
| 18 |
+func bitsMul64(x, y uint64) (hi, lo uint64) {
|
|
| 19 |
+ return bits.Mul64(x, y) |
|
| 20 |
+} |
| ... | ... |
@@ -22,8 +22,14 @@ import "crypto/subtle" |
| 22 | 22 |
// TagSize is the size, in bytes, of a poly1305 authenticator. |
| 23 | 23 |
const TagSize = 16 |
| 24 | 24 |
|
| 25 |
-// Verify returns true if mac is a valid authenticator for m with the given |
|
| 26 |
-// key. |
|
| 25 |
+// Sum generates an authenticator for m using a one-time key and puts the |
|
| 26 |
+// 16-byte result into out. Authenticating two different messages with the same |
|
| 27 |
+// key allows an attacker to forge messages at will. |
|
| 28 |
+func Sum(out *[16]byte, m []byte, key *[32]byte) {
|
|
| 29 |
+ sum(out, m, key) |
|
| 30 |
+} |
|
| 31 |
+ |
|
| 32 |
+// Verify returns true if mac is a valid authenticator for m with the given key. |
|
| 27 | 33 |
func Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
|
| 28 | 34 |
var tmp [16]byte |
| 29 | 35 |
Sum(&tmp, m, key) |
| ... | ... |
@@ -7,62 +7,52 @@ |
| 7 | 7 |
package poly1305 |
| 8 | 8 |
|
| 9 | 9 |
//go:noescape |
| 10 |
-func initialize(state *[7]uint64, key *[32]byte) |
|
| 10 |
+func update(state *macState, msg []byte) |
|
| 11 | 11 |
|
| 12 |
-//go:noescape |
|
| 13 |
-func update(state *[7]uint64, msg []byte) |
|
| 14 |
- |
|
| 15 |
-//go:noescape |
|
| 16 |
-func finalize(tag *[TagSize]byte, state *[7]uint64) |
|
| 17 |
- |
|
| 18 |
-// Sum generates an authenticator for m using a one-time key and puts the |
|
| 19 |
-// 16-byte result into out. Authenticating two different messages with the same |
|
| 20 |
-// key allows an attacker to forge messages at will. |
|
| 21 |
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
|
|
| 12 |
+func sum(out *[16]byte, m []byte, key *[32]byte) {
|
|
| 22 | 13 |
h := newMAC(key) |
| 23 | 14 |
h.Write(m) |
| 24 | 15 |
h.Sum(out) |
| 25 | 16 |
} |
| 26 | 17 |
|
| 27 | 18 |
func newMAC(key *[32]byte) (h mac) {
|
| 28 |
- initialize(&h.state, key) |
|
| 19 |
+ initialize(key, &h.r, &h.s) |
|
| 29 | 20 |
return |
| 30 | 21 |
} |
| 31 | 22 |
|
| 32 |
-type mac struct {
|
|
| 33 |
- state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
|
|
| 34 |
- |
|
| 35 |
- buffer [TagSize]byte |
|
| 36 |
- offset int |
|
| 37 |
-} |
|
| 23 |
+// mac is a wrapper for macGeneric that redirects calls that would have gone to |
|
| 24 |
+// updateGeneric to update. |
|
| 25 |
+// |
|
| 26 |
+// Its Write and Sum methods are otherwise identical to the macGeneric ones, but |
|
| 27 |
+// using function pointers would carry a major performance cost. |
|
| 28 |
+type mac struct{ macGeneric }
|
|
| 38 | 29 |
|
| 39 |
-func (h *mac) Write(p []byte) (n int, err error) {
|
|
| 40 |
- n = len(p) |
|
| 30 |
+func (h *mac) Write(p []byte) (int, error) {
|
|
| 31 |
+ nn := len(p) |
|
| 41 | 32 |
if h.offset > 0 {
|
| 42 |
- remaining := TagSize - h.offset |
|
| 43 |
- if n < remaining {
|
|
| 44 |
- h.offset += copy(h.buffer[h.offset:], p) |
|
| 45 |
- return n, nil |
|
| 33 |
+ n := copy(h.buffer[h.offset:], p) |
|
| 34 |
+ if h.offset+n < TagSize {
|
|
| 35 |
+ h.offset += n |
|
| 36 |
+ return nn, nil |
|
| 46 | 37 |
} |
| 47 |
- copy(h.buffer[h.offset:], p[:remaining]) |
|
| 48 |
- p = p[remaining:] |
|
| 38 |
+ p = p[n:] |
|
| 49 | 39 |
h.offset = 0 |
| 50 |
- update(&h.state, h.buffer[:]) |
|
| 40 |
+ update(&h.macState, h.buffer[:]) |
|
| 51 | 41 |
} |
| 52 |
- if nn := len(p) - (len(p) % TagSize); nn > 0 {
|
|
| 53 |
- update(&h.state, p[:nn]) |
|
| 54 |
- p = p[nn:] |
|
| 42 |
+ if n := len(p) - (len(p) % TagSize); n > 0 {
|
|
| 43 |
+ update(&h.macState, p[:n]) |
|
| 44 |
+ p = p[n:] |
|
| 55 | 45 |
} |
| 56 | 46 |
if len(p) > 0 {
|
| 57 | 47 |
h.offset += copy(h.buffer[h.offset:], p) |
| 58 | 48 |
} |
| 59 |
- return n, nil |
|
| 49 |
+ return nn, nil |
|
| 60 | 50 |
} |
| 61 | 51 |
|
| 62 | 52 |
func (h *mac) Sum(out *[16]byte) {
|
| 63 |
- state := h.state |
|
| 53 |
+ state := h.macState |
|
| 64 | 54 |
if h.offset > 0 {
|
| 65 | 55 |
update(&state, h.buffer[:h.offset]) |
| 66 | 56 |
} |
| 67 |
- finalize(out, &state) |
|
| 57 |
+ finalize(out, &state.h, &state.s) |
|
| 68 | 58 |
} |
| ... | ... |
@@ -54,10 +54,6 @@ |
| 54 | 54 |
ADCQ t3, h1; \ |
| 55 | 55 |
ADCQ $0, h2 |
| 56 | 56 |
|
| 57 |
-DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF |
|
| 58 |
-DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC |
|
| 59 |
-GLOBL ·poly1305Mask<>(SB), RODATA, $16 |
|
| 60 |
- |
|
| 61 | 57 |
// func update(state *macState, msg []byte) |
| 62 | 58 |
TEXT ·update(SB), $0-32 |
| 63 | 59 |
MOVQ state+0(FP), DI |
| ... | ... |
@@ -110,39 +106,3 @@ done: |
| 110 | 110 |
MOVQ R9, 8(DI) |
| 111 | 111 |
MOVQ R10, 16(DI) |
| 112 | 112 |
RET |
| 113 |
- |
|
| 114 |
-// func initialize(state *[7]uint64, key *[32]byte) |
|
| 115 |
-TEXT ·initialize(SB), $0-16 |
|
| 116 |
- MOVQ state+0(FP), DI |
|
| 117 |
- MOVQ key+8(FP), SI |
|
| 118 |
- |
|
| 119 |
- // state[0...7] is initialized with zero |
|
| 120 |
- MOVOU 0(SI), X0 |
|
| 121 |
- MOVOU 16(SI), X1 |
|
| 122 |
- MOVOU ·poly1305Mask<>(SB), X2 |
|
| 123 |
- PAND X2, X0 |
|
| 124 |
- MOVOU X0, 24(DI) |
|
| 125 |
- MOVOU X1, 40(DI) |
|
| 126 |
- RET |
|
| 127 |
- |
|
| 128 |
-// func finalize(tag *[TagSize]byte, state *[7]uint64) |
|
| 129 |
-TEXT ·finalize(SB), $0-16 |
|
| 130 |
- MOVQ tag+0(FP), DI |
|
| 131 |
- MOVQ state+8(FP), SI |
|
| 132 |
- |
|
| 133 |
- MOVQ 0(SI), AX |
|
| 134 |
- MOVQ 8(SI), BX |
|
| 135 |
- MOVQ 16(SI), CX |
|
| 136 |
- MOVQ AX, R8 |
|
| 137 |
- MOVQ BX, R9 |
|
| 138 |
- SUBQ $0xFFFFFFFFFFFFFFFB, AX |
|
| 139 |
- SBBQ $0xFFFFFFFFFFFFFFFF, BX |
|
| 140 |
- SBBQ $3, CX |
|
| 141 |
- CMOVQCS R8, AX |
|
| 142 |
- CMOVQCS R9, BX |
|
| 143 |
- ADDQ 40(SI), AX |
|
| 144 |
- ADCQ 48(SI), BX |
|
| 145 |
- |
|
| 146 |
- MOVQ AX, 0(DI) |
|
| 147 |
- MOVQ BX, 8(DI) |
|
| 148 |
- RET |
| 149 | 113 |
deleted file mode 100644 |
| ... | ... |
@@ -1,22 +0,0 @@ |
| 1 |
-// Copyright 2015 The Go Authors. All rights reserved. |
|
| 2 |
-// Use of this source code is governed by a BSD-style |
|
| 3 |
-// license that can be found in the LICENSE file. |
|
| 4 |
- |
|
| 5 |
-// +build arm,!gccgo,!appengine,!nacl |
|
| 6 |
- |
|
| 7 |
-package poly1305 |
|
| 8 |
- |
|
| 9 |
-// This function is implemented in sum_arm.s |
|
| 10 |
-//go:noescape |
|
| 11 |
-func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte) |
|
| 12 |
- |
|
| 13 |
-// Sum generates an authenticator for m using a one-time key and puts the |
|
| 14 |
-// 16-byte result into out. Authenticating two different messages with the same |
|
| 15 |
-// key allows an attacker to forge messages at will. |
|
| 16 |
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
|
|
| 17 |
- var mPtr *byte |
|
| 18 |
- if len(m) > 0 {
|
|
| 19 |
- mPtr = &m[0] |
|
| 20 |
- } |
|
| 21 |
- poly1305_auth_armv6(out, mPtr, uint32(len(m)), key) |
|
| 22 |
-} |
| 23 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,427 +0,0 @@ |
| 1 |
-// Copyright 2015 The Go Authors. All rights reserved. |
|
| 2 |
-// Use of this source code is governed by a BSD-style |
|
| 3 |
-// license that can be found in the LICENSE file. |
|
| 4 |
- |
|
| 5 |
-// +build arm,!gccgo,!appengine,!nacl |
|
| 6 |
- |
|
| 7 |
-#include "textflag.h" |
|
| 8 |
- |
|
| 9 |
-// This code was translated into a form compatible with 5a from the public |
|
| 10 |
-// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305. |
|
| 11 |
- |
|
| 12 |
-DATA ·poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff |
|
| 13 |
-DATA ·poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03 |
|
| 14 |
-DATA ·poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff |
|
| 15 |
-DATA ·poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff |
|
| 16 |
-DATA ·poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff |
|
| 17 |
-GLOBL ·poly1305_init_constants_armv6<>(SB), 8, $20 |
|
| 18 |
- |
|
| 19 |
-// Warning: the linker may use R11 to synthesize certain instructions. Please |
|
| 20 |
-// take care and verify that no synthetic instructions use it. |
|
| 21 |
- |
|
| 22 |
-TEXT poly1305_init_ext_armv6<>(SB), NOSPLIT, $0 |
|
| 23 |
- // Needs 16 bytes of stack and 64 bytes of space pointed to by R0. (It |
|
| 24 |
- // might look like it's only 60 bytes of space but the final four bytes |
|
| 25 |
- // will be written by another function.) We need to skip over four |
|
| 26 |
- // bytes of stack because that's saving the value of 'g'. |
|
| 27 |
- ADD $4, R13, R8 |
|
| 28 |
- MOVM.IB [R4-R7], (R8) |
|
| 29 |
- MOVM.IA.W (R1), [R2-R5] |
|
| 30 |
- MOVW $·poly1305_init_constants_armv6<>(SB), R7 |
|
| 31 |
- MOVW R2, R8 |
|
| 32 |
- MOVW R2>>26, R9 |
|
| 33 |
- MOVW R3>>20, g |
|
| 34 |
- MOVW R4>>14, R11 |
|
| 35 |
- MOVW R5>>8, R12 |
|
| 36 |
- ORR R3<<6, R9, R9 |
|
| 37 |
- ORR R4<<12, g, g |
|
| 38 |
- ORR R5<<18, R11, R11 |
|
| 39 |
- MOVM.IA (R7), [R2-R6] |
|
| 40 |
- AND R8, R2, R2 |
|
| 41 |
- AND R9, R3, R3 |
|
| 42 |
- AND g, R4, R4 |
|
| 43 |
- AND R11, R5, R5 |
|
| 44 |
- AND R12, R6, R6 |
|
| 45 |
- MOVM.IA.W [R2-R6], (R0) |
|
| 46 |
- EOR R2, R2, R2 |
|
| 47 |
- EOR R3, R3, R3 |
|
| 48 |
- EOR R4, R4, R4 |
|
| 49 |
- EOR R5, R5, R5 |
|
| 50 |
- EOR R6, R6, R6 |
|
| 51 |
- MOVM.IA.W [R2-R6], (R0) |
|
| 52 |
- MOVM.IA.W (R1), [R2-R5] |
|
| 53 |
- MOVM.IA [R2-R6], (R0) |
|
| 54 |
- ADD $20, R13, R0 |
|
| 55 |
- MOVM.DA (R0), [R4-R7] |
|
| 56 |
- RET |
|
| 57 |
- |
|
| 58 |
-#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \ |
|
| 59 |
- MOVBU (offset+0)(Rsrc), Rtmp; \ |
|
| 60 |
- MOVBU Rtmp, (offset+0)(Rdst); \ |
|
| 61 |
- MOVBU (offset+1)(Rsrc), Rtmp; \ |
|
| 62 |
- MOVBU Rtmp, (offset+1)(Rdst); \ |
|
| 63 |
- MOVBU (offset+2)(Rsrc), Rtmp; \ |
|
| 64 |
- MOVBU Rtmp, (offset+2)(Rdst); \ |
|
| 65 |
- MOVBU (offset+3)(Rsrc), Rtmp; \ |
|
| 66 |
- MOVBU Rtmp, (offset+3)(Rdst) |
|
| 67 |
- |
|
| 68 |
-TEXT poly1305_blocks_armv6<>(SB), NOSPLIT, $0 |
|
| 69 |
- // Needs 24 bytes of stack for saved registers and then 88 bytes of |
|
| 70 |
- // scratch space after that. We assume that 24 bytes at (R13) have |
|
| 71 |
- // already been used: four bytes for the link register saved in the |
|
| 72 |
- // prelude of poly1305_auth_armv6, four bytes for saving the value of g |
|
| 73 |
- // in that function and 16 bytes of scratch space used around |
|
| 74 |
- // poly1305_finish_ext_armv6_skip1. |
|
| 75 |
- ADD $24, R13, R12 |
|
| 76 |
- MOVM.IB [R4-R8, R14], (R12) |
|
| 77 |
- MOVW R0, 88(R13) |
|
| 78 |
- MOVW R1, 92(R13) |
|
| 79 |
- MOVW R2, 96(R13) |
|
| 80 |
- MOVW R1, R14 |
|
| 81 |
- MOVW R2, R12 |
|
| 82 |
- MOVW 56(R0), R8 |
|
| 83 |
- WORD $0xe1180008 // TST R8, R8 not working see issue 5921 |
|
| 84 |
- EOR R6, R6, R6 |
|
| 85 |
- MOVW.EQ $(1<<24), R6 |
|
| 86 |
- MOVW R6, 84(R13) |
|
| 87 |
- ADD $116, R13, g |
|
| 88 |
- MOVM.IA (R0), [R0-R9] |
|
| 89 |
- MOVM.IA [R0-R4], (g) |
|
| 90 |
- CMP $16, R12 |
|
| 91 |
- BLO poly1305_blocks_armv6_done |
|
| 92 |
- |
|
| 93 |
-poly1305_blocks_armv6_mainloop: |
|
| 94 |
- WORD $0xe31e0003 // TST R14, #3 not working see issue 5921 |
|
| 95 |
- BEQ poly1305_blocks_armv6_mainloop_aligned |
|
| 96 |
- ADD $100, R13, g |
|
| 97 |
- MOVW_UNALIGNED(R14, g, R0, 0) |
|
| 98 |
- MOVW_UNALIGNED(R14, g, R0, 4) |
|
| 99 |
- MOVW_UNALIGNED(R14, g, R0, 8) |
|
| 100 |
- MOVW_UNALIGNED(R14, g, R0, 12) |
|
| 101 |
- MOVM.IA (g), [R0-R3] |
|
| 102 |
- ADD $16, R14 |
|
| 103 |
- B poly1305_blocks_armv6_mainloop_loaded |
|
| 104 |
- |
|
| 105 |
-poly1305_blocks_armv6_mainloop_aligned: |
|
| 106 |
- MOVM.IA.W (R14), [R0-R3] |
|
| 107 |
- |
|
| 108 |
-poly1305_blocks_armv6_mainloop_loaded: |
|
| 109 |
- MOVW R0>>26, g |
|
| 110 |
- MOVW R1>>20, R11 |
|
| 111 |
- MOVW R2>>14, R12 |
|
| 112 |
- MOVW R14, 92(R13) |
|
| 113 |
- MOVW R3>>8, R4 |
|
| 114 |
- ORR R1<<6, g, g |
|
| 115 |
- ORR R2<<12, R11, R11 |
|
| 116 |
- ORR R3<<18, R12, R12 |
|
| 117 |
- BIC $0xfc000000, R0, R0 |
|
| 118 |
- BIC $0xfc000000, g, g |
|
| 119 |
- MOVW 84(R13), R3 |
|
| 120 |
- BIC $0xfc000000, R11, R11 |
|
| 121 |
- BIC $0xfc000000, R12, R12 |
|
| 122 |
- ADD R0, R5, R5 |
|
| 123 |
- ADD g, R6, R6 |
|
| 124 |
- ORR R3, R4, R4 |
|
| 125 |
- ADD R11, R7, R7 |
|
| 126 |
- ADD $116, R13, R14 |
|
| 127 |
- ADD R12, R8, R8 |
|
| 128 |
- ADD R4, R9, R9 |
|
| 129 |
- MOVM.IA (R14), [R0-R4] |
|
| 130 |
- MULLU R4, R5, (R11, g) |
|
| 131 |
- MULLU R3, R5, (R14, R12) |
|
| 132 |
- MULALU R3, R6, (R11, g) |
|
| 133 |
- MULALU R2, R6, (R14, R12) |
|
| 134 |
- MULALU R2, R7, (R11, g) |
|
| 135 |
- MULALU R1, R7, (R14, R12) |
|
| 136 |
- ADD R4<<2, R4, R4 |
|
| 137 |
- ADD R3<<2, R3, R3 |
|
| 138 |
- MULALU R1, R8, (R11, g) |
|
| 139 |
- MULALU R0, R8, (R14, R12) |
|
| 140 |
- MULALU R0, R9, (R11, g) |
|
| 141 |
- MULALU R4, R9, (R14, R12) |
|
| 142 |
- MOVW g, 76(R13) |
|
| 143 |
- MOVW R11, 80(R13) |
|
| 144 |
- MOVW R12, 68(R13) |
|
| 145 |
- MOVW R14, 72(R13) |
|
| 146 |
- MULLU R2, R5, (R11, g) |
|
| 147 |
- MULLU R1, R5, (R14, R12) |
|
| 148 |
- MULALU R1, R6, (R11, g) |
|
| 149 |
- MULALU R0, R6, (R14, R12) |
|
| 150 |
- MULALU R0, R7, (R11, g) |
|
| 151 |
- MULALU R4, R7, (R14, R12) |
|
| 152 |
- ADD R2<<2, R2, R2 |
|
| 153 |
- ADD R1<<2, R1, R1 |
|
| 154 |
- MULALU R4, R8, (R11, g) |
|
| 155 |
- MULALU R3, R8, (R14, R12) |
|
| 156 |
- MULALU R3, R9, (R11, g) |
|
| 157 |
- MULALU R2, R9, (R14, R12) |
|
| 158 |
- MOVW g, 60(R13) |
|
| 159 |
- MOVW R11, 64(R13) |
|
| 160 |
- MOVW R12, 52(R13) |
|
| 161 |
- MOVW R14, 56(R13) |
|
| 162 |
- MULLU R0, R5, (R11, g) |
|
| 163 |
- MULALU R4, R6, (R11, g) |
|
| 164 |
- MULALU R3, R7, (R11, g) |
|
| 165 |
- MULALU R2, R8, (R11, g) |
|
| 166 |
- MULALU R1, R9, (R11, g) |
|
| 167 |
- ADD $52, R13, R0 |
|
| 168 |
- MOVM.IA (R0), [R0-R7] |
|
| 169 |
- MOVW g>>26, R12 |
|
| 170 |
- MOVW R4>>26, R14 |
|
| 171 |
- ORR R11<<6, R12, R12 |
|
| 172 |
- ORR R5<<6, R14, R14 |
|
| 173 |
- BIC $0xfc000000, g, g |
|
| 174 |
- BIC $0xfc000000, R4, R4 |
|
| 175 |
- ADD.S R12, R0, R0 |
|
| 176 |
- ADC $0, R1, R1 |
|
| 177 |
- ADD.S R14, R6, R6 |
|
| 178 |
- ADC $0, R7, R7 |
|
| 179 |
- MOVW R0>>26, R12 |
|
| 180 |
- MOVW R6>>26, R14 |
|
| 181 |
- ORR R1<<6, R12, R12 |
|
| 182 |
- ORR R7<<6, R14, R14 |
|
| 183 |
- BIC $0xfc000000, R0, R0 |
|
| 184 |
- BIC $0xfc000000, R6, R6 |
|
| 185 |
- ADD R14<<2, R14, R14 |
|
| 186 |
- ADD.S R12, R2, R2 |
|
| 187 |
- ADC $0, R3, R3 |
|
| 188 |
- ADD R14, g, g |
|
| 189 |
- MOVW R2>>26, R12 |
|
| 190 |
- MOVW g>>26, R14 |
|
| 191 |
- ORR R3<<6, R12, R12 |
|
| 192 |
- BIC $0xfc000000, g, R5 |
|
| 193 |
- BIC $0xfc000000, R2, R7 |
|
| 194 |
- ADD R12, R4, R4 |
|
| 195 |
- ADD R14, R0, R0 |
|
| 196 |
- MOVW R4>>26, R12 |
|
| 197 |
- BIC $0xfc000000, R4, R8 |
|
| 198 |
- ADD R12, R6, R9 |
|
| 199 |
- MOVW 96(R13), R12 |
|
| 200 |
- MOVW 92(R13), R14 |
|
| 201 |
- MOVW R0, R6 |
|
| 202 |
- CMP $32, R12 |
|
| 203 |
- SUB $16, R12, R12 |
|
| 204 |
- MOVW R12, 96(R13) |
|
| 205 |
- BHS poly1305_blocks_armv6_mainloop |
|
| 206 |
- |
|
| 207 |
-poly1305_blocks_armv6_done: |
|
| 208 |
- MOVW 88(R13), R12 |
|
| 209 |
- MOVW R5, 20(R12) |
|
| 210 |
- MOVW R6, 24(R12) |
|
| 211 |
- MOVW R7, 28(R12) |
|
| 212 |
- MOVW R8, 32(R12) |
|
| 213 |
- MOVW R9, 36(R12) |
|
| 214 |
- ADD $48, R13, R0 |
|
| 215 |
- MOVM.DA (R0), [R4-R8, R14] |
|
| 216 |
- RET |
|
| 217 |
- |
|
| 218 |
-#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \ |
|
| 219 |
- MOVBU.P 1(Rsrc), Rtmp; \ |
|
| 220 |
- MOVBU.P Rtmp, 1(Rdst); \ |
|
| 221 |
- MOVBU.P 1(Rsrc), Rtmp; \ |
|
| 222 |
- MOVBU.P Rtmp, 1(Rdst) |
|
| 223 |
- |
|
| 224 |
-#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \ |
|
| 225 |
- MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \ |
|
| 226 |
- MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) |
|
| 227 |
- |
|
| 228 |
-// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]key) |
|
| 229 |
-TEXT ·poly1305_auth_armv6(SB), $196-16 |
|
| 230 |
- // The value 196, just above, is the sum of 64 (the size of the context |
|
| 231 |
- // structure) and 132 (the amount of stack needed). |
|
| 232 |
- // |
|
| 233 |
- // At this point, the stack pointer (R13) has been moved down. It |
|
| 234 |
- // points to the saved link register and there's 196 bytes of free |
|
| 235 |
- // space above it. |
|
| 236 |
- // |
|
| 237 |
- // The stack for this function looks like: |
|
| 238 |
- // |
|
| 239 |
- // +--------------------- |
|
| 240 |
- // | |
|
| 241 |
- // | 64 bytes of context structure |
|
| 242 |
- // | |
|
| 243 |
- // +--------------------- |
|
| 244 |
- // | |
|
| 245 |
- // | 112 bytes for poly1305_blocks_armv6 |
|
| 246 |
- // | |
|
| 247 |
- // +--------------------- |
|
| 248 |
- // | 16 bytes of final block, constructed at |
|
| 249 |
- // | poly1305_finish_ext_armv6_skip8 |
|
| 250 |
- // +--------------------- |
|
| 251 |
- // | four bytes of saved 'g' |
|
| 252 |
- // +--------------------- |
|
| 253 |
- // | lr, saved by prelude <- R13 points here |
|
| 254 |
- // +--------------------- |
|
| 255 |
- MOVW g, 4(R13) |
|
| 256 |
- |
|
| 257 |
- MOVW out+0(FP), R4 |
|
| 258 |
- MOVW m+4(FP), R5 |
|
| 259 |
- MOVW mlen+8(FP), R6 |
|
| 260 |
- MOVW key+12(FP), R7 |
|
| 261 |
- |
|
| 262 |
- ADD $136, R13, R0 // 136 = 4 + 4 + 16 + 112 |
|
| 263 |
- MOVW R7, R1 |
|
| 264 |
- |
|
| 265 |
- // poly1305_init_ext_armv6 will write to the stack from R13+4, but |
|
| 266 |
- // that's ok because none of the other values have been written yet. |
|
| 267 |
- BL poly1305_init_ext_armv6<>(SB) |
|
| 268 |
- BIC.S $15, R6, R2 |
|
| 269 |
- BEQ poly1305_auth_armv6_noblocks |
|
| 270 |
- ADD $136, R13, R0 |
|
| 271 |
- MOVW R5, R1 |
|
| 272 |
- ADD R2, R5, R5 |
|
| 273 |
- SUB R2, R6, R6 |
|
| 274 |
- BL poly1305_blocks_armv6<>(SB) |
|
| 275 |
- |
|
| 276 |
-poly1305_auth_armv6_noblocks: |
|
| 277 |
- ADD $136, R13, R0 |
|
| 278 |
- MOVW R5, R1 |
|
| 279 |
- MOVW R6, R2 |
|
| 280 |
- MOVW R4, R3 |
|
| 281 |
- |
|
| 282 |
- MOVW R0, R5 |
|
| 283 |
- MOVW R1, R6 |
|
| 284 |
- MOVW R2, R7 |
|
| 285 |
- MOVW R3, R8 |
|
| 286 |
- AND.S R2, R2, R2 |
|
| 287 |
- BEQ poly1305_finish_ext_armv6_noremaining |
|
| 288 |
- EOR R0, R0 |
|
| 289 |
- ADD $8, R13, R9 // 8 = offset to 16 byte scratch space |
|
| 290 |
- MOVW R0, (R9) |
|
| 291 |
- MOVW R0, 4(R9) |
|
| 292 |
- MOVW R0, 8(R9) |
|
| 293 |
- MOVW R0, 12(R9) |
|
| 294 |
- WORD $0xe3110003 // TST R1, #3 not working see issue 5921 |
|
| 295 |
- BEQ poly1305_finish_ext_armv6_aligned |
|
| 296 |
- WORD $0xe3120008 // TST R2, #8 not working see issue 5921 |
|
| 297 |
- BEQ poly1305_finish_ext_armv6_skip8 |
|
| 298 |
- MOVWP_UNALIGNED(R1, R9, g) |
|
| 299 |
- MOVWP_UNALIGNED(R1, R9, g) |
|
| 300 |
- |
|
| 301 |
-poly1305_finish_ext_armv6_skip8: |
|
| 302 |
- WORD $0xe3120004 // TST $4, R2 not working see issue 5921 |
|
| 303 |
- BEQ poly1305_finish_ext_armv6_skip4 |
|
| 304 |
- MOVWP_UNALIGNED(R1, R9, g) |
|
| 305 |
- |
|
| 306 |
-poly1305_finish_ext_armv6_skip4: |
|
| 307 |
- WORD $0xe3120002 // TST $2, R2 not working see issue 5921 |
|
| 308 |
- BEQ poly1305_finish_ext_armv6_skip2 |
|
| 309 |
- MOVHUP_UNALIGNED(R1, R9, g) |
|
| 310 |
- B poly1305_finish_ext_armv6_skip2 |
|
| 311 |
- |
|
| 312 |
-poly1305_finish_ext_armv6_aligned: |
|
| 313 |
- WORD $0xe3120008 // TST R2, #8 not working see issue 5921 |
|
| 314 |
- BEQ poly1305_finish_ext_armv6_skip8_aligned |
|
| 315 |
- MOVM.IA.W (R1), [g-R11] |
|
| 316 |
- MOVM.IA.W [g-R11], (R9) |
|
| 317 |
- |
|
| 318 |
-poly1305_finish_ext_armv6_skip8_aligned: |
|
| 319 |
- WORD $0xe3120004 // TST $4, R2 not working see issue 5921 |
|
| 320 |
- BEQ poly1305_finish_ext_armv6_skip4_aligned |
|
| 321 |
- MOVW.P 4(R1), g |
|
| 322 |
- MOVW.P g, 4(R9) |
|
| 323 |
- |
|
| 324 |
-poly1305_finish_ext_armv6_skip4_aligned: |
|
| 325 |
- WORD $0xe3120002 // TST $2, R2 not working see issue 5921 |
|
| 326 |
- BEQ poly1305_finish_ext_armv6_skip2 |
|
| 327 |
- MOVHU.P 2(R1), g |
|
| 328 |
- MOVH.P g, 2(R9) |
|
| 329 |
- |
|
| 330 |
-poly1305_finish_ext_armv6_skip2: |
|
| 331 |
- WORD $0xe3120001 // TST $1, R2 not working see issue 5921 |
|
| 332 |
- BEQ poly1305_finish_ext_armv6_skip1 |
|
| 333 |
- MOVBU.P 1(R1), g |
|
| 334 |
- MOVBU.P g, 1(R9) |
|
| 335 |
- |
|
| 336 |
-poly1305_finish_ext_armv6_skip1: |
|
| 337 |
- MOVW $1, R11 |
|
| 338 |
- MOVBU R11, 0(R9) |
|
| 339 |
- MOVW R11, 56(R5) |
|
| 340 |
- MOVW R5, R0 |
|
| 341 |
- ADD $8, R13, R1 |
|
| 342 |
- MOVW $16, R2 |
|
| 343 |
- BL poly1305_blocks_armv6<>(SB) |
|
| 344 |
- |
|
| 345 |
-poly1305_finish_ext_armv6_noremaining: |
|
| 346 |
- MOVW 20(R5), R0 |
|
| 347 |
- MOVW 24(R5), R1 |
|
| 348 |
- MOVW 28(R5), R2 |
|
| 349 |
- MOVW 32(R5), R3 |
|
| 350 |
- MOVW 36(R5), R4 |
|
| 351 |
- MOVW R4>>26, R12 |
|
| 352 |
- BIC $0xfc000000, R4, R4 |
|
| 353 |
- ADD R12<<2, R12, R12 |
|
| 354 |
- ADD R12, R0, R0 |
|
| 355 |
- MOVW R0>>26, R12 |
|
| 356 |
- BIC $0xfc000000, R0, R0 |
|
| 357 |
- ADD R12, R1, R1 |
|
| 358 |
- MOVW R1>>26, R12 |
|
| 359 |
- BIC $0xfc000000, R1, R1 |
|
| 360 |
- ADD R12, R2, R2 |
|
| 361 |
- MOVW R2>>26, R12 |
|
| 362 |
- BIC $0xfc000000, R2, R2 |
|
| 363 |
- ADD R12, R3, R3 |
|
| 364 |
- MOVW R3>>26, R12 |
|
| 365 |
- BIC $0xfc000000, R3, R3 |
|
| 366 |
- ADD R12, R4, R4 |
|
| 367 |
- ADD $5, R0, R6 |
|
| 368 |
- MOVW R6>>26, R12 |
|
| 369 |
- BIC $0xfc000000, R6, R6 |
|
| 370 |
- ADD R12, R1, R7 |
|
| 371 |
- MOVW R7>>26, R12 |
|
| 372 |
- BIC $0xfc000000, R7, R7 |
|
| 373 |
- ADD R12, R2, g |
|
| 374 |
- MOVW g>>26, R12 |
|
| 375 |
- BIC $0xfc000000, g, g |
|
| 376 |
- ADD R12, R3, R11 |
|
| 377 |
- MOVW $-(1<<26), R12 |
|
| 378 |
- ADD R11>>26, R12, R12 |
|
| 379 |
- BIC $0xfc000000, R11, R11 |
|
| 380 |
- ADD R12, R4, R9 |
|
| 381 |
- MOVW R9>>31, R12 |
|
| 382 |
- SUB $1, R12 |
|
| 383 |
- AND R12, R6, R6 |
|
| 384 |
- AND R12, R7, R7 |
|
| 385 |
- AND R12, g, g |
|
| 386 |
- AND R12, R11, R11 |
|
| 387 |
- AND R12, R9, R9 |
|
| 388 |
- MVN R12, R12 |
|
| 389 |
- AND R12, R0, R0 |
|
| 390 |
- AND R12, R1, R1 |
|
| 391 |
- AND R12, R2, R2 |
|
| 392 |
- AND R12, R3, R3 |
|
| 393 |
- AND R12, R4, R4 |
|
| 394 |
- ORR R6, R0, R0 |
|
| 395 |
- ORR R7, R1, R1 |
|
| 396 |
- ORR g, R2, R2 |
|
| 397 |
- ORR R11, R3, R3 |
|
| 398 |
- ORR R9, R4, R4 |
|
| 399 |
- ORR R1<<26, R0, R0 |
|
| 400 |
- MOVW R1>>6, R1 |
|
| 401 |
- ORR R2<<20, R1, R1 |
|
| 402 |
- MOVW R2>>12, R2 |
|
| 403 |
- ORR R3<<14, R2, R2 |
|
| 404 |
- MOVW R3>>18, R3 |
|
| 405 |
- ORR R4<<8, R3, R3 |
|
| 406 |
- MOVW 40(R5), R6 |
|
| 407 |
- MOVW 44(R5), R7 |
|
| 408 |
- MOVW 48(R5), g |
|
| 409 |
- MOVW 52(R5), R11 |
|
| 410 |
- ADD.S R6, R0, R0 |
|
| 411 |
- ADC.S R7, R1, R1 |
|
| 412 |
- ADC.S g, R2, R2 |
|
| 413 |
- ADC.S R11, R3, R3 |
|
| 414 |
- MOVM.IA [R0-R3], (R8) |
|
| 415 |
- MOVW R5, R12 |
|
| 416 |
- EOR R0, R0, R0 |
|
| 417 |
- EOR R1, R1, R1 |
|
| 418 |
- EOR R2, R2, R2 |
|
| 419 |
- EOR R3, R3, R3 |
|
| 420 |
- EOR R4, R4, R4 |
|
| 421 |
- EOR R5, R5, R5 |
|
| 422 |
- EOR R6, R6, R6 |
|
| 423 |
- EOR R7, R7, R7 |
|
| 424 |
- MOVM.IA.W [R0-R7], (R12) |
|
| 425 |
- MOVM.IA [R0-R7], (R12) |
|
| 426 |
- MOVW 4(R13), g |
|
| 427 |
- RET |
| ... | ... |
@@ -2,18 +2,29 @@ |
| 2 | 2 |
// Use of this source code is governed by a BSD-style |
| 3 | 3 |
// license that can be found in the LICENSE file. |
| 4 | 4 |
|
| 5 |
+// This file provides the generic implementation of Sum and MAC. Other files |
|
| 6 |
+// might provide optimized assembly implementations of some of this code. |
|
| 7 |
+ |
|
| 5 | 8 |
package poly1305 |
| 6 | 9 |
|
| 7 | 10 |
import "encoding/binary" |
| 8 | 11 |
|
| 9 |
-const ( |
|
| 10 |
- msgBlock = uint32(1 << 24) |
|
| 11 |
- finalBlock = uint32(0) |
|
| 12 |
-) |
|
| 12 |
+// Poly1305 [RFC 7539] is a relatively simple algorithm: the authentication tag |
|
| 13 |
+// for a 64-byte message is approximately |
|
| 14 |
+// |
|
| 15 |
+// s + m[0:16] * r⁴ + m[16:32] * r³ + m[32:48] * r² + m[48:64] * r mod 2¹³⁰ - 5 |
|
| 16 |
+// |
|
| 17 |
+// for some secret r and s. It can be computed sequentially like |
|
| 18 |
+// |
|
| 19 |
+// for len(msg) > 0: |
|
| 20 |
+// h += read(msg, 16) |
|
| 21 |
+// h *= r |
|
| 22 |
+// h %= 2¹³⁰ - 5 |
|
| 23 |
+// return h + s |
|
| 24 |
+// |
|
| 25 |
+// All the complexity is about doing performant constant-time math on numbers |
|
| 26 |
+// larger than any available numeric type. |
|
| 13 | 27 |
|
| 14 |
-// sumGeneric generates an authenticator for msg using a one-time key and |
|
| 15 |
-// puts the 16-byte result into out. This is the generic implementation of |
|
| 16 |
-// Sum and should be called if no assembly implementation is available. |
|
| 17 | 28 |
func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
|
| 18 | 29 |
h := newMACGeneric(key) |
| 19 | 30 |
h.Write(msg) |
| ... | ... |
@@ -21,152 +32,276 @@ func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
|
| 21 | 21 |
} |
| 22 | 22 |
|
| 23 | 23 |
func newMACGeneric(key *[32]byte) (h macGeneric) {
|
| 24 |
- h.r[0] = binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff |
|
| 25 |
- h.r[1] = (binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03 |
|
| 26 |
- h.r[2] = (binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff |
|
| 27 |
- h.r[3] = (binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff |
|
| 28 |
- h.r[4] = (binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff |
|
| 29 |
- |
|
| 30 |
- h.s[0] = binary.LittleEndian.Uint32(key[16:]) |
|
| 31 |
- h.s[1] = binary.LittleEndian.Uint32(key[20:]) |
|
| 32 |
- h.s[2] = binary.LittleEndian.Uint32(key[24:]) |
|
| 33 |
- h.s[3] = binary.LittleEndian.Uint32(key[28:]) |
|
| 24 |
+ initialize(key, &h.r, &h.s) |
|
| 34 | 25 |
return |
| 35 | 26 |
} |
| 36 | 27 |
|
| 28 |
+// macState holds numbers in saturated 64-bit little-endian limbs. That is, |
|
| 29 |
+// the value of [x0, x1, x2] is x[0] + x[1] * 2⁶⁴ + x[2] * 2¹²⁸. |
|
| 30 |
+type macState struct {
|
|
| 31 |
+ // h is the main accumulator. It is to be interpreted modulo 2¹³⁰ - 5, but |
|
| 32 |
+ // can grow larger during and after rounds. |
|
| 33 |
+ h [3]uint64 |
|
| 34 |
+ // r and s are the private key components. |
|
| 35 |
+ r [2]uint64 |
|
| 36 |
+ s [2]uint64 |
|
| 37 |
+} |
|
| 38 |
+ |
|
| 37 | 39 |
type macGeneric struct {
|
| 38 |
- h, r [5]uint32 |
|
| 39 |
- s [4]uint32 |
|
| 40 |
+ macState |
|
| 40 | 41 |
|
| 41 | 42 |
buffer [TagSize]byte |
| 42 | 43 |
offset int |
| 43 | 44 |
} |
| 44 | 45 |
|
| 45 |
-func (h *macGeneric) Write(p []byte) (n int, err error) {
|
|
| 46 |
- n = len(p) |
|
| 46 |
+// Write splits the incoming message into TagSize chunks, and passes them to |
|
| 47 |
+// update. It buffers incomplete chunks. |
|
| 48 |
+func (h *macGeneric) Write(p []byte) (int, error) {
|
|
| 49 |
+ nn := len(p) |
|
| 47 | 50 |
if h.offset > 0 {
|
| 48 |
- remaining := TagSize - h.offset |
|
| 49 |
- if n < remaining {
|
|
| 50 |
- h.offset += copy(h.buffer[h.offset:], p) |
|
| 51 |
- return n, nil |
|
| 51 |
+ n := copy(h.buffer[h.offset:], p) |
|
| 52 |
+ if h.offset+n < TagSize {
|
|
| 53 |
+ h.offset += n |
|
| 54 |
+ return nn, nil |
|
| 52 | 55 |
} |
| 53 |
- copy(h.buffer[h.offset:], p[:remaining]) |
|
| 54 |
- p = p[remaining:] |
|
| 56 |
+ p = p[n:] |
|
| 55 | 57 |
h.offset = 0 |
| 56 |
- updateGeneric(h.buffer[:], msgBlock, &(h.h), &(h.r)) |
|
| 58 |
+ updateGeneric(&h.macState, h.buffer[:]) |
|
| 57 | 59 |
} |
| 58 |
- if nn := len(p) - (len(p) % TagSize); nn > 0 {
|
|
| 59 |
- updateGeneric(p, msgBlock, &(h.h), &(h.r)) |
|
| 60 |
- p = p[nn:] |
|
| 60 |
+ if n := len(p) - (len(p) % TagSize); n > 0 {
|
|
| 61 |
+ updateGeneric(&h.macState, p[:n]) |
|
| 62 |
+ p = p[n:] |
|
| 61 | 63 |
} |
| 62 | 64 |
if len(p) > 0 {
|
| 63 | 65 |
h.offset += copy(h.buffer[h.offset:], p) |
| 64 | 66 |
} |
| 65 |
- return n, nil |
|
| 67 |
+ return nn, nil |
|
| 66 | 68 |
} |
| 67 | 69 |
|
| 68 |
-func (h *macGeneric) Sum(out *[16]byte) {
|
|
| 69 |
- H, R := h.h, h.r |
|
| 70 |
+// Sum flushes the last incomplete chunk from the buffer, if any, and generates |
|
| 71 |
+// the MAC output. It does not modify its state, in order to allow for multiple |
|
| 72 |
+// calls to Sum, even if no Write is allowed after Sum. |
|
| 73 |
+func (h *macGeneric) Sum(out *[TagSize]byte) {
|
|
| 74 |
+ state := h.macState |
|
| 70 | 75 |
if h.offset > 0 {
|
| 71 |
- var buffer [TagSize]byte |
|
| 72 |
- copy(buffer[:], h.buffer[:h.offset]) |
|
| 73 |
- buffer[h.offset] = 1 // invariant: h.offset < TagSize |
|
| 74 |
- updateGeneric(buffer[:], finalBlock, &H, &R) |
|
| 76 |
+ updateGeneric(&state, h.buffer[:h.offset]) |
|
| 75 | 77 |
} |
| 76 |
- finalizeGeneric(out, &H, &(h.s)) |
|
| 78 |
+ finalize(out, &state.h, &state.s) |
|
| 79 |
+} |
|
| 80 |
+ |
|
| 81 |
+// [rMask0, rMask1] is the specified Poly1305 clamping mask in little-endian. It |
|
| 82 |
+// clears some bits of the secret coefficient to make it possible to implement |
|
| 83 |
+// multiplication more efficiently. |
|
| 84 |
+const ( |
|
| 85 |
+ rMask0 = 0x0FFFFFFC0FFFFFFF |
|
| 86 |
+ rMask1 = 0x0FFFFFFC0FFFFFFC |
|
| 87 |
+) |
|
| 88 |
+ |
|
| 89 |
+func initialize(key *[32]byte, r, s *[2]uint64) {
|
|
| 90 |
+ r[0] = binary.LittleEndian.Uint64(key[0:8]) & rMask0 |
|
| 91 |
+ r[1] = binary.LittleEndian.Uint64(key[8:16]) & rMask1 |
|
| 92 |
+ s[0] = binary.LittleEndian.Uint64(key[16:24]) |
|
| 93 |
+ s[1] = binary.LittleEndian.Uint64(key[24:32]) |
|
| 94 |
+} |
|
| 95 |
+ |
|
| 96 |
+// uint128 holds a 128-bit number as two 64-bit limbs, for use with the |
|
| 97 |
+// bits.Mul64 and bits.Add64 intrinsics. |
|
| 98 |
+type uint128 struct {
|
|
| 99 |
+ lo, hi uint64 |
|
| 100 |
+} |
|
| 101 |
+ |
|
| 102 |
+func mul64(a, b uint64) uint128 {
|
|
| 103 |
+ hi, lo := bitsMul64(a, b) |
|
| 104 |
+ return uint128{lo, hi}
|
|
| 77 | 105 |
} |
| 78 | 106 |
|
| 79 |
-func updateGeneric(msg []byte, flag uint32, h, r *[5]uint32) {
|
|
| 80 |
- h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4] |
|
| 81 |
- r0, r1, r2, r3, r4 := uint64(r[0]), uint64(r[1]), uint64(r[2]), uint64(r[3]), uint64(r[4]) |
|
| 82 |
- R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5 |
|
| 83 |
- |
|
| 84 |
- for len(msg) >= TagSize {
|
|
| 85 |
- // h += msg |
|
| 86 |
- h0 += binary.LittleEndian.Uint32(msg[0:]) & 0x3ffffff |
|
| 87 |
- h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff |
|
| 88 |
- h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff |
|
| 89 |
- h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff |
|
| 90 |
- h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | flag |
|
| 91 |
- |
|
| 92 |
- // h *= r |
|
| 93 |
- d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1) |
|
| 94 |
- d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2) |
|
| 95 |
- d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3) |
|
| 96 |
- d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4) |
|
| 97 |
- d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0) |
|
| 98 |
- |
|
| 99 |
- // h %= p |
|
| 100 |
- h0 = uint32(d0) & 0x3ffffff |
|
| 101 |
- h1 = uint32(d1) & 0x3ffffff |
|
| 102 |
- h2 = uint32(d2) & 0x3ffffff |
|
| 103 |
- h3 = uint32(d3) & 0x3ffffff |
|
| 104 |
- h4 = uint32(d4) & 0x3ffffff |
|
| 105 |
- |
|
| 106 |
- h0 += uint32(d4>>26) * 5 |
|
| 107 |
- h1 += h0 >> 26 |
|
| 108 |
- h0 = h0 & 0x3ffffff |
|
| 109 |
- |
|
| 110 |
- msg = msg[TagSize:] |
|
| 107 |
+func add128(a, b uint128) uint128 {
|
|
| 108 |
+ lo, c := bitsAdd64(a.lo, b.lo, 0) |
|
| 109 |
+ hi, c := bitsAdd64(a.hi, b.hi, c) |
|
| 110 |
+ if c != 0 {
|
|
| 111 |
+ panic("poly1305: unexpected overflow")
|
|
| 111 | 112 |
} |
| 113 |
+ return uint128{lo, hi}
|
|
| 114 |
+} |
|
| 112 | 115 |
|
| 113 |
- h[0], h[1], h[2], h[3], h[4] = h0, h1, h2, h3, h4 |
|
| 116 |
+func shiftRightBy2(a uint128) uint128 {
|
|
| 117 |
+ a.lo = a.lo>>2 | (a.hi&3)<<62 |
|
| 118 |
+ a.hi = a.hi >> 2 |
|
| 119 |
+ return a |
|
| 114 | 120 |
} |
| 115 | 121 |
|
| 116 |
-func finalizeGeneric(out *[TagSize]byte, h *[5]uint32, s *[4]uint32) {
|
|
| 117 |
- h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4] |
|
| 118 |
- |
|
| 119 |
- // h %= p reduction |
|
| 120 |
- h2 += h1 >> 26 |
|
| 121 |
- h1 &= 0x3ffffff |
|
| 122 |
- h3 += h2 >> 26 |
|
| 123 |
- h2 &= 0x3ffffff |
|
| 124 |
- h4 += h3 >> 26 |
|
| 125 |
- h3 &= 0x3ffffff |
|
| 126 |
- h0 += 5 * (h4 >> 26) |
|
| 127 |
- h4 &= 0x3ffffff |
|
| 128 |
- h1 += h0 >> 26 |
|
| 129 |
- h0 &= 0x3ffffff |
|
| 130 |
- |
|
| 131 |
- // h - p |
|
| 132 |
- t0 := h0 + 5 |
|
| 133 |
- t1 := h1 + (t0 >> 26) |
|
| 134 |
- t2 := h2 + (t1 >> 26) |
|
| 135 |
- t3 := h3 + (t2 >> 26) |
|
| 136 |
- t4 := h4 + (t3 >> 26) - (1 << 26) |
|
| 137 |
- t0 &= 0x3ffffff |
|
| 138 |
- t1 &= 0x3ffffff |
|
| 139 |
- t2 &= 0x3ffffff |
|
| 140 |
- t3 &= 0x3ffffff |
|
| 141 |
- |
|
| 142 |
- // select h if h < p else h - p |
|
| 143 |
- t_mask := (t4 >> 31) - 1 |
|
| 144 |
- h_mask := ^t_mask |
|
| 145 |
- h0 = (h0 & h_mask) | (t0 & t_mask) |
|
| 146 |
- h1 = (h1 & h_mask) | (t1 & t_mask) |
|
| 147 |
- h2 = (h2 & h_mask) | (t2 & t_mask) |
|
| 148 |
- h3 = (h3 & h_mask) | (t3 & t_mask) |
|
| 149 |
- h4 = (h4 & h_mask) | (t4 & t_mask) |
|
| 150 |
- |
|
| 151 |
- // h %= 2^128 |
|
| 152 |
- h0 |= h1 << 26 |
|
| 153 |
- h1 = ((h1 >> 6) | (h2 << 20)) |
|
| 154 |
- h2 = ((h2 >> 12) | (h3 << 14)) |
|
| 155 |
- h3 = ((h3 >> 18) | (h4 << 8)) |
|
| 156 |
- |
|
| 157 |
- // s: the s part of the key |
|
| 158 |
- // tag = (h + s) % (2^128) |
|
| 159 |
- t := uint64(h0) + uint64(s[0]) |
|
| 160 |
- h0 = uint32(t) |
|
| 161 |
- t = uint64(h1) + uint64(s[1]) + (t >> 32) |
|
| 162 |
- h1 = uint32(t) |
|
| 163 |
- t = uint64(h2) + uint64(s[2]) + (t >> 32) |
|
| 164 |
- h2 = uint32(t) |
|
| 165 |
- t = uint64(h3) + uint64(s[3]) + (t >> 32) |
|
| 166 |
- h3 = uint32(t) |
|
| 167 |
- |
|
| 168 |
- binary.LittleEndian.PutUint32(out[0:], h0) |
|
| 169 |
- binary.LittleEndian.PutUint32(out[4:], h1) |
|
| 170 |
- binary.LittleEndian.PutUint32(out[8:], h2) |
|
| 171 |
- binary.LittleEndian.PutUint32(out[12:], h3) |
|
| 122 |
+// updateGeneric absorbs msg into the state.h accumulator. For each chunk m of |
|
| 123 |
+// 128 bits of message, it computes |
|
| 124 |
+// |
|
| 125 |
+// h₊ = (h + m) * r mod 2¹³⁰ - 5 |
|
| 126 |
+// |
|
| 127 |
+// If the msg length is not a multiple of TagSize, it assumes the last |
|
| 128 |
+// incomplete chunk is the final one. |
|
| 129 |
+func updateGeneric(state *macState, msg []byte) {
|
|
| 130 |
+ h0, h1, h2 := state.h[0], state.h[1], state.h[2] |
|
| 131 |
+ r0, r1 := state.r[0], state.r[1] |
|
| 132 |
+ |
|
| 133 |
+ for len(msg) > 0 {
|
|
| 134 |
+ var c uint64 |
|
| 135 |
+ |
|
| 136 |
+ // For the first step, h + m, we use a chain of bits.Add64 intrinsics. |
|
| 137 |
+ // The resulting value of h might exceed 2¹³⁰ - 5, but will be partially |
|
| 138 |
+ // reduced at the end of the multiplication below. |
|
| 139 |
+ // |
|
| 140 |
+ // The spec requires us to set a bit just above the message size, not to |
|
| 141 |
+ // hide leading zeroes. For full chunks, that's 1 << 128, so we can just |
|
| 142 |
+ // add 1 to the most significant (2¹²⁸) limb, h2. |
|
| 143 |
+ if len(msg) >= TagSize {
|
|
| 144 |
+ h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(msg[0:8]), 0) |
|
| 145 |
+ h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(msg[8:16]), c) |
|
| 146 |
+ h2 += c + 1 |
|
| 147 |
+ |
|
| 148 |
+ msg = msg[TagSize:] |
|
| 149 |
+ } else {
|
|
| 150 |
+ var buf [TagSize]byte |
|
| 151 |
+ copy(buf[:], msg) |
|
| 152 |
+ buf[len(msg)] = 1 |
|
| 153 |
+ |
|
| 154 |
+ h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(buf[0:8]), 0) |
|
| 155 |
+ h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(buf[8:16]), c) |
|
| 156 |
+ h2 += c |
|
| 157 |
+ |
|
| 158 |
+ msg = nil |
|
| 159 |
+ } |
|
| 160 |
+ |
|
| 161 |
+ // Multiplication of big number limbs is similar to elementary school |
|
| 162 |
+ // columnar multiplication. Instead of digits, there are 64-bit limbs. |
|
| 163 |
+ // |
|
| 164 |
+ // We are multiplying a 3 limbs number, h, by a 2 limbs number, r. |
|
| 165 |
+ // |
|
| 166 |
+ // h2 h1 h0 x |
|
| 167 |
+ // r1 r0 = |
|
| 168 |
+ // ---------------- |
|
| 169 |
+ // h2r0 h1r0 h0r0 <-- individual 128-bit products |
|
| 170 |
+ // + h2r1 h1r1 h0r1 |
|
| 171 |
+ // ------------------------ |
|
| 172 |
+ // m3 m2 m1 m0 <-- result in 128-bit overlapping limbs |
|
| 173 |
+ // ------------------------ |
|
| 174 |
+ // m3.hi m2.hi m1.hi m0.hi <-- carry propagation |
|
| 175 |
+ // + m3.lo m2.lo m1.lo m0.lo |
|
| 176 |
+ // ------------------------------- |
|
| 177 |
+ // t4 t3 t2 t1 t0 <-- final result in 64-bit limbs |
|
| 178 |
+ // |
|
| 179 |
+ // The main difference from pen-and-paper multiplication is that we do |
|
| 180 |
+ // carry propagation in a separate step, as if we wrote two-digit sums |
|
| 181 |
+ // at first (the 128-bit limbs), and then carried the tens all at once. |
|
| 182 |
+ |
|
| 183 |
+ h0r0 := mul64(h0, r0) |
|
| 184 |
+ h1r0 := mul64(h1, r0) |
|
| 185 |
+ h2r0 := mul64(h2, r0) |
|
| 186 |
+ h0r1 := mul64(h0, r1) |
|
| 187 |
+ h1r1 := mul64(h1, r1) |
|
| 188 |
+ h2r1 := mul64(h2, r1) |
|
| 189 |
+ |
|
| 190 |
+ // Since h2 is known to be at most 7 (5 + 1 + 1), and r0 and r1 have their |
|
| 191 |
+ // top 4 bits cleared by rMask{0,1}, we know that their product is not going
|
|
| 192 |
+ // to overflow 64 bits, so we can ignore the high part of the products. |
|
| 193 |
+ // |
|
| 194 |
+ // This also means that the product doesn't have a fifth limb (t4). |
|
| 195 |
+ if h2r0.hi != 0 {
|
|
| 196 |
+ panic("poly1305: unexpected overflow")
|
|
| 197 |
+ } |
|
| 198 |
+ if h2r1.hi != 0 {
|
|
| 199 |
+ panic("poly1305: unexpected overflow")
|
|
| 200 |
+ } |
|
| 201 |
+ |
|
| 202 |
+ m0 := h0r0 |
|
| 203 |
+ m1 := add128(h1r0, h0r1) // These two additions don't overflow thanks again |
|
| 204 |
+ m2 := add128(h2r0, h1r1) // to the 4 masked bits at the top of r0 and r1. |
|
| 205 |
+ m3 := h2r1 |
|
| 206 |
+ |
|
| 207 |
+ t0 := m0.lo |
|
| 208 |
+ t1, c := bitsAdd64(m1.lo, m0.hi, 0) |
|
| 209 |
+ t2, c := bitsAdd64(m2.lo, m1.hi, c) |
|
| 210 |
+ t3, _ := bitsAdd64(m3.lo, m2.hi, c) |
|
| 211 |
+ |
|
| 212 |
+ // Now we have the result as 4 64-bit limbs, and we need to reduce it |
|
| 213 |
+ // modulo 2¹³⁰ - 5. The special shape of this Crandall prime lets us do |
|
| 214 |
+ // a cheap partial reduction according to the reduction identity |
|
| 215 |
+ // |
|
| 216 |
+ // c * 2¹³⁰ + n = c * 5 + n mod 2¹³⁰ - 5 |
|
| 217 |
+ // |
|
| 218 |
+ // because 2¹³⁰ = 5 mod 2¹³⁰ - 5. The reduction is only partial: the result is |
|
| 219 |
+ // likely to be larger than 2¹³⁰ - 5, but it is still small enough to fit the |
|
| 220 |
+ // assumptions we make about h in the rest of the code. |
|
| 221 |
+ // |
|
| 222 |
+ // See also https://speakerdeck.com/gtank/engineering-prime-numbers?slide=23 |
|
| 223 |
+ |
|
| 224 |
+ // We split the final result at the 2¹³⁰ mark into h and cc, the carry. |
|
| 225 |
+ // Note that the carry bits are effectively shifted left by 2, in other |
|
| 226 |
+ // words, cc = c * 4 for the c in the reduction identity. |
|
| 227 |
+ h0, h1, h2 = t0, t1, t2&maskLow2Bits |
|
| 228 |
+ cc := uint128{t2 & maskNotLow2Bits, t3}
|
|
| 229 |
+ |
|
| 230 |
+ // To add c * 5 to h, we first add cc = c * 4, and then add (cc >> 2) = c. |
|
| 231 |
+ |
|
| 232 |
+ h0, c = bitsAdd64(h0, cc.lo, 0) |
|
| 233 |
+ h1, c = bitsAdd64(h1, cc.hi, c) |
|
| 234 |
+ h2 += c |
|
| 235 |
+ |
|
| 236 |
+ cc = shiftRightBy2(cc) |
|
| 237 |
+ |
|
| 238 |
+ h0, c = bitsAdd64(h0, cc.lo, 0) |
|
| 239 |
+ h1, c = bitsAdd64(h1, cc.hi, c) |
|
| 240 |
+ h2 += c |
|
| 241 |
+ |
|
| 242 |
+ // h2 is at most 3 + 1 + 1 = 5, making the whole of h at most |
|
| 243 |
+ // |
|
| 244 |
+ // 5 * 2¹²⁸ + (2¹²⁸ - 1) = 6 * 2¹²⁸ - 1 |
|
| 245 |
+ } |
|
| 246 |
+ |
|
| 247 |
+ state.h[0], state.h[1], state.h[2] = h0, h1, h2 |
|
| 248 |
+} |
|
| 249 |
+ |
|
| 250 |
+const ( |
|
| 251 |
+ maskLow2Bits uint64 = 0x0000000000000003 |
|
| 252 |
+ maskNotLow2Bits uint64 = ^maskLow2Bits |
|
| 253 |
+) |
|
| 254 |
+ |
|
| 255 |
+// select64 returns x if v == 1 and y if v == 0, in constant time. |
|
| 256 |
+func select64(v, x, y uint64) uint64 { return ^(v-1)&x | (v-1)&y }
|
|
| 257 |
+ |
|
| 258 |
+// [p0, p1, p2] is 2¹³⁰ - 5 in little endian order. |
|
| 259 |
+const ( |
|
| 260 |
+ p0 = 0xFFFFFFFFFFFFFFFB |
|
| 261 |
+ p1 = 0xFFFFFFFFFFFFFFFF |
|
| 262 |
+ p2 = 0x0000000000000003 |
|
| 263 |
+) |
|
| 264 |
+ |
|
| 265 |
+// finalize completes the modular reduction of h and computes |
|
| 266 |
+// |
|
| 267 |
+// out = h + s mod 2¹²⁸ |
|
| 268 |
+// |
|
| 269 |
+func finalize(out *[TagSize]byte, h *[3]uint64, s *[2]uint64) {
|
|
| 270 |
+ h0, h1, h2 := h[0], h[1], h[2] |
|
| 271 |
+ |
|
| 272 |
+ // After the partial reduction in updateGeneric, h might be more than |
|
| 273 |
+ // 2¹³⁰ - 5, but will be less than 2 * (2¹³⁰ - 5). To complete the reduction |
|
| 274 |
+ // in constant time, we compute t = h - (2¹³⁰ - 5), and select h as the |
|
| 275 |
+ // result if the subtraction underflows, and t otherwise. |
|
| 276 |
+ |
|
| 277 |
+ hMinusP0, b := bitsSub64(h0, p0, 0) |
|
| 278 |
+ hMinusP1, b := bitsSub64(h1, p1, b) |
|
| 279 |
+ _, b = bitsSub64(h2, p2, b) |
|
| 280 |
+ |
|
| 281 |
+ // h = h if h < p else h - p |
|
| 282 |
+ h0 = select64(b, h0, hMinusP0) |
|
| 283 |
+ h1 = select64(b, h1, hMinusP1) |
|
| 284 |
+ |
|
| 285 |
+ // Finally, we compute the last Poly1305 step |
|
| 286 |
+ // |
|
| 287 |
+ // tag = h + s mod 2¹²⁸ |
|
| 288 |
+ // |
|
| 289 |
+ // by just doing a wide addition with the 128 low bits of h and discarding |
|
| 290 |
+ // the overflow. |
|
| 291 |
+ h0, c := bitsAdd64(h0, s[0], 0) |
|
| 292 |
+ h1, _ = bitsAdd64(h1, s[1], c) |
|
| 293 |
+ |
|
| 294 |
+ binary.LittleEndian.PutUint64(out[0:8], h0) |
|
| 295 |
+ binary.LittleEndian.PutUint64(out[8:16], h1) |
|
| 172 | 296 |
} |
| ... | ... |
@@ -2,14 +2,11 @@ |
| 2 | 2 |
// Use of this source code is governed by a BSD-style |
| 3 | 3 |
// license that can be found in the LICENSE file. |
| 4 | 4 |
|
| 5 |
-// +build s390x,!go1.11 !arm,!amd64,!s390x gccgo appengine nacl |
|
| 5 |
+// +build s390x,!go1.11 !amd64,!s390x,!ppc64le gccgo appengine nacl |
|
| 6 | 6 |
|
| 7 | 7 |
package poly1305 |
| 8 | 8 |
|
| 9 |
-// Sum generates an authenticator for msg using a one-time key and puts the |
|
| 10 |
-// 16-byte result into out. Authenticating two different messages with the same |
|
| 11 |
-// key allows an attacker to forge messages at will. |
|
| 12 |
-func Sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
|
|
| 9 |
+func sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
|
|
| 13 | 10 |
h := newMAC(key) |
| 14 | 11 |
h.Write(msg) |
| 15 | 12 |
h.Sum(out) |
| 16 | 13 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,58 @@ |
| 0 |
+// Copyright 2019 The Go Authors. All rights reserved. |
|
| 1 |
+// Use of this source code is governed by a BSD-style |
|
| 2 |
+// license that can be found in the LICENSE file. |
|
| 3 |
+ |
|
| 4 |
+// +build ppc64le,!gccgo,!appengine |
|
| 5 |
+ |
|
| 6 |
+package poly1305 |
|
| 7 |
+ |
|
| 8 |
+//go:noescape |
|
| 9 |
+func update(state *macState, msg []byte) |
|
| 10 |
+ |
|
| 11 |
+func sum(out *[16]byte, m []byte, key *[32]byte) {
|
|
| 12 |
+ h := newMAC(key) |
|
| 13 |
+ h.Write(m) |
|
| 14 |
+ h.Sum(out) |
|
| 15 |
+} |
|
| 16 |
+ |
|
| 17 |
+func newMAC(key *[32]byte) (h mac) {
|
|
| 18 |
+ initialize(key, &h.r, &h.s) |
|
| 19 |
+ return |
|
| 20 |
+} |
|
| 21 |
+ |
|
| 22 |
+// mac is a wrapper for macGeneric that redirects calls that would have gone to |
|
| 23 |
+// updateGeneric to update. |
|
| 24 |
+// |
|
| 25 |
+// Its Write and Sum methods are otherwise identical to the macGeneric ones, but |
|
| 26 |
+// using function pointers would carry a major performance cost. |
|
| 27 |
+type mac struct{ macGeneric }
|
|
| 28 |
+ |
|
| 29 |
+func (h *mac) Write(p []byte) (int, error) {
|
|
| 30 |
+ nn := len(p) |
|
| 31 |
+ if h.offset > 0 {
|
|
| 32 |
+ n := copy(h.buffer[h.offset:], p) |
|
| 33 |
+ if h.offset+n < TagSize {
|
|
| 34 |
+ h.offset += n |
|
| 35 |
+ return nn, nil |
|
| 36 |
+ } |
|
| 37 |
+ p = p[n:] |
|
| 38 |
+ h.offset = 0 |
|
| 39 |
+ update(&h.macState, h.buffer[:]) |
|
| 40 |
+ } |
|
| 41 |
+ if n := len(p) - (len(p) % TagSize); n > 0 {
|
|
| 42 |
+ update(&h.macState, p[:n]) |
|
| 43 |
+ p = p[n:] |
|
| 44 |
+ } |
|
| 45 |
+ if len(p) > 0 {
|
|
| 46 |
+ h.offset += copy(h.buffer[h.offset:], p) |
|
| 47 |
+ } |
|
| 48 |
+ return nn, nil |
|
| 49 |
+} |
|
| 50 |
+ |
|
| 51 |
+func (h *mac) Sum(out *[16]byte) {
|
|
| 52 |
+ state := h.macState |
|
| 53 |
+ if h.offset > 0 {
|
|
| 54 |
+ update(&state, h.buffer[:h.offset]) |
|
| 55 |
+ } |
|
| 56 |
+ finalize(out, &state.h, &state.s) |
|
| 57 |
+} |
| 0 | 58 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,181 @@ |
| 0 |
+// Copyright 2019 The Go Authors. All rights reserved. |
|
| 1 |
+// Use of this source code is governed by a BSD-style |
|
| 2 |
+// license that can be found in the LICENSE file. |
|
| 3 |
+ |
|
| 4 |
+// +build ppc64le,!gccgo,!appengine |
|
| 5 |
+ |
|
| 6 |
+#include "textflag.h" |
|
| 7 |
+ |
|
| 8 |
+// This was ported from the amd64 implementation. |
|
| 9 |
+ |
|
| 10 |
+#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \ |
|
| 11 |
+ MOVD (msg), t0; \ |
|
| 12 |
+ MOVD 8(msg), t1; \ |
|
| 13 |
+ MOVD $1, t2; \ |
|
| 14 |
+ ADDC t0, h0, h0; \ |
|
| 15 |
+ ADDE t1, h1, h1; \ |
|
| 16 |
+ ADDE t2, h2; \ |
|
| 17 |
+ ADD $16, msg |
|
| 18 |
+ |
|
| 19 |
+#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \ |
|
| 20 |
+ MULLD r0, h0, t0; \ |
|
| 21 |
+ MULLD r0, h1, t4; \ |
|
| 22 |
+ MULHDU r0, h0, t1; \ |
|
| 23 |
+ MULHDU r0, h1, t5; \ |
|
| 24 |
+ ADDC t4, t1, t1; \ |
|
| 25 |
+ MULLD r0, h2, t2; \ |
|
| 26 |
+ ADDZE t5; \ |
|
| 27 |
+ MULHDU r1, h0, t4; \ |
|
| 28 |
+ MULLD r1, h0, h0; \ |
|
| 29 |
+ ADD t5, t2, t2; \ |
|
| 30 |
+ ADDC h0, t1, t1; \ |
|
| 31 |
+ MULLD h2, r1, t3; \ |
|
| 32 |
+ ADDZE t4, h0; \ |
|
| 33 |
+ MULHDU r1, h1, t5; \ |
|
| 34 |
+ MULLD r1, h1, t4; \ |
|
| 35 |
+ ADDC t4, t2, t2; \ |
|
| 36 |
+ ADDE t5, t3, t3; \ |
|
| 37 |
+ ADDC h0, t2, t2; \ |
|
| 38 |
+ MOVD $-4, t4; \ |
|
| 39 |
+ MOVD t0, h0; \ |
|
| 40 |
+ MOVD t1, h1; \ |
|
| 41 |
+ ADDZE t3; \ |
|
| 42 |
+ ANDCC $3, t2, h2; \ |
|
| 43 |
+ AND t2, t4, t0; \ |
|
| 44 |
+ ADDC t0, h0, h0; \ |
|
| 45 |
+ ADDE t3, h1, h1; \ |
|
| 46 |
+ SLD $62, t3, t4; \ |
|
| 47 |
+ SRD $2, t2; \ |
|
| 48 |
+ ADDZE h2; \ |
|
| 49 |
+ OR t4, t2, t2; \ |
|
| 50 |
+ SRD $2, t3; \ |
|
| 51 |
+ ADDC t2, h0, h0; \ |
|
| 52 |
+ ADDE t3, h1, h1; \ |
|
| 53 |
+ ADDZE h2 |
|
| 54 |
+ |
|
| 55 |
+DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF |
|
| 56 |
+DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC |
|
| 57 |
+GLOBL ·poly1305Mask<>(SB), RODATA, $16 |
|
| 58 |
+ |
|
| 59 |
+// func update(state *macState, msg []byte); macState is laid out as [7]uint64: h[3], r[2], s[2] |
|
| 60 |
+TEXT ·update(SB), $0-32 |
|
| 61 |
+ MOVD state+0(FP), R3 |
|
| 62 |
+ MOVD msg_base+8(FP), R4 |
|
| 63 |
+ MOVD msg_len+16(FP), R5 |
|
| 64 |
+ |
|
| 65 |
+ MOVD 0(R3), R8 // h0 |
|
| 66 |
+ MOVD 8(R3), R9 // h1 |
|
| 67 |
+ MOVD 16(R3), R10 // h2 |
|
| 68 |
+ MOVD 24(R3), R11 // r0 |
|
| 69 |
+ MOVD 32(R3), R12 // r1 |
|
| 70 |
+ |
|
| 71 |
+ CMP R5, $16 |
|
| 72 |
+ BLT bytes_between_0_and_15 |
|
| 73 |
+ |
|
| 74 |
+loop: |
|
| 75 |
+ POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22) |
|
| 76 |
+ |
|
| 77 |
+multiply: |
|
| 78 |
+ POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21) |
|
| 79 |
+ ADD $-16, R5 |
|
| 80 |
+ CMP R5, $16 |
|
| 81 |
+ BGE loop |
|
| 82 |
+ |
|
| 83 |
+bytes_between_0_and_15: |
|
| 84 |
+ CMP $0, R5 |
|
| 85 |
+ BEQ done |
|
| 86 |
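+ MOVD $0, R16 // low 64 bits of the final partial chunk (added to h0 at carry) |
|
| 87 |
+ MOVD $0, R17 // high 64 bits of the final partial chunk (added to h1 at carry) |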
|
| 88 |
+ |
|
| 89 |
+flush_buffer: |
|
| 90 |
+ CMP R5, $8 |
|
| 91 |
+ BLE just1 |
|
| 92 |
+ |
|
| 93 |
+ MOVD $8, R21 |
|
| 94 |
+ SUB R21, R5, R21 |
|
| 95 |
+ |
|
| 96 |
+ // Greater than 8 -- load the rightmost remaining bytes in msg |
|
| 97 |
+ // and put into R17 (the high 64 bits of the chunk, later added to h1) |
|
| 98 |
+ MOVD (R4)(R21), R17 |
|
| 99 |
+ MOVD $16, R22 |
|
| 100 |
+ |
|
| 101 |
+ // Find the offset to those bytes |
|
| 102 |
+ SUB R5, R22, R22 |
|
| 103 |
+ SLD $3, R22 |
|
| 104 |
+ |
|
| 105 |
+ // Shift to get only the bytes in msg |
|
| 106 |
+ SRD R22, R17, R17 |
|
| 107 |
+ |
|
| 108 |
+ // Put 1 at high end |
|
| 109 |
+ MOVD $1, R23 |
|
| 110 |
+ SLD $3, R21 |
|
| 111 |
+ SLD R21, R23, R23 |
|
| 112 |
+ OR R23, R17, R17 |
|
| 113 |
+ |
|
| 114 |
+ // Remainder is 8 |
|
| 115 |
+ MOVD $8, R5 |
|
| 116 |
+ |
|
| 117 |
+just1: |
|
| 118 |
+ CMP R5, $8 |
|
| 119 |
+ BLT less8 |
|
| 120 |
+ |
|
| 121 |
+ // Exactly 8 |
|
| 122 |
+ MOVD (R4), R16 |
|
| 123 |
+ |
|
| 124 |
+ CMP $0, R17 |
|
| 125 |
+ |
|
| 126 |
+ // Check if we've already set R17; if not |
|
| 127 |
+ // set 1 to indicate end of msg. |
|
| 128 |
+ BNE carry |
|
| 129 |
+ MOVD $1, R17 |
|
| 130 |
+ BR carry |
|
| 131 |
+ |
|
| 132 |
+less8: |
|
| 133 |
+ MOVD $0, R16 // low 64 bits of the final partial chunk (added to h0 at carry) |
|
| 134 |
+ MOVD $0, R22 // shift count |
|
| 135 |
+ CMP R5, $4 |
|
| 136 |
+ BLT less4 |
|
| 137 |
+ MOVWZ (R4), R16 |
|
| 138 |
+ ADD $4, R4 |
|
| 139 |
+ ADD $-4, R5 |
|
| 140 |
+ MOVD $32, R22 |
|
| 141 |
+ |
|
| 142 |
+less4: |
|
| 143 |
+ CMP R5, $2 |
|
| 144 |
+ BLT less2 |
|
| 145 |
+ MOVHZ (R4), R21 |
|
| 146 |
+ SLD R22, R21, R21 |
|
| 147 |
+ OR R16, R21, R16 |
|
| 148 |
+ ADD $16, R22 |
|
| 149 |
+ ADD $-2, R5 |
|
| 150 |
+ ADD $2, R4 |
|
| 151 |
+ |
|
| 152 |
+less2: |
|
| 153 |
+ CMP $0, R5 |
|
| 154 |
+ BEQ insert1 |
|
| 155 |
+ MOVBZ (R4), R21 |
|
| 156 |
+ SLD R22, R21, R21 |
|
| 157 |
+ OR R16, R21, R16 |
|
| 158 |
+ ADD $8, R22 |
|
| 159 |
+ |
|
| 160 |
+insert1: |
|
| 161 |
+ // Insert 1 at end of msg |
|
| 162 |
+ MOVD $1, R21 |
|
| 163 |
+ SLD R22, R21, R21 |
|
| 164 |
+ OR R16, R21, R16 |
|
| 165 |
+ |
|
| 166 |
+carry: |
|
| 167 |
+ // Add new values to h0, h1, h2 |
|
| 168 |
+ ADDC R16, R8 |
|
| 169 |
+ ADDE R17, R9 |
|
| 170 |
+ ADDE $0, R10 |
|
| 171 |
+ MOVD $16, R5 |
|
| 172 |
+ ADD R5, R4 |
|
| 173 |
+ BR multiply |
|
| 174 |
+ |
|
| 175 |
+done: |
|
| 176 |
+ // Save h0, h1, h2 in state |
|
| 177 |
+ MOVD R8, 0(R3) |
|
| 178 |
+ MOVD R9, 8(R3) |
|
| 179 |
+ MOVD R10, 16(R3) |
|
| 180 |
+ RET |
| ... | ... |
@@ -22,10 +22,7 @@ func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]byte) |
| 22 | 22 |
//go:noescape |
| 23 | 23 |
func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]byte) |
| 24 | 24 |
|
| 25 |
-// Sum generates an authenticator for m using a one-time key and puts the |
|
| 26 |
-// 16-byte result into out. Authenticating two different messages with the same |
|
| 27 |
-// key allows an attacker to forge messages at will. |
|
| 28 |
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
|
|
| 25 |
+func sum(out *[16]byte, m []byte, key *[32]byte) {
|
|
| 29 | 26 |
if cpu.S390X.HasVX {
|
| 30 | 27 |
var mPtr *byte |
| 31 | 28 |
if len(m) > 0 {
|