Browse code

vendor: add archive/tar

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>

Tonis Tiigi authored on 2017/07/14 10:17:16
Showing 10 changed files
... ...
@@ -107,6 +107,7 @@ RUN set -x \
107 107
 # IMPORTANT: If the version of Go is updated, the Windows to Linux CI machines
108 108
 #            will need updating, to avoid errors. Ping #docker-maintainers on IRC
109 109
 #            with a heads-up.
110
+# IMPORTANT: When updating this, please note that the stdlib archive/tar package is vendored
110 111
 ENV GO_VERSION 1.8.3
111 112
 RUN curl -fsSL "https://golang.org/dl/go${GO_VERSION}.linux-amd64.tar.gz" \
112 113
 	| tar -xzC /usr/local
... ...
@@ -136,3 +136,11 @@ github.com/Nvveen/Gotty a8b993ba6abdb0e0c12b0125c603323a71c7790c https://github.
136 136
 github.com/docker/go-metrics d466d4f6fd960e01820085bd7e1a24426ee7ef18
137 137
 
138 138
 github.com/opencontainers/selinux v1.0.0-rc1
139
+
140
+# archive/tar
141
+# mkdir -p ./vendor/archive
142
+# git clone git://github.com/tonistiigi/go-1.git ./go
143
+# git --git-dir ./go/.git --work-tree ./go checkout revert-prefix-ignore
144
+# cp -a go/src/archive/tar ./vendor/archive/tar
145
+# rm -rf ./go
146
+# vndr
139 147
\ No newline at end of file
140 148
new file mode 100644
... ...
@@ -0,0 +1,286 @@
0
+// Copyright 2009 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// Package tar implements access to tar archives.
5
+// It aims to cover most of the variations, including those produced
6
+// by GNU and BSD tars.
7
+//
8
+// References:
9
+//   http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
10
+//   http://www.gnu.org/software/tar/manual/html_node/Standard.html
11
+//   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
12
+package tar
13
+
14
+import (
15
+	"errors"
16
+	"fmt"
17
+	"os"
18
+	"path"
19
+	"time"
20
+)
21
+
22
+// BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit
23
+// architectures. If a large value is encountered when decoding, the result
24
+// stored in Header will be the truncated version.
25
+
26
+// Header type flags.
27
+const (
28
+	TypeReg           = '0'    // regular file
29
+	TypeRegA          = '\x00' // regular file
30
+	TypeLink          = '1'    // hard link
31
+	TypeSymlink       = '2'    // symbolic link
32
+	TypeChar          = '3'    // character device node
33
+	TypeBlock         = '4'    // block device node
34
+	TypeDir           = '5'    // directory
35
+	TypeFifo          = '6'    // fifo node
36
+	TypeCont          = '7'    // reserved
37
+	TypeXHeader       = 'x'    // extended header
38
+	TypeXGlobalHeader = 'g'    // global extended header
39
+	TypeGNULongName   = 'L'    // Next file has a long name
40
+	TypeGNULongLink   = 'K'    // Next file symlinks to a file w/ a long name
41
+	TypeGNUSparse     = 'S'    // sparse file
42
+)
43
+
44
+// A Header represents a single header in a tar archive.
45
+// Some fields may not be populated.
46
+type Header struct {
47
+	Name       string    // name of header file entry
48
+	Mode       int64     // permission and mode bits
49
+	Uid        int       // user id of owner
50
+	Gid        int       // group id of owner
51
+	Size       int64     // length in bytes
52
+	ModTime    time.Time // modified time
53
+	Typeflag   byte      // type of header entry
54
+	Linkname   string    // target name of link
55
+	Uname      string    // user name of owner
56
+	Gname      string    // group name of owner
57
+	Devmajor   int64     // major number of character or block device
58
+	Devminor   int64     // minor number of character or block device
59
+	AccessTime time.Time // access time
60
+	ChangeTime time.Time // status change time
61
+	Xattrs     map[string]string
62
+}
63
+
64
+// FileInfo returns an os.FileInfo for the Header.
65
+func (h *Header) FileInfo() os.FileInfo {
66
+	return headerFileInfo{h}
67
+}
68
+
69
+// headerFileInfo implements os.FileInfo.
70
+type headerFileInfo struct {
71
+	h *Header
72
+}
73
+
74
+func (fi headerFileInfo) Size() int64        { return fi.h.Size }
75
+func (fi headerFileInfo) IsDir() bool        { return fi.Mode().IsDir() }
76
+func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime }
77
+func (fi headerFileInfo) Sys() interface{}   { return fi.h }
78
+
79
+// Name returns the base name of the file.
80
+func (fi headerFileInfo) Name() string {
81
+	if fi.IsDir() {
82
+		return path.Base(path.Clean(fi.h.Name))
83
+	}
84
+	return path.Base(fi.h.Name)
85
+}
86
+
87
+// Mode returns the permission and mode bits for the headerFileInfo.
88
+func (fi headerFileInfo) Mode() (mode os.FileMode) {
89
+	// Set file permission bits.
90
+	mode = os.FileMode(fi.h.Mode).Perm()
91
+
92
+	// Set setuid, setgid and sticky bits.
93
+	if fi.h.Mode&c_ISUID != 0 {
94
+		// setuid
95
+		mode |= os.ModeSetuid
96
+	}
97
+	if fi.h.Mode&c_ISGID != 0 {
98
+		// setgid
99
+		mode |= os.ModeSetgid
100
+	}
101
+	if fi.h.Mode&c_ISVTX != 0 {
102
+		// sticky
103
+		mode |= os.ModeSticky
104
+	}
105
+
106
+	// Set file mode bits.
107
+	// clear perm, setuid, setgid and sticky bits.
108
+	m := os.FileMode(fi.h.Mode) &^ 07777
109
+	if m == c_ISDIR {
110
+		// directory
111
+		mode |= os.ModeDir
112
+	}
113
+	if m == c_ISFIFO {
114
+		// named pipe (FIFO)
115
+		mode |= os.ModeNamedPipe
116
+	}
117
+	if m == c_ISLNK {
118
+		// symbolic link
119
+		mode |= os.ModeSymlink
120
+	}
121
+	if m == c_ISBLK {
122
+		// device file
123
+		mode |= os.ModeDevice
124
+	}
125
+	if m == c_ISCHR {
126
+		// Unix character device
127
+		mode |= os.ModeDevice
128
+		mode |= os.ModeCharDevice
129
+	}
130
+	if m == c_ISSOCK {
131
+		// Unix domain socket
132
+		mode |= os.ModeSocket
133
+	}
134
+
135
+	switch fi.h.Typeflag {
136
+	case TypeSymlink:
137
+		// symbolic link
138
+		mode |= os.ModeSymlink
139
+	case TypeChar:
140
+		// character device node
141
+		mode |= os.ModeDevice
142
+		mode |= os.ModeCharDevice
143
+	case TypeBlock:
144
+		// block device node
145
+		mode |= os.ModeDevice
146
+	case TypeDir:
147
+		// directory
148
+		mode |= os.ModeDir
149
+	case TypeFifo:
150
+		// fifo node
151
+		mode |= os.ModeNamedPipe
152
+	}
153
+
154
+	return mode
155
+}
156
+
157
+// sysStat, if non-nil, populates h from system-dependent fields of fi.
158
+var sysStat func(fi os.FileInfo, h *Header) error
159
+
160
+// Mode constants from the tar spec.
161
+const (
162
+	c_ISUID  = 04000   // Set uid
163
+	c_ISGID  = 02000   // Set gid
164
+	c_ISVTX  = 01000   // Save text (sticky bit)
165
+	c_ISDIR  = 040000  // Directory
166
+	c_ISFIFO = 010000  // FIFO
167
+	c_ISREG  = 0100000 // Regular file
168
+	c_ISLNK  = 0120000 // Symbolic link
169
+	c_ISBLK  = 060000  // Block special file
170
+	c_ISCHR  = 020000  // Character special file
171
+	c_ISSOCK = 0140000 // Socket
172
+)
173
+
174
+// Keywords for the PAX Extended Header
175
+const (
176
+	paxAtime    = "atime"
177
+	paxCharset  = "charset"
178
+	paxComment  = "comment"
179
+	paxCtime    = "ctime" // please note that ctime is not a valid pax header.
180
+	paxGid      = "gid"
181
+	paxGname    = "gname"
182
+	paxLinkpath = "linkpath"
183
+	paxMtime    = "mtime"
184
+	paxPath     = "path"
185
+	paxSize     = "size"
186
+	paxUid      = "uid"
187
+	paxUname    = "uname"
188
+	paxXattr    = "SCHILY.xattr."
189
+	paxNone     = ""
190
+)
191
+
192
+// FileInfoHeader creates a partially-populated Header from fi.
193
+// If fi describes a symlink, FileInfoHeader records link as the link target.
194
+// If fi describes a directory, a slash is appended to the name.
195
+// Because os.FileInfo's Name method returns only the base name of
196
+// the file it describes, it may be necessary to modify the Name field
197
+// of the returned header to provide the full path name of the file.
198
+func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
199
+	if fi == nil {
200
+		return nil, errors.New("tar: FileInfo is nil")
201
+	}
202
+	fm := fi.Mode()
203
+	h := &Header{
204
+		Name:    fi.Name(),
205
+		ModTime: fi.ModTime(),
206
+		Mode:    int64(fm.Perm()), // or'd with c_IS* constants later
207
+	}
208
+	switch {
209
+	case fm.IsRegular():
210
+		h.Mode |= c_ISREG
211
+		h.Typeflag = TypeReg
212
+		h.Size = fi.Size()
213
+	case fi.IsDir():
214
+		h.Typeflag = TypeDir
215
+		h.Mode |= c_ISDIR
216
+		h.Name += "/"
217
+	case fm&os.ModeSymlink != 0:
218
+		h.Typeflag = TypeSymlink
219
+		h.Mode |= c_ISLNK
220
+		h.Linkname = link
221
+	case fm&os.ModeDevice != 0:
222
+		if fm&os.ModeCharDevice != 0 {
223
+			h.Mode |= c_ISCHR
224
+			h.Typeflag = TypeChar
225
+		} else {
226
+			h.Mode |= c_ISBLK
227
+			h.Typeflag = TypeBlock
228
+		}
229
+	case fm&os.ModeNamedPipe != 0:
230
+		h.Typeflag = TypeFifo
231
+		h.Mode |= c_ISFIFO
232
+	case fm&os.ModeSocket != 0:
233
+		h.Mode |= c_ISSOCK
234
+	default:
235
+		return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm)
236
+	}
237
+	if fm&os.ModeSetuid != 0 {
238
+		h.Mode |= c_ISUID
239
+	}
240
+	if fm&os.ModeSetgid != 0 {
241
+		h.Mode |= c_ISGID
242
+	}
243
+	if fm&os.ModeSticky != 0 {
244
+		h.Mode |= c_ISVTX
245
+	}
246
+	// If possible, populate additional fields from OS-specific
247
+	// FileInfo fields.
248
+	if sys, ok := fi.Sys().(*Header); ok {
249
+		// This FileInfo came from a Header (not the OS). Use the
250
+		// original Header to populate all remaining fields.
251
+		h.Uid = sys.Uid
252
+		h.Gid = sys.Gid
253
+		h.Uname = sys.Uname
254
+		h.Gname = sys.Gname
255
+		h.AccessTime = sys.AccessTime
256
+		h.ChangeTime = sys.ChangeTime
257
+		if sys.Xattrs != nil {
258
+			h.Xattrs = make(map[string]string)
259
+			for k, v := range sys.Xattrs {
260
+				h.Xattrs[k] = v
261
+			}
262
+		}
263
+		if sys.Typeflag == TypeLink {
264
+			// hard link
265
+			h.Typeflag = TypeLink
266
+			h.Size = 0
267
+			h.Linkname = sys.Linkname
268
+		}
269
+	}
270
+	if sysStat != nil {
271
+		return h, sysStat(fi, h)
272
+	}
273
+	return h, nil
274
+}
275
+
276
+// isHeaderOnlyType checks if the given type flag is of the type that has no
277
+// data section even if a size is specified.
278
+func isHeaderOnlyType(flag byte) bool {
279
+	switch flag {
280
+	case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo:
281
+		return true
282
+	default:
283
+		return false
284
+	}
285
+}
0 286
new file mode 100644
... ...
@@ -0,0 +1,197 @@
0
+// Copyright 2016 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+package tar
5
+
6
+// Constants to identify various tar formats.
7
+const (
8
+	// The format is unknown.
9
+	formatUnknown = (1 << iota) / 2 // Sequence of 0, 1, 2, 4, 8, etc...
10
+
11
+	// The format of the original Unix V7 tar tool prior to standardization.
12
+	formatV7
13
+
14
+	// The old and new GNU formats, which are incompatible with USTAR.
15
+	// This does cover the old GNU sparse extension.
16
+	// This does not cover the GNU sparse extensions using PAX headers,
17
+	// versions 0.0, 0.1, and 1.0; these fall under the PAX format.
18
+	formatGNU
19
+
20
+	// Schily's tar format, which is incompatible with USTAR.
21
+	// This does not cover STAR extensions to the PAX format; these fall under
22
+	// the PAX format.
23
+	formatSTAR
24
+
25
+	// USTAR is the former standardization of tar defined in POSIX.1-1988.
26
+	// This is incompatible with the GNU and STAR formats.
27
+	formatUSTAR
28
+
29
+	// PAX is the latest standardization of tar defined in POSIX.1-2001.
30
+	// This is an extension of USTAR and is "backwards compatible" with it.
31
+	//
32
+	// Some newer formats add their own extensions to PAX, such as GNU sparse
33
+	// files and SCHILY extended attributes. Since they are backwards compatible
34
+	// with PAX, they will be labelled as "PAX".
35
+	formatPAX
36
+)
37
+
38
+// Magics used to identify various formats.
39
+const (
40
+	magicGNU, versionGNU     = "ustar ", " \x00"
41
+	magicUSTAR, versionUSTAR = "ustar\x00", "00"
42
+	trailerSTAR              = "tar\x00"
43
+)
44
+
45
+// Size constants from various tar specifications.
46
+const (
47
+	blockSize  = 512 // Size of each block in a tar stream
48
+	nameSize   = 100 // Max length of the name field in USTAR format
49
+	prefixSize = 155 // Max length of the prefix field in USTAR format
50
+)
51
+
52
+var zeroBlock block
53
+
54
+type block [blockSize]byte
55
+
56
+// Convert block to any number of formats.
57
+func (b *block) V7() *headerV7       { return (*headerV7)(b) }
58
+func (b *block) GNU() *headerGNU     { return (*headerGNU)(b) }
59
+func (b *block) STAR() *headerSTAR   { return (*headerSTAR)(b) }
60
+func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) }
61
+func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) }
62
+
63
+// GetFormat checks that the block is a valid tar header based on the checksum.
64
+// It then attempts to guess the specific format based on magic values.
65
+// If the checksum fails, then formatUnknown is returned.
66
+func (b *block) GetFormat() (format int) {
67
+	// Verify checksum.
68
+	var p parser
69
+	value := p.parseOctal(b.V7().Chksum())
70
+	chksum1, chksum2 := b.ComputeChecksum()
71
+	if p.err != nil || (value != chksum1 && value != chksum2) {
72
+		return formatUnknown
73
+	}
74
+
75
+	// Guess the magic values.
76
+	magic := string(b.USTAR().Magic())
77
+	version := string(b.USTAR().Version())
78
+	trailer := string(b.STAR().Trailer())
79
+	switch {
80
+	case magic == magicUSTAR && trailer == trailerSTAR:
81
+		return formatSTAR
82
+	case magic == magicUSTAR:
83
+		return formatUSTAR
84
+	case magic == magicGNU && version == versionGNU:
85
+		return formatGNU
86
+	default:
87
+		return formatV7
88
+	}
89
+}
90
+
91
+// SetFormat writes the magic values necessary for specified format
92
+// and then updates the checksum accordingly.
93
+func (b *block) SetFormat(format int) {
94
+	// Set the magic values.
95
+	switch format {
96
+	case formatV7:
97
+		// Do nothing.
98
+	case formatGNU:
99
+		copy(b.GNU().Magic(), magicGNU)
100
+		copy(b.GNU().Version(), versionGNU)
101
+	case formatSTAR:
102
+		copy(b.STAR().Magic(), magicUSTAR)
103
+		copy(b.STAR().Version(), versionUSTAR)
104
+		copy(b.STAR().Trailer(), trailerSTAR)
105
+	case formatUSTAR, formatPAX:
106
+		copy(b.USTAR().Magic(), magicUSTAR)
107
+		copy(b.USTAR().Version(), versionUSTAR)
108
+	default:
109
+		panic("invalid format")
110
+	}
111
+
112
+	// Update checksum.
113
+	// This field is special in that it is terminated by a NULL then space.
114
+	var f formatter
115
+	field := b.V7().Chksum()
116
+	chksum, _ := b.ComputeChecksum() // Possible values are 256..128776
117
+	f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143
118
+	field[7] = ' '
119
+}
120
+
121
+// ComputeChecksum computes the checksum for the header block.
122
+// POSIX specifies a sum of the unsigned byte values, but the Sun tar used
123
+// signed byte values.
124
+// We compute and return both.
125
+func (b *block) ComputeChecksum() (unsigned, signed int64) {
126
+	for i, c := range b {
127
+		if 148 <= i && i < 156 {
128
+			c = ' ' // Treat the checksum field itself as all spaces.
129
+		}
130
+		unsigned += int64(uint8(c))
131
+		signed += int64(int8(c))
132
+	}
133
+	return unsigned, signed
134
+}
135
+
136
+type headerV7 [blockSize]byte
137
+
138
+func (h *headerV7) Name() []byte     { return h[000:][:100] }
139
+func (h *headerV7) Mode() []byte     { return h[100:][:8] }
140
+func (h *headerV7) UID() []byte      { return h[108:][:8] }
141
+func (h *headerV7) GID() []byte      { return h[116:][:8] }
142
+func (h *headerV7) Size() []byte     { return h[124:][:12] }
143
+func (h *headerV7) ModTime() []byte  { return h[136:][:12] }
144
+func (h *headerV7) Chksum() []byte   { return h[148:][:8] }
145
+func (h *headerV7) TypeFlag() []byte { return h[156:][:1] }
146
+func (h *headerV7) LinkName() []byte { return h[157:][:100] }
147
+
148
+type headerGNU [blockSize]byte
149
+
150
+func (h *headerGNU) V7() *headerV7       { return (*headerV7)(h) }
151
+func (h *headerGNU) Magic() []byte       { return h[257:][:6] }
152
+func (h *headerGNU) Version() []byte     { return h[263:][:2] }
153
+func (h *headerGNU) UserName() []byte    { return h[265:][:32] }
154
+func (h *headerGNU) GroupName() []byte   { return h[297:][:32] }
155
+func (h *headerGNU) DevMajor() []byte    { return h[329:][:8] }
156
+func (h *headerGNU) DevMinor() []byte    { return h[337:][:8] }
157
+func (h *headerGNU) AccessTime() []byte  { return h[345:][:12] }
158
+func (h *headerGNU) ChangeTime() []byte  { return h[357:][:12] }
159
+func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) }
160
+func (h *headerGNU) RealSize() []byte    { return h[483:][:12] }
161
+
162
+type headerSTAR [blockSize]byte
163
+
164
+func (h *headerSTAR) V7() *headerV7      { return (*headerV7)(h) }
165
+func (h *headerSTAR) Magic() []byte      { return h[257:][:6] }
166
+func (h *headerSTAR) Version() []byte    { return h[263:][:2] }
167
+func (h *headerSTAR) UserName() []byte   { return h[265:][:32] }
168
+func (h *headerSTAR) GroupName() []byte  { return h[297:][:32] }
169
+func (h *headerSTAR) DevMajor() []byte   { return h[329:][:8] }
170
+func (h *headerSTAR) DevMinor() []byte   { return h[337:][:8] }
171
+func (h *headerSTAR) Prefix() []byte     { return h[345:][:131] }
172
+func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] }
173
+func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] }
174
+func (h *headerSTAR) Trailer() []byte    { return h[508:][:4] }
175
+
176
+type headerUSTAR [blockSize]byte
177
+
178
+func (h *headerUSTAR) V7() *headerV7     { return (*headerV7)(h) }
179
+func (h *headerUSTAR) Magic() []byte     { return h[257:][:6] }
180
+func (h *headerUSTAR) Version() []byte   { return h[263:][:2] }
181
+func (h *headerUSTAR) UserName() []byte  { return h[265:][:32] }
182
+func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] }
183
+func (h *headerUSTAR) DevMajor() []byte  { return h[329:][:8] }
184
+func (h *headerUSTAR) DevMinor() []byte  { return h[337:][:8] }
185
+func (h *headerUSTAR) Prefix() []byte    { return h[345:][:155] }
186
+
187
+type sparseArray []byte
188
+
189
+func (s sparseArray) Entry(i int) sparseNode { return (sparseNode)(s[i*24:]) }
190
+func (s sparseArray) IsExtended() []byte     { return s[24*s.MaxEntries():][:1] }
191
+func (s sparseArray) MaxEntries() int        { return len(s) / 24 }
192
+
193
+type sparseNode []byte
194
+
195
+func (s sparseNode) Offset() []byte   { return s[00:][:12] }
196
+func (s sparseNode) NumBytes() []byte { return s[12:][:12] }
0 197
new file mode 100644
... ...
@@ -0,0 +1,800 @@
0
+// Copyright 2009 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+package tar
5
+
6
+// TODO(dsymonds):
7
+//   - pax extensions
8
+
9
+import (
10
+	"bytes"
11
+	"errors"
12
+	"io"
13
+	"io/ioutil"
14
+	"math"
15
+	"strconv"
16
+	"strings"
17
+	"time"
18
+)
19
+
20
+var (
21
+	ErrHeader = errors.New("archive/tar: invalid tar header")
22
+)
23
+
24
+// A Reader provides sequential access to the contents of a tar archive.
25
+// A tar archive consists of a sequence of files.
26
+// The Next method advances to the next file in the archive (including the first),
27
+// and then it can be treated as an io.Reader to access the file's data.
28
+type Reader struct {
29
+	r    io.Reader
30
+	pad  int64          // amount of padding (ignored) after current file entry
31
+	curr numBytesReader // reader for current file entry
32
+	blk  block          // buffer to use as temporary local storage
33
+
34
+	// err is a persistent error.
35
+	// It is only the responsibility of every exported method of Reader to
36
+	// ensure that this error is sticky.
37
+	err error
38
+}
39
+
40
+// A numBytesReader is an io.Reader with a numBytes method, returning the number
41
+// of bytes remaining in the underlying encoded data.
42
+type numBytesReader interface {
43
+	io.Reader
44
+	numBytes() int64
45
+}
46
+
47
+// A regFileReader is a numBytesReader for reading file data from a tar archive.
48
+type regFileReader struct {
49
+	r  io.Reader // underlying reader
50
+	nb int64     // number of unread bytes for current file entry
51
+}
52
+
53
+// A sparseFileReader is a numBytesReader for reading sparse file data from a
54
+// tar archive.
55
+type sparseFileReader struct {
56
+	rfr   numBytesReader // Reads the sparse-encoded file data
57
+	sp    []sparseEntry  // The sparse map for the file
58
+	pos   int64          // Keeps track of file position
59
+	total int64          // Total size of the file
60
+}
61
+
62
+// A sparseEntry holds a single entry in a sparse file's sparse map.
63
+//
64
+// Sparse files are represented using a series of sparseEntrys.
65
+// Despite the name, a sparseEntry represents an actual data fragment that
66
+// references data found in the underlying archive stream. All regions not
67
+// covered by a sparseEntry are logically filled with zeros.
68
+//
69
+// For example, if the underlying raw file contains the 10-byte data:
70
+//	var compactData = "abcdefgh"
71
+//
72
+// And the sparse map has the following entries:
73
+//	var sp = []sparseEntry{
74
+//		{offset: 2,  numBytes: 5}, // Data fragment for [2..6]
75
+//		{offset: 18, numBytes: 3}, // Data fragment for [18..20]
76
+//	}
77
+//
78
+// Then the content of the resulting sparse file with a "real" size of 25 is:
79
+//	var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
80
+type sparseEntry struct {
81
+	offset   int64 // Starting position of the fragment
82
+	numBytes int64 // Length of the fragment
83
+}
84
+
85
+// Keywords for GNU sparse files in a PAX extended header
86
+const (
87
+	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
88
+	paxGNUSparseOffset    = "GNU.sparse.offset"
89
+	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
90
+	paxGNUSparseMap       = "GNU.sparse.map"
91
+	paxGNUSparseName      = "GNU.sparse.name"
92
+	paxGNUSparseMajor     = "GNU.sparse.major"
93
+	paxGNUSparseMinor     = "GNU.sparse.minor"
94
+	paxGNUSparseSize      = "GNU.sparse.size"
95
+	paxGNUSparseRealSize  = "GNU.sparse.realsize"
96
+)
97
+
98
+// NewReader creates a new Reader reading from r.
99
+func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
100
+
101
+// Next advances to the next entry in the tar archive.
102
+//
103
+// io.EOF is returned at the end of the input.
104
+func (tr *Reader) Next() (*Header, error) {
105
+	if tr.err != nil {
106
+		return nil, tr.err
107
+	}
108
+	hdr, err := tr.next()
109
+	tr.err = err
110
+	return hdr, err
111
+}
112
+
113
+func (tr *Reader) next() (*Header, error) {
114
+	var extHdrs map[string]string
115
+
116
+	// Externally, Next iterates through the tar archive as if it is a series of
117
+	// files. Internally, the tar format often uses fake "files" to add meta
118
+	// data that describes the next file. These meta data "files" should not
119
+	// normally be visible to the outside. As such, this loop iterates through
120
+	// one or more "header files" until it finds a "normal file".
121
+loop:
122
+	for {
123
+		if err := tr.skipUnread(); err != nil {
124
+			return nil, err
125
+		}
126
+		hdr, rawHdr, err := tr.readHeader()
127
+		if err != nil {
128
+			return nil, err
129
+		}
130
+		if err := tr.handleRegularFile(hdr); err != nil {
131
+			return nil, err
132
+		}
133
+
134
+		// Check for PAX/GNU special headers and files.
135
+		switch hdr.Typeflag {
136
+		case TypeXHeader:
137
+			extHdrs, err = parsePAX(tr)
138
+			if err != nil {
139
+				return nil, err
140
+			}
141
+			continue loop // This is a meta header affecting the next header
142
+		case TypeGNULongName, TypeGNULongLink:
143
+			realname, err := ioutil.ReadAll(tr)
144
+			if err != nil {
145
+				return nil, err
146
+			}
147
+
148
+			// Convert GNU extensions to use PAX headers.
149
+			if extHdrs == nil {
150
+				extHdrs = make(map[string]string)
151
+			}
152
+			var p parser
153
+			switch hdr.Typeflag {
154
+			case TypeGNULongName:
155
+				extHdrs[paxPath] = p.parseString(realname)
156
+			case TypeGNULongLink:
157
+				extHdrs[paxLinkpath] = p.parseString(realname)
158
+			}
159
+			if p.err != nil {
160
+				return nil, p.err
161
+			}
162
+			continue loop // This is a meta header affecting the next header
163
+		default:
164
+			// The old GNU sparse format is handled here since it is technically
165
+			// just a regular file with additional attributes.
166
+
167
+			if err := mergePAX(hdr, extHdrs); err != nil {
168
+				return nil, err
169
+			}
170
+
171
+			// The extended headers may have updated the size.
172
+			// Thus, setup the regFileReader again after merging PAX headers.
173
+			if err := tr.handleRegularFile(hdr); err != nil {
174
+				return nil, err
175
+			}
176
+
177
+			// Sparse formats rely on being able to read from the logical data
178
+			// section; there must be a preceding call to handleRegularFile.
179
+			if err := tr.handleSparseFile(hdr, rawHdr, extHdrs); err != nil {
180
+				return nil, err
181
+			}
182
+			return hdr, nil // This is a file, so stop
183
+		}
184
+	}
185
+}
186
+
187
+// handleRegularFile sets up the current file reader and padding such that it
188
+// can only read the following logical data section. It will properly handle
189
+// special headers that contain no data section.
190
+func (tr *Reader) handleRegularFile(hdr *Header) error {
191
+	nb := hdr.Size
192
+	if isHeaderOnlyType(hdr.Typeflag) {
193
+		nb = 0
194
+	}
195
+	if nb < 0 {
196
+		return ErrHeader
197
+	}
198
+
199
+	tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
200
+	tr.curr = &regFileReader{r: tr.r, nb: nb}
201
+	return nil
202
+}
203
+
204
+// handleSparseFile checks if the current file is a sparse format of any type
205
+// and sets the curr reader appropriately.
206
+func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error {
207
+	var sp []sparseEntry
208
+	var err error
209
+	if hdr.Typeflag == TypeGNUSparse {
210
+		sp, err = tr.readOldGNUSparseMap(hdr, rawHdr)
211
+		if err != nil {
212
+			return err
213
+		}
214
+	} else {
215
+		sp, err = tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
216
+		if err != nil {
217
+			return err
218
+		}
219
+	}
220
+
221
+	// If sp is non-nil, then this is a sparse file.
222
+	// Note that it is possible for len(sp) to be zero.
223
+	if sp != nil {
224
+		tr.curr, err = newSparseFileReader(tr.curr, sp, hdr.Size)
225
+	}
226
+	return err
227
+}
228
+
229
+// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
230
+// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
231
+// be treated as a regular file.
232
+func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
233
+	var sparseFormat string
234
+
235
+	// Check for sparse format indicators
236
+	major, majorOk := headers[paxGNUSparseMajor]
237
+	minor, minorOk := headers[paxGNUSparseMinor]
238
+	sparseName, sparseNameOk := headers[paxGNUSparseName]
239
+	_, sparseMapOk := headers[paxGNUSparseMap]
240
+	sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
241
+	sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
242
+
243
+	// Identify which, if any, sparse format applies from which PAX headers are set
244
+	if majorOk && minorOk {
245
+		sparseFormat = major + "." + minor
246
+	} else if sparseNameOk && sparseMapOk {
247
+		sparseFormat = "0.1"
248
+	} else if sparseSizeOk {
249
+		sparseFormat = "0.0"
250
+	} else {
251
+		// Not a PAX format GNU sparse file.
252
+		return nil, nil
253
+	}
254
+
255
+	// Check for unknown sparse format
256
+	if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
257
+		return nil, nil
258
+	}
259
+
260
+	// Update hdr from GNU sparse PAX headers
261
+	if sparseNameOk {
262
+		hdr.Name = sparseName
263
+	}
264
+	if sparseSizeOk {
265
+		realSize, err := strconv.ParseInt(sparseSize, 10, 64)
266
+		if err != nil {
267
+			return nil, ErrHeader
268
+		}
269
+		hdr.Size = realSize
270
+	} else if sparseRealSizeOk {
271
+		realSize, err := strconv.ParseInt(sparseRealSize, 10, 64)
272
+		if err != nil {
273
+			return nil, ErrHeader
274
+		}
275
+		hdr.Size = realSize
276
+	}
277
+
278
+	// Set up the sparse map, according to the particular sparse format in use
279
+	var sp []sparseEntry
280
+	var err error
281
+	switch sparseFormat {
282
+	case "0.0", "0.1":
283
+		sp, err = readGNUSparseMap0x1(headers)
284
+	case "1.0":
285
+		sp, err = readGNUSparseMap1x0(tr.curr)
286
+	}
287
+	return sp, err
288
+}
289
+
290
+// mergePAX merges well-known headers according to the PAX standard.
291
+// In general, PAX records with the same name as a field in the header
292
+// struct overwrite that field, since they can carry higher-precision or
293
+// longer values. This is especially useful for the name and linkname
294
+// fields.
295
+func mergePAX(hdr *Header, headers map[string]string) (err error) {
296
+	var id64 int64
297
+	for k, v := range headers {
298
+		switch k {
299
+		case paxPath:
300
+			hdr.Name = v
301
+		case paxLinkpath:
302
+			hdr.Linkname = v
303
+		case paxUname:
304
+			hdr.Uname = v
305
+		case paxGname:
306
+			hdr.Gname = v
307
+		case paxUid:
308
+			id64, err = strconv.ParseInt(v, 10, 64)
309
+			hdr.Uid = int(id64) // Integer overflow possible
310
+		case paxGid:
311
+			id64, err = strconv.ParseInt(v, 10, 64)
312
+			hdr.Gid = int(id64) // Integer overflow possible
313
+		case paxAtime:
314
+			hdr.AccessTime, err = parsePAXTime(v)
315
+		case paxMtime:
316
+			hdr.ModTime, err = parsePAXTime(v)
317
+		case paxCtime:
318
+			hdr.ChangeTime, err = parsePAXTime(v)
319
+		case paxSize:
320
+			hdr.Size, err = strconv.ParseInt(v, 10, 64)
321
+		default:
322
+			if strings.HasPrefix(k, paxXattr) {
323
+				if hdr.Xattrs == nil {
324
+					hdr.Xattrs = make(map[string]string)
325
+				}
326
+				hdr.Xattrs[k[len(paxXattr):]] = v
327
+			}
328
+		}
329
+		if err != nil {
330
+			return ErrHeader
331
+		}
332
+	}
333
+	return nil
334
+}
335
+
336
+// parsePAX parses PAX headers.
337
+// If an extended header (type 'x') is invalid, ErrHeader is returned
338
+func parsePAX(r io.Reader) (map[string]string, error) {
339
+	buf, err := ioutil.ReadAll(r)
340
+	if err != nil {
341
+		return nil, err
342
+	}
343
+	sbuf := string(buf)
344
+
345
+	// For GNU PAX sparse format 0.0 support.
346
+	// This function transforms the sparse format 0.0 headers into format 0.1
347
+	// headers since 0.0 headers were not PAX compliant.
348
+	var sparseMap []string
349
+
350
+	extHdrs := make(map[string]string)
351
+	for len(sbuf) > 0 {
352
+		key, value, residual, err := parsePAXRecord(sbuf)
353
+		if err != nil {
354
+			return nil, ErrHeader
355
+		}
356
+		sbuf = residual
357
+
358
+		switch key {
359
+		case paxGNUSparseOffset, paxGNUSparseNumBytes:
360
+			// Validate sparse header order and value.
361
+			if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) ||
362
+				(len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) ||
363
+				strings.Contains(value, ",") {
364
+				return nil, ErrHeader
365
+			}
366
+			sparseMap = append(sparseMap, value)
367
+		default:
368
+			// According to PAX specification, a value is stored only if it is
369
+			// non-empty. Otherwise, the key is deleted.
370
+			if len(value) > 0 {
371
+				extHdrs[key] = value
372
+			} else {
373
+				delete(extHdrs, key)
374
+			}
375
+		}
376
+	}
377
+	if len(sparseMap) > 0 {
378
+		extHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",")
379
+	}
380
+	return extHdrs, nil
381
+}
382
+
383
+// skipUnread skips any unread bytes in the existing file entry, as well as any
384
+// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
385
+// encountered in the data portion; it is okay to hit io.EOF in the padding.
386
+//
387
+// Note that this function still works properly even when sparse files are being
388
+// used since numBytes returns the bytes remaining in the underlying io.Reader.
389
+func (tr *Reader) skipUnread() error {
390
+	dataSkip := tr.numBytes()      // Number of data bytes to skip
391
+	totalSkip := dataSkip + tr.pad // Total number of bytes to skip
392
+	tr.curr, tr.pad = nil, 0
393
+
394
+	// If possible, Seek to the last byte before the end of the data section.
395
+	// Do this because Seek is often lazy about reporting errors; this will mask
396
+	// the fact that the tar stream may be truncated. We can rely on the
397
+	// io.CopyN done shortly afterwards to trigger any IO errors.
398
+	var seekSkipped int64 // Number of bytes skipped via Seek
399
+	if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
400
+		// Not all io.Seeker can actually Seek. For example, os.Stdin implements
401
+		// io.Seeker, but calling Seek always returns an error and performs
402
+		// no action. Thus, we try an innocent seek to the current position
403
+		// to see if Seek is really supported.
404
+		pos1, err := sr.Seek(0, io.SeekCurrent)
405
+		if err == nil {
406
+			// Seek seems supported, so perform the real Seek.
407
+			pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent)
408
+			if err != nil {
409
+				return err
410
+			}
411
+			seekSkipped = pos2 - pos1
412
+		}
413
+	}
414
+
415
+	copySkipped, err := io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
416
+	if err == io.EOF && seekSkipped+copySkipped < dataSkip {
417
+		err = io.ErrUnexpectedEOF
418
+	}
419
+	return err
420
+}
421
+
422
+// readHeader reads the next block header and assumes that the underlying reader
423
+// is already aligned to a block boundary. It returns the raw block of the
424
+// header in case further processing is required.
425
+//
426
+// The err will be set to io.EOF only when one of the following occurs:
427
+//	* Exactly 0 bytes are read and EOF is hit.
428
+//	* Exactly 1 block of zeros is read and EOF is hit.
429
+//	* At least 2 blocks of zeros are read.
430
+func (tr *Reader) readHeader() (*Header, *block, error) {
431
+	// Two blocks of zero bytes marks the end of the archive.
432
+	if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
433
+		return nil, nil, err // EOF is okay here; exactly 0 bytes read
434
+	}
435
+	if bytes.Equal(tr.blk[:], zeroBlock[:]) {
436
+		if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
437
+			return nil, nil, err // EOF is okay here; exactly 1 block of zeros read
438
+		}
439
+		if bytes.Equal(tr.blk[:], zeroBlock[:]) {
440
+			return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read
441
+		}
442
+		return nil, nil, ErrHeader // Zero block and then non-zero block
443
+	}
444
+
445
+	// Verify the header matches a known format.
446
+	format := tr.blk.GetFormat()
447
+	if format == formatUnknown {
448
+		return nil, nil, ErrHeader
449
+	}
450
+
451
+	var p parser
452
+	hdr := new(Header)
453
+
454
+	// Unpack the V7 header.
455
+	v7 := tr.blk.V7()
456
+	hdr.Name = p.parseString(v7.Name())
457
+	hdr.Mode = p.parseNumeric(v7.Mode())
458
+	hdr.Uid = int(p.parseNumeric(v7.UID()))
459
+	hdr.Gid = int(p.parseNumeric(v7.GID()))
460
+	hdr.Size = p.parseNumeric(v7.Size())
461
+	hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
462
+	hdr.Typeflag = v7.TypeFlag()[0]
463
+	hdr.Linkname = p.parseString(v7.LinkName())
464
+
465
+	// Unpack format specific fields.
466
+	if format > formatV7 {
467
+		ustar := tr.blk.USTAR()
468
+		hdr.Uname = p.parseString(ustar.UserName())
469
+		hdr.Gname = p.parseString(ustar.GroupName())
470
+		if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
471
+			hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
472
+			hdr.Devminor = p.parseNumeric(ustar.DevMinor())
473
+		}
474
+
475
+		var prefix string
476
+		switch format {
477
+		case formatUSTAR, formatGNU:
478
+			// TODO(dsnet): Do not use the prefix field for the GNU format!
479
+			// See golang.org/issues/12594
480
+			ustar := tr.blk.USTAR()
481
+			prefix = p.parseString(ustar.Prefix())
482
+		case formatSTAR:
483
+			star := tr.blk.STAR()
484
+			prefix = p.parseString(star.Prefix())
485
+			hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
486
+			hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
487
+		}
488
+		if len(prefix) > 0 {
489
+			hdr.Name = prefix + "/" + hdr.Name
490
+		}
491
+	}
492
+	return hdr, &tr.blk, p.err
493
+}
494
+
495
+// readOldGNUSparseMap reads the sparse map from the old GNU sparse format.
496
+// The sparse map is stored in the tar header if it's small enough.
497
+// If it's larger than four entries, then one or more extension headers are used
498
+// to store the rest of the sparse map.
499
+//
500
+// The Header.Size does not reflect the size of any extended headers used.
501
+// Thus, this function will read from the raw io.Reader to fetch extra headers.
502
+// This method mutates blk in the process.
503
+func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) {
504
+	// Make sure that the input format is GNU.
505
+	// Unfortunately, the STAR format also has a sparse header format that uses
506
+	// the same type flag but has a completely different layout.
507
+	if blk.GetFormat() != formatGNU {
508
+		return nil, ErrHeader
509
+	}
510
+
511
+	var p parser
512
+	hdr.Size = p.parseNumeric(blk.GNU().RealSize())
513
+	if p.err != nil {
514
+		return nil, p.err
515
+	}
516
+	var s sparseArray = blk.GNU().Sparse()
517
+	var sp = make([]sparseEntry, 0, s.MaxEntries())
518
+	for {
519
+		for i := 0; i < s.MaxEntries(); i++ {
520
+			// This termination condition is identical to GNU and BSD tar.
521
+			if s.Entry(i).Offset()[0] == 0x00 {
522
+				break // Don't return, need to process extended headers (even if empty)
523
+			}
524
+			offset := p.parseNumeric(s.Entry(i).Offset())
525
+			numBytes := p.parseNumeric(s.Entry(i).NumBytes())
526
+			if p.err != nil {
527
+				return nil, p.err
528
+			}
529
+			sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
530
+		}
531
+
532
+		if s.IsExtended()[0] > 0 {
533
+			// There are more entries. Read an extension header and parse its entries.
534
+			if _, err := io.ReadFull(tr.r, blk[:]); err != nil {
535
+				if err == io.EOF {
536
+					err = io.ErrUnexpectedEOF
537
+				}
538
+				return nil, err
539
+			}
540
+			s = blk.Sparse()
541
+			continue
542
+		}
543
+		return sp, nil // Done
544
+	}
545
+}
546
+
547
+// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
548
+// version 1.0. The format of the sparse map consists of a series of
549
+// newline-terminated numeric fields. The first field is the number of entries
550
+// and is always present. Following this are the entries, consisting of two
551
+// fields (offset, numBytes). This function must stop reading at the end
552
+// boundary of the block containing the last newline.
553
+//
554
+// Note that the GNU manual says that numeric values should be encoded in octal
555
+// format. However, the GNU tar utility itself outputs these values in decimal.
556
+// As such, this library treats values as being encoded in decimal.
557
+func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
558
+	var cntNewline int64
559
+	var buf bytes.Buffer
560
+	var blk = make([]byte, blockSize)
561
+
562
+	// feedTokens copies data in numBlock chunks from r into buf until there are
563
+	// at least cnt newlines in buf. It will not read more blocks than needed.
564
+	var feedTokens = func(cnt int64) error {
565
+		for cntNewline < cnt {
566
+			if _, err := io.ReadFull(r, blk); err != nil {
567
+				if err == io.EOF {
568
+					err = io.ErrUnexpectedEOF
569
+				}
570
+				return err
571
+			}
572
+			buf.Write(blk)
573
+			for _, c := range blk {
574
+				if c == '\n' {
575
+					cntNewline++
576
+				}
577
+			}
578
+		}
579
+		return nil
580
+	}
581
+
582
+	// nextToken gets the next token delimited by a newline. This assumes that
583
+	// at least one newline exists in the buffer.
584
+	var nextToken = func() string {
585
+		cntNewline--
586
+		tok, _ := buf.ReadString('\n')
587
+		return tok[:len(tok)-1] // Cut off newline
588
+	}
589
+
590
+	// Parse for the number of entries.
591
+	// Use integer overflow resistant math to check this.
592
+	if err := feedTokens(1); err != nil {
593
+		return nil, err
594
+	}
595
+	numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
596
+	if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
597
+		return nil, ErrHeader
598
+	}
599
+
600
+	// Parse for all member entries.
601
+	// numEntries is trusted after this since a potential attacker must have
602
+	// committed resources proportional to what this library used.
603
+	if err := feedTokens(2 * numEntries); err != nil {
604
+		return nil, err
605
+	}
606
+	sp := make([]sparseEntry, 0, numEntries)
607
+	for i := int64(0); i < numEntries; i++ {
608
+		offset, err := strconv.ParseInt(nextToken(), 10, 64)
609
+		if err != nil {
610
+			return nil, ErrHeader
611
+		}
612
+		numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
613
+		if err != nil {
614
+			return nil, ErrHeader
615
+		}
616
+		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
617
+	}
618
+	return sp, nil
619
+}
620
+
621
+// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
622
+// version 0.1. The sparse map is stored in the PAX headers.
623
+func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
624
+	// Get number of entries.
625
+	// Use integer overflow resistant math to check this.
626
+	numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
627
+	numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
628
+	if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
629
+		return nil, ErrHeader
630
+	}
631
+
632
+	// There should be two numbers in sparseMap for each entry.
633
+	sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
634
+	if int64(len(sparseMap)) != 2*numEntries {
635
+		return nil, ErrHeader
636
+	}
637
+
638
+	// Loop through the entries in the sparse map.
639
+	// numEntries is trusted now.
640
+	sp := make([]sparseEntry, 0, numEntries)
641
+	for i := int64(0); i < numEntries; i++ {
642
+		offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
643
+		if err != nil {
644
+			return nil, ErrHeader
645
+		}
646
+		numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
647
+		if err != nil {
648
+			return nil, ErrHeader
649
+		}
650
+		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
651
+	}
652
+	return sp, nil
653
+}
654
+
655
+// numBytes returns the number of bytes left to read in the current file's entry
656
+// in the tar archive, or 0 if there is no current file.
657
+func (tr *Reader) numBytes() int64 {
658
+	if tr.curr == nil {
659
+		// No current file, so no bytes
660
+		return 0
661
+	}
662
+	return tr.curr.numBytes()
663
+}
664
+
665
+// Read reads from the current entry in the tar archive.
666
+// It returns 0, io.EOF when it reaches the end of that entry,
667
+// until Next is called to advance to the next entry.
668
+//
669
+// Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
670
+// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
671
+// the Header.Size claims.
672
+func (tr *Reader) Read(b []byte) (int, error) {
673
+	if tr.err != nil {
674
+		return 0, tr.err
675
+	}
676
+	if tr.curr == nil {
677
+		return 0, io.EOF
678
+	}
679
+
680
+	n, err := tr.curr.Read(b)
681
+	if err != nil && err != io.EOF {
682
+		tr.err = err
683
+	}
684
+	return n, err
685
+}
686
+
687
+func (rfr *regFileReader) Read(b []byte) (n int, err error) {
688
+	if rfr.nb == 0 {
689
+		// file consumed
690
+		return 0, io.EOF
691
+	}
692
+	if int64(len(b)) > rfr.nb {
693
+		b = b[0:rfr.nb]
694
+	}
695
+	n, err = rfr.r.Read(b)
696
+	rfr.nb -= int64(n)
697
+
698
+	if err == io.EOF && rfr.nb > 0 {
699
+		err = io.ErrUnexpectedEOF
700
+	}
701
+	return
702
+}
703
+
704
// numBytes returns the number of bytes left to read in the file's data in
// the tar archive. This is the nb counter that Read decrements by the number
// of bytes it hands out.
func (rfr *regFileReader) numBytes() int64 {
	return rfr.nb
}
708
+
709
+// newSparseFileReader creates a new sparseFileReader, but validates all of the
710
+// sparse entries before doing so.
711
+func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
712
+	if total < 0 {
713
+		return nil, ErrHeader // Total size cannot be negative
714
+	}
715
+
716
+	// Validate all sparse entries. These are the same checks as performed by
717
+	// the BSD tar utility.
718
+	for i, s := range sp {
719
+		switch {
720
+		case s.offset < 0 || s.numBytes < 0:
721
+			return nil, ErrHeader // Negative values are never okay
722
+		case s.offset > math.MaxInt64-s.numBytes:
723
+			return nil, ErrHeader // Integer overflow with large length
724
+		case s.offset+s.numBytes > total:
725
+			return nil, ErrHeader // Region extends beyond the "real" size
726
+		case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
727
+			return nil, ErrHeader // Regions can't overlap and must be in order
728
+		}
729
+	}
730
+	return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
731
+}
732
+
733
+// readHole reads a sparse hole ending at endOffset.
734
+func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
735
+	n64 := endOffset - sfr.pos
736
+	if n64 > int64(len(b)) {
737
+		n64 = int64(len(b))
738
+	}
739
+	n := int(n64)
740
+	for i := 0; i < n; i++ {
741
+		b[i] = 0
742
+	}
743
+	sfr.pos += n64
744
+	return n
745
+}
746
+
747
+// Read reads the sparse file data in expanded form.
748
+func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
749
+	// Skip past all empty fragments.
750
+	for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
751
+		sfr.sp = sfr.sp[1:]
752
+	}
753
+
754
+	// If there are no more fragments, then it is possible that there
755
+	// is one last sparse hole.
756
+	if len(sfr.sp) == 0 {
757
+		// This behavior matches the BSD tar utility.
758
+		// However, GNU tar stops returning data even if sfr.total is unmet.
759
+		if sfr.pos < sfr.total {
760
+			return sfr.readHole(b, sfr.total), nil
761
+		}
762
+		return 0, io.EOF
763
+	}
764
+
765
+	// In front of a data fragment, so read a hole.
766
+	if sfr.pos < sfr.sp[0].offset {
767
+		return sfr.readHole(b, sfr.sp[0].offset), nil
768
+	}
769
+
770
+	// In a data fragment, so read from it.
771
+	// This math is overflow free since we verify that offset and numBytes can
772
+	// be safely added when creating the sparseFileReader.
773
+	endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
774
+	bytesLeft := endPos - sfr.pos                   // Bytes left in fragment
775
+	if int64(len(b)) > bytesLeft {
776
+		b = b[:bytesLeft]
777
+	}
778
+
779
+	n, err = sfr.rfr.Read(b)
780
+	sfr.pos += int64(n)
781
+	if err == io.EOF {
782
+		if sfr.pos < endPos {
783
+			err = io.ErrUnexpectedEOF // There was supposed to be more data
784
+		} else if sfr.pos < sfr.total {
785
+			err = nil // There is still an implicit sparse hole at the end
786
+		}
787
+	}
788
+
789
+	if sfr.pos == endPos {
790
+		sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
791
+	}
792
+	return n, err
793
+}
794
+
795
// numBytes returns the number of bytes left to read in the sparse file's
// sparse-encoded data in the tar archive. The value comes straight from the
// wrapped reader, so it counts stored bytes rather than expanded bytes.
func (sfr *sparseFileReader) numBytes() int64 {
	return sfr.rfr.numBytes()
}
0 800
new file mode 100644
... ...
@@ -0,0 +1,20 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build linux dragonfly openbsd solaris
5
+
6
+package tar
7
+
8
+import (
9
+	"syscall"
10
+	"time"
11
+)
12
+
13
// statAtime converts the access timestamp stored in st.Atim to a time.Time.
func statAtime(st *syscall.Stat_t) time.Time {
	sec, nsec := st.Atim.Unix()
	return time.Unix(sec, nsec)
}
16
+
17
// statCtime converts the change timestamp stored in st.Ctim to a time.Time.
func statCtime(st *syscall.Stat_t) time.Time {
	sec, nsec := st.Ctim.Unix()
	return time.Unix(sec, nsec)
}
0 20
new file mode 100644
... ...
@@ -0,0 +1,20 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build darwin freebsd netbsd
5
+
6
+package tar
7
+
8
+import (
9
+	"syscall"
10
+	"time"
11
+)
12
+
13
+func statAtime(st *syscall.Stat_t) time.Time {
14
+	return time.Unix(st.Atimespec.Unix())
15
+}
16
+
17
+func statCtime(st *syscall.Stat_t) time.Time {
18
+	return time.Unix(st.Ctimespec.Unix())
19
+}
0 20
new file mode 100644
... ...
@@ -0,0 +1,32 @@
0
+// Copyright 2012 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+// +build linux darwin dragonfly freebsd openbsd netbsd solaris
5
+
6
+package tar
7
+
8
+import (
9
+	"os"
10
+	"syscall"
11
+)
12
+
13
// init installs statUnix as the package's sysStat hook. This file is only
// built on the Unix-like systems named in its build constraint.
func init() {
	sysStat = statUnix
}
16
+
17
+func statUnix(fi os.FileInfo, h *Header) error {
18
+	sys, ok := fi.Sys().(*syscall.Stat_t)
19
+	if !ok {
20
+		return nil
21
+	}
22
+	h.Uid = int(sys.Uid)
23
+	h.Gid = int(sys.Gid)
24
+	// TODO(bradfitz): populate username & group.  os/user
25
+	// doesn't cache LookupId lookups, and lacks group
26
+	// lookup functions.
27
+	h.AccessTime = statAtime(sys)
28
+	h.ChangeTime = statCtime(sys)
29
+	// TODO(bradfitz): major/minor device numbers?
30
+	return nil
31
+}
0 32
new file mode 100644
... ...
@@ -0,0 +1,252 @@
0
+// Copyright 2016 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+package tar
5
+
6
+import (
7
+	"bytes"
8
+	"fmt"
9
+	"strconv"
10
+	"strings"
11
+	"time"
12
+)
13
+
14
// isASCII reports whether s consists solely of 7-bit ASCII characters.
// The empty string counts as ASCII.
func isASCII(s string) bool {
	// Every non-ASCII rune, and every invalid byte, involves a byte >= 0x80,
	// so a plain byte scan is sufficient.
	for i := 0; i < len(s); i++ {
		if s[i] >= 0x80 {
			return false
		}
	}
	return true
}
22
+
23
+func toASCII(s string) string {
24
+	if isASCII(s) {
25
+		return s
26
+	}
27
+	var buf bytes.Buffer
28
+	for _, c := range s {
29
+		if c < 0x80 {
30
+			buf.WriteByte(byte(c))
31
+		}
32
+	}
33
+	return buf.String()
34
+}
35
+
36
// parser decodes raw tar header fields. Its methods record a failure in err
// rather than returning it, so a caller can parse several fields in a row
// and inspect err once at the end.
type parser struct {
	err error // Last error seen
}
39
+
40
// formatter encodes values into raw tar header fields. Its methods record a
// failure in err rather than returning it, so a caller can format several
// fields in a row and inspect err once at the end.
type formatter struct {
	err error // Last error seen
}
43
+
44
+// parseString parses bytes as a NUL-terminated C-style string.
45
+// If a NUL byte is not found then the whole slice is returned as a string.
46
+func (*parser) parseString(b []byte) string {
47
+	n := 0
48
+	for n < len(b) && b[n] != 0 {
49
+		n++
50
+	}
51
+	return string(b[0:n])
52
+}
53
+
54
+// Write s into b, terminating it with a NUL if there is room.
55
+func (f *formatter) formatString(b []byte, s string) {
56
+	if len(s) > len(b) {
57
+		f.err = ErrFieldTooLong
58
+		return
59
+	}
60
+	ascii := toASCII(s)
61
+	copy(b, ascii)
62
+	if len(ascii) < len(b) {
63
+		b[len(ascii)] = 0
64
+	}
65
+}
66
+
67
// fitsInBase256 reports whether x can be stored in n bytes using base-256
// encoding. Base-256, unlike octal, needs no terminating NUL, so all n bytes
// are usable for output.
//
// Strict GNU binary mode is assumed: the first byte may only be 0x80 or
// 0xff, which makes it the equivalent of a two's complement sign bit and
// leaves n-1 bytes for the magnitude.
func fitsInBase256(n int, x int64) bool {
	if n >= 9 {
		return true // eight payload bytes hold any int64
	}
	shift := uint(n-1) * 8
	lo, hi := int64(-1)<<shift, int64(1)<<shift
	return lo <= x && x < hi
}
78
+
79
+// parseNumeric parses the input as being encoded in either base-256 or octal.
80
+// This function may return negative numbers.
81
+// If parsing fails or an integer overflow occurs, err will be set.
82
+func (p *parser) parseNumeric(b []byte) int64 {
83
+	// Check for base-256 (binary) format first.
84
+	// If the first bit is set, then all following bits constitute a two's
85
+	// complement encoded number in big-endian byte order.
86
+	if len(b) > 0 && b[0]&0x80 != 0 {
87
+		// Handling negative numbers relies on the following identity:
88
+		//	-a-1 == ^a
89
+		//
90
+		// If the number is negative, we use an inversion mask to invert the
91
+		// data bytes and treat the value as an unsigned number.
92
+		var inv byte // 0x00 if positive or zero, 0xff if negative
93
+		if b[0]&0x40 != 0 {
94
+			inv = 0xff
95
+		}
96
+
97
+		var x uint64
98
+		for i, c := range b {
99
+			c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
100
+			if i == 0 {
101
+				c &= 0x7f // Ignore signal bit in first byte
102
+			}
103
+			if (x >> 56) > 0 {
104
+				p.err = ErrHeader // Integer overflow
105
+				return 0
106
+			}
107
+			x = x<<8 | uint64(c)
108
+		}
109
+		if (x >> 63) > 0 {
110
+			p.err = ErrHeader // Integer overflow
111
+			return 0
112
+		}
113
+		if inv == 0xff {
114
+			return ^int64(x)
115
+		}
116
+		return int64(x)
117
+	}
118
+
119
+	// Normal case is base-8 (octal) format.
120
+	return p.parseOctal(b)
121
+}
122
+
123
+// Write x into b, as binary (GNUtar/star extension).
124
+func (f *formatter) formatNumeric(b []byte, x int64) {
125
+	if fitsInBase256(len(b), x) {
126
+		for i := len(b) - 1; i >= 0; i-- {
127
+			b[i] = byte(x)
128
+			x >>= 8
129
+		}
130
+		b[0] |= 0x80 // Highest bit indicates binary format
131
+		return
132
+	}
133
+
134
+	f.formatOctal(b, 0) // Last resort, just write zero
135
+	f.err = ErrFieldTooLong
136
+}
137
+
138
+func (p *parser) parseOctal(b []byte) int64 {
139
+	// Because unused fields are filled with NULs, we need
140
+	// to skip leading NULs. Fields may also be padded with
141
+	// spaces or NULs.
142
+	// So we remove leading and trailing NULs and spaces to
143
+	// be sure.
144
+	b = bytes.Trim(b, " \x00")
145
+
146
+	if len(b) == 0 {
147
+		return 0
148
+	}
149
+	x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
150
+	if perr != nil {
151
+		p.err = ErrHeader
152
+	}
153
+	return int64(x)
154
+}
155
+
156
+func (f *formatter) formatOctal(b []byte, x int64) {
157
+	s := strconv.FormatInt(x, 8)
158
+	// Add leading zeros, but leave room for a NUL.
159
+	if n := len(b) - len(s) - 1; n > 0 {
160
+		s = strings.Repeat("0", n) + s
161
+	}
162
+	f.formatString(b, s)
163
+}
164
+
165
+// parsePAXTime takes a string of the form %d.%d as described in the PAX
166
+// specification. Note that this implementation allows for negative timestamps,
167
+// which is allowed for by the PAX specification, but not always portable.
168
+func parsePAXTime(s string) (time.Time, error) {
169
+	const maxNanoSecondDigits = 9
170
+
171
+	// Split string into seconds and sub-seconds parts.
172
+	ss, sn := s, ""
173
+	if pos := strings.IndexByte(s, '.'); pos >= 0 {
174
+		ss, sn = s[:pos], s[pos+1:]
175
+	}
176
+
177
+	// Parse the seconds.
178
+	secs, err := strconv.ParseInt(ss, 10, 64)
179
+	if err != nil {
180
+		return time.Time{}, ErrHeader
181
+	}
182
+	if len(sn) == 0 {
183
+		return time.Unix(secs, 0), nil // No sub-second values
184
+	}
185
+
186
+	// Parse the nanoseconds.
187
+	if strings.Trim(sn, "0123456789") != "" {
188
+		return time.Time{}, ErrHeader
189
+	}
190
+	if len(sn) < maxNanoSecondDigits {
191
+		sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad
192
+	} else {
193
+		sn = sn[:maxNanoSecondDigits] // Right truncate
194
+	}
195
+	nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed
196
+	if len(ss) > 0 && ss[0] == '-' {
197
+		return time.Unix(secs, -1*int64(nsecs)), nil // Negative correction
198
+	}
199
+	return time.Unix(secs, int64(nsecs)), nil
200
+}
201
+
202
+// TODO(dsnet): Implement formatPAXTime.
203
+
204
+// parsePAXRecord parses the input PAX record string into a key-value pair.
205
+// If parsing is successful, it will slice off the currently read record and
206
+// return the remainder as r.
207
+//
208
+// A PAX record is of the following form:
209
+//	"%d %s=%s\n" % (size, key, value)
210
+func parsePAXRecord(s string) (k, v, r string, err error) {
211
+	// The size field ends at the first space.
212
+	sp := strings.IndexByte(s, ' ')
213
+	if sp == -1 {
214
+		return "", "", s, ErrHeader
215
+	}
216
+
217
+	// Parse the first token as a decimal integer.
218
+	n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
219
+	if perr != nil || n < 5 || int64(len(s)) < n {
220
+		return "", "", s, ErrHeader
221
+	}
222
+
223
+	// Extract everything between the space and the final newline.
224
+	rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
225
+	if nl != "\n" {
226
+		return "", "", s, ErrHeader
227
+	}
228
+
229
+	// The first equals separates the key from the value.
230
+	eq := strings.IndexByte(rec, '=')
231
+	if eq == -1 {
232
+		return "", "", s, ErrHeader
233
+	}
234
+	return rec[:eq], rec[eq+1:], rem, nil
235
+}
236
+
237
+// formatPAXRecord formats a single PAX record, prefixing it with the
238
+// appropriate length.
239
+func formatPAXRecord(k, v string) string {
240
+	const padding = 3 // Extra padding for ' ', '=', and '\n'
241
+	size := len(k) + len(v) + padding
242
+	size += len(strconv.Itoa(size))
243
+	record := fmt.Sprintf("%d %s=%s\n", size, k, v)
244
+
245
+	// Final adjustment if adding size field increased the record size.
246
+	if len(record) != size {
247
+		size = len(record)
248
+		record = fmt.Sprintf("%d %s=%s\n", size, k, v)
249
+	}
250
+	return record
251
+}
0 252
new file mode 100644
... ...
@@ -0,0 +1,364 @@
0
+// Copyright 2009 The Go Authors. All rights reserved.
1
+// Use of this source code is governed by a BSD-style
2
+// license that can be found in the LICENSE file.
3
+
4
+package tar
5
+
6
+// TODO(dsymonds):
7
+// - catch more errors (no first header, etc.)
8
+
9
+import (
10
+	"bytes"
11
+	"errors"
12
+	"fmt"
13
+	"io"
14
+	"path"
15
+	"sort"
16
+	"strconv"
17
+	"strings"
18
+	"time"
19
+)
20
+
21
var (
	// ErrWriteTooLong is returned by Write when the caller supplies more
	// bytes than remain for the current entry.
	ErrWriteTooLong    = errors.New("archive/tar: write too long")
	// ErrFieldTooLong, per its message, reports a header field that exceeds
	// its size limit.
	// NOTE(review): not referenced in this part of the file; confirm where
	// it is still returned.
	ErrFieldTooLong    = errors.New("archive/tar: header field too long")
	// ErrWriteAfterClose is returned by Write and WriteHeader once the
	// Writer has been closed.
	ErrWriteAfterClose = errors.New("archive/tar: write after close")
	// errInvalidHeader is returned by writeHeader when the header would need
	// PAX records but PAX output is not allowed for that call.
	errInvalidHeader   = errors.New("archive/tar: header field too long or contains invalid values")
)
27
+
28
// A Writer provides sequential writing of a tar archive in POSIX.1 format.
// A tar archive consists of a sequence of files.
// Call WriteHeader to begin a new file, and then call Write to supply that file's data,
// writing at most hdr.Size bytes in total.
type Writer struct {
	w          io.Writer // destination of the archive stream
	err        error     // last recorded error; while set, Flush, WriteHeader and Close return it without writing
	nb         int64     // number of unwritten bytes for current file entry
	pad        int64     // amount of padding to write after current file entry
	closed     bool      // set by Close; Write and WriteHeader then fail with ErrWriteAfterClose
	usedBinary bool      // whether the binary numeric field extension was used
	preferPax  bool      // use PAX header instead of binary numeric header
	hdrBuff    block     // buffer to use in writeHeader when writing a regular header
	paxHdrBuff block     // buffer to use in writeHeader when writing a PAX header
}
43
+
44
+// NewWriter creates a new Writer writing to w.
45
+func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
46
+
47
+// Flush finishes writing the current file (optional).
48
+func (tw *Writer) Flush() error {
49
+	if tw.nb > 0 {
50
+		tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb)
51
+		return tw.err
52
+	}
53
+
54
+	n := tw.nb + tw.pad
55
+	for n > 0 && tw.err == nil {
56
+		nr := n
57
+		if nr > blockSize {
58
+			nr = blockSize
59
+		}
60
+		var nw int
61
+		nw, tw.err = tw.w.Write(zeroBlock[0:nr])
62
+		n -= int64(nw)
63
+	}
64
+	tw.nb = 0
65
+	tw.pad = 0
66
+	return tw.err
67
+}
68
+
69
// minTime and maxTime bound the modification times that writeHeader stores
// in the header; a ModTime outside this range is written as 0.
var (
	// minTime is the Unix epoch.
	minTime = time.Unix(0, 0)
	// There is room for 11 octal digits (33 bits) of mtime.
	maxTime = minTime.Add((1<<33 - 1) * time.Second)
)
74
+
75
// WriteHeader writes hdr and prepares to accept the file's contents.
// Unless an earlier error is pending, it first calls Flush to finish the
// previous entry; if that entry still has unwritten data, the Flush error is
// returned and no header is written.
// Calling after a Close will return ErrWriteAfterClose.
func (tw *Writer) WriteHeader(hdr *Header) error {
	// true: a PAX extended header may be emitted ahead of hdr if needed.
	return tw.writeHeader(hdr, true)
}
81
+
82
// writeHeader is the implementation behind WriteHeader: it encodes hdr into
// a header block, writes it, and prepares the Writer to accept hdr.Size bytes
// of file data.
//
// Unless an earlier error is pending it first calls Flush to finish the
// previous entry. Calling after a Close will return ErrWriteAfterClose.
//
// allowPax reports whether this call may emit a PAX extended header ahead of
// hdr. WriteHeader passes true. writePAXHeader passes false when it writes
// the extended header entry itself, which suppresses nested PAX headers: if
// that entry would in turn need PAX records, errInvalidHeader is returned.
func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
	if tw.closed {
		return ErrWriteAfterClose
	}
	// Flush records any failure in tw.err, which is checked right below.
	if tw.err == nil {
		tw.Flush()
	}
	if tw.err != nil {
		return tw.err
	}

	// a map to hold pax header records, if any are needed
	paxHeaders := make(map[string]string)

	// TODO(dsnet): we might want to use PAX headers for
	// subsecond time resolution, but for now let's just capture
	// too long fields or non ascii characters

	// We need to select which scratch buffer to use carefully,
	// since this method is called recursively to write PAX headers.
	// If allowPax is true, this is the non-recursive call, and we will use hdrBuff.
	// If allowPax is false, we are being called by writePAXHeader, and hdrBuff is
	// already being used by the non-recursive call, so we must use paxHdrBuff.
	header := &tw.hdrBuff
	if !allowPax {
		header = &tw.paxHdrBuff
	}
	copy(header[:], zeroBlock[:])

	// Wrappers around formatter that automatically sets paxHeaders if the
	// argument extends beyond the capacity of the input byte slice.
	var f formatter
	var formatString = func(b []byte, s string, paxKeyword string) {
		// NOTE: && binds tighter than ||, so this reads as
		// (paxKeyword != paxNone && len(s) > len(b)) || !isASCII(s).
		// A non-ASCII string is therefore recorded in paxHeaders even when
		// paxKeyword is paxNone.
		needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s)
		if needsPaxHeader {
			paxHeaders[paxKeyword] = s
			return
		}
		f.formatString(b, s)
	}
	var formatNumeric = func(b []byte, x int64, paxKeyword string) {
		// Try octal first.
		s := strconv.FormatInt(x, 8)
		if len(s) < len(b) {
			f.formatOctal(b, x)
			return
		}

		// If it is too long for octal, and PAX is preferred, use a PAX header.
		if paxKeyword != paxNone && tw.preferPax {
			f.formatOctal(b, 0)
			s := strconv.FormatInt(x, 10)
			paxHeaders[paxKeyword] = s
			return
		}

		// Fall back to the binary numeric extension. usedBinary is not reset
		// anywhere in this function, so it stays set for later headers too.
		tw.usedBinary = true
		f.formatNumeric(b, x)
	}

	// Handle out of range ModTime carefully.
	// A ModTime outside [minTime, maxTime] is stored as 0.
	var modTime int64
	if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
		modTime = hdr.ModTime.Unix()
	}

	v7 := header.V7()
	formatString(v7.Name(), hdr.Name, paxPath)
	// TODO(dsnet): The GNU format permits the mode field to be encoded in
	// base-256 format. Thus, we can use formatNumeric instead of formatOctal.
	f.formatOctal(v7.Mode(), hdr.Mode)
	formatNumeric(v7.UID(), int64(hdr.Uid), paxUid)
	formatNumeric(v7.GID(), int64(hdr.Gid), paxGid)
	formatNumeric(v7.Size(), hdr.Size, paxSize)
	// TODO(dsnet): Consider using PAX for finer time granularity.
	formatNumeric(v7.ModTime(), modTime, paxNone)
	v7.TypeFlag()[0] = hdr.Typeflag
	formatString(v7.LinkName(), hdr.Linkname, paxLinkpath)

	ustar := header.USTAR()
	formatString(ustar.UserName(), hdr.Uname, paxUname)
	formatString(ustar.GroupName(), hdr.Gname, paxGname)
	formatNumeric(ustar.DevMajor(), hdr.Devmajor, paxNone)
	formatNumeric(ustar.DevMinor(), hdr.Devminor, paxNone)

	// TODO(dsnet): The logic surrounding the prefix field is broken when trying
	// to encode the header as GNU format. The challenge with the current logic
	// is that we are unsure what format we are using at any given moment until
	// we have processed *all* of the fields. The problem is that by the time
	// all fields have been processed, some work has already been done to handle
	// each field under the assumption that it is for one given format or
	// another. In some situations, this causes the Writer to be confused and
	// encode a prefix field when the format being used is GNU. Thus, producing
	// an invalid tar file.
	//
	// As a short-term fix, we disable the logic to use the prefix field, which
	// will force the badly generated GNU files to become encoded as being
	// the PAX format.
	//
	// As an alternative fix, we could hard-code preferPax to be true. However,
	// this is problematic for the following reasons:
	//	* The preferPax functionality is not tested at all.
	//	* This can result in headers that try to use both the GNU and PAX
	//	features at the same time, which is also wrong.
	//
	// The proper fix for this is to use a two-pass method:
	//	* The first pass simply determines what set of formats can possibly
	//	encode the given header.
	//	* The second pass actually encodes the header as that given format
	//	without worrying about violating the format.
	//
	// See the following:
	//	https://golang.org/issue/12594
	//	https://golang.org/issue/17630
	//	https://golang.org/issue/9683
	const usePrefix = false

	// try to use a ustar header when only the name is too long
	// (with usePrefix hard-coded to false, this branch never runs)
	_, paxPathUsed := paxHeaders[paxPath]
	if usePrefix && !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
		prefix, suffix, ok := splitUSTARPath(hdr.Name)
		if ok {
			// Since we can encode in USTAR format, disable PAX header.
			delete(paxHeaders, paxPath)

			// Update the path fields
			formatString(v7.Name(), suffix, paxNone)
			formatString(ustar.Prefix(), prefix, paxNone)
		}
	}

	if tw.usedBinary {
		header.SetFormat(formatGNU)
	} else {
		header.SetFormat(formatUSTAR)
	}

	// Check if there were any formatting errors.
	if f.err != nil {
		tw.err = f.err
		return tw.err
	}

	// Extended attributes are only carried as PAX records, and only on the
	// non-recursive call.
	if allowPax {
		for k, v := range hdr.Xattrs {
			paxHeaders[paxXattr+k] = v
		}
	}

	// Emit the PAX extended header entry, if one is needed, before the
	// regular header block.
	if len(paxHeaders) > 0 {
		if !allowPax {
			return errInvalidHeader
		}
		if err := tw.writePAXHeader(hdr, paxHeaders); err != nil {
			return err
		}
	}
	// Set up accounting for the file data: nb bytes are expected, followed by
	// pad zero bytes to reach the next block boundary.
	tw.nb = hdr.Size
	tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize

	_, tw.err = tw.w.Write(header[:])
	return tw.err
}
250
+
251
+// splitUSTARPath splits a path according to USTAR prefix and suffix rules.
252
+// If the path is not splittable, then it will return ("", "", false).
253
+func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
254
+	length := len(name)
255
+	if length <= nameSize || !isASCII(name) {
256
+		return "", "", false
257
+	} else if length > prefixSize+1 {
258
+		length = prefixSize + 1
259
+	} else if name[length-1] == '/' {
260
+		length--
261
+	}
262
+
263
+	i := strings.LastIndex(name[:length], "/")
264
+	nlen := len(name) - i - 1 // nlen is length of suffix
265
+	plen := i                 // plen is length of prefix
266
+	if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize {
267
+		return "", "", false
268
+	}
269
+	return name[:i], name[i+1:], true
270
+}
271
+
272
+// writePaxHeader writes an extended pax header to the
273
+// archive.
274
+func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error {
275
+	// Prepare extended header
276
+	ext := new(Header)
277
+	ext.Typeflag = TypeXHeader
278
+	// Setting ModTime is required for reader parsing to
279
+	// succeed, and seems harmless enough.
280
+	ext.ModTime = hdr.ModTime
281
+	// The spec asks that we namespace our pseudo files
282
+	// with the current pid. However, this results in differing outputs
283
+	// for identical inputs. As such, the constant 0 is now used instead.
284
+	// golang.org/issue/12358
285
+	dir, file := path.Split(hdr.Name)
286
+	fullName := path.Join(dir, "PaxHeaders.0", file)
287
+
288
+	ascii := toASCII(fullName)
289
+	if len(ascii) > nameSize {
290
+		ascii = ascii[:nameSize]
291
+	}
292
+	ext.Name = ascii
293
+	// Construct the body
294
+	var buf bytes.Buffer
295
+
296
+	// Keys are sorted before writing to body to allow deterministic output.
297
+	keys := make([]string, 0, len(paxHeaders))
298
+	for k := range paxHeaders {
299
+		keys = append(keys, k)
300
+	}
301
+	sort.Strings(keys)
302
+
303
+	for _, k := range keys {
304
+		fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k]))
305
+	}
306
+
307
+	ext.Size = int64(len(buf.Bytes()))
308
+	if err := tw.writeHeader(ext, false); err != nil {
309
+		return err
310
+	}
311
+	if _, err := tw.Write(buf.Bytes()); err != nil {
312
+		return err
313
+	}
314
+	if err := tw.Flush(); err != nil {
315
+		return err
316
+	}
317
+	return nil
318
+}
319
+
320
+// Write writes to the current entry in the tar archive.
321
+// Write returns the error ErrWriteTooLong if more than
322
+// hdr.Size bytes are written after WriteHeader.
323
+func (tw *Writer) Write(b []byte) (n int, err error) {
324
+	if tw.closed {
325
+		err = ErrWriteAfterClose
326
+		return
327
+	}
328
+	overwrite := false
329
+	if int64(len(b)) > tw.nb {
330
+		b = b[0:tw.nb]
331
+		overwrite = true
332
+	}
333
+	n, err = tw.w.Write(b)
334
+	tw.nb -= int64(n)
335
+	if err == nil && overwrite {
336
+		err = ErrWriteTooLong
337
+		return
338
+	}
339
+	tw.err = err
340
+	return
341
+}
342
+
343
+// Close closes the tar archive, flushing any unwritten
344
+// data to the underlying writer.
345
+func (tw *Writer) Close() error {
346
+	if tw.err != nil || tw.closed {
347
+		return tw.err
348
+	}
349
+	tw.Flush()
350
+	tw.closed = true
351
+	if tw.err != nil {
352
+		return tw.err
353
+	}
354
+
355
+	// trailer: two zero blocks
356
+	for i := 0; i < 2; i++ {
357
+		_, tw.err = tw.w.Write(zeroBlock[:])
358
+		if tw.err != nil {
359
+			break
360
+		}
361
+	}
362
+	return tw.err
363
+}