This addresses handling of non-UTF-8 file names, namely ISO-8859-1.
https://github.com/docker/docker/issues/16516
Reported-by: @kwk
Signed-off-by: Vincent Batts <vbatts@redhat.com>
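
Not part of the patch itself: a minimal usage sketch of the Entry name accessors introduced by the vendored tar-split v0.9.10 below (SetNameBytes/GetName/GetNameBytes on tar/storage.Entry), assuming the upstream import path github.com/vbatts/tar-split/tar/storage.

    package main

    import (
        "fmt"

        "github.com/vbatts/tar-split/tar/storage"
    )

    func main() {
        // "héllo.txt" in ISO-8859-1: the 0xE9 byte is not valid UTF-8.
        raw := []byte{'h', 0xe9, 'l', 'l', 'o', '.', 't', 'x', 't'}

        e := storage.Entry{Type: storage.FileType}
        e.SetNameBytes(raw) // invalid UTF-8, so the bytes land in NameRaw, not Name

        fmt.Println(e.Name == "")          // true: Name is left empty
        fmt.Println(len(e.GetNameBytes())) // 9: original bytes preserved byte-for-byte
    }

Callers in the stream assembly/disassembly code switch from entry.Name to entry.GetName()/entry.SetName(hdr.Name) so that such names survive the JSON metadata round trip.
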
@@ -35,7 +35,7 @@ clone git github.com/hashicorp/consul v0.5.2
 
 # get graph and distribution packages
 clone git github.com/docker/distribution ec87e9b6971d831f0eff752ddb54fb64693e51cd # docker/1.8 branch
-clone git github.com/vbatts/tar-split v0.9.6
+clone git github.com/vbatts/tar-split v0.9.10
 
 clone git github.com/docker/notary ac05822d7d71ef077df3fc24f506672282a1feea
 clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce
@@ -139,8 +139,8 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) {
 	}
 
 	switch fi.h.Typeflag {
-	case TypeLink, TypeSymlink:
-		// hard link, symbolic link
+	case TypeSymlink:
+		// symbolic link
 		mode |= os.ModeSymlink
 	case TypeChar:
 		// character device node
@@ -249,6 +249,30 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
 	if fm&os.ModeSticky != 0 {
 		h.Mode |= c_ISVTX
 	}
+	// If possible, populate additional fields from OS-specific
+	// FileInfo fields.
+	if sys, ok := fi.Sys().(*Header); ok {
+		// This FileInfo came from a Header (not the OS). Use the
+		// original Header to populate all remaining fields.
+		h.Uid = sys.Uid
+		h.Gid = sys.Gid
+		h.Uname = sys.Uname
+		h.Gname = sys.Gname
+		h.AccessTime = sys.AccessTime
+		h.ChangeTime = sys.ChangeTime
+		if sys.Xattrs != nil {
+			h.Xattrs = make(map[string]string)
+			for k, v := range sys.Xattrs {
+				h.Xattrs[k] = v
+			}
+		}
+		if sys.Typeflag == TypeLink {
+			// hard link
+			h.Typeflag = TypeLink
+			h.Size = 0
+			h.Linkname = sys.Linkname
+		}
+	}
 	if sysStat != nil {
 		return h, sysStat(fi, h)
 	}
@@ -138,7 +138,13 @@ func (tr *Reader) Next() (*Header, error) {
 		// We actually read the whole file,
 		// but this skips alignment padding
 		tr.skipUnread()
+		if tr.err != nil {
+			return nil, tr.err
+		}
 		hdr = tr.readHeader()
+		if hdr == nil {
+			return nil, tr.err
+		}
 		mergePAX(hdr, headers)
 
 		// Check for a PAX format sparse file
@@ -397,7 +403,7 @@ func parsePAX(r io.Reader) (map[string]string, error) {
 		}
 		// Parse the first token as a decimal integer.
 		n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
-		if err != nil {
+		if err != nil || n < 5 || int64(len(buf)) < n {
 			return nil, ErrHeader
 		}
 		// Extract everything between the decimal and the n -1 on the
@@ -553,6 +559,10 @@ func (tr *Reader) readHeader() *Header {
 	hdr.Uid = int(tr.octal(s.next(8)))
 	hdr.Gid = int(tr.octal(s.next(8)))
 	hdr.Size = tr.octal(s.next(12))
+	if hdr.Size < 0 {
+		tr.err = ErrHeader
+		return nil
+	}
 	hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
 	s.next(8) // chksum
 	hdr.Typeflag = s.next(1)[0]
@@ -895,6 +905,9 @@ func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
 		// Otherwise, we're at the end of the file
 		return 0, io.EOF
 	}
+	if sfr.tot < sfr.sp[0].offset {
+		return 0, io.ErrUnexpectedEOF
+	}
 	if sfr.pos < sfr.sp[0].offset {
 		// We're in a hole
 		n = sfr.readHole(b, sfr.sp[0].offset)
@@ -355,7 +355,7 @@ func paxHeader(msg string) string {
 // hdr.Size bytes are written after WriteHeader.
 func (tw *Writer) Write(b []byte) (n int, err error) {
 	if tw.closed {
-		err = ErrWriteTooLong
+		err = ErrWriteAfterClose
 		return
 	}
 	overwrite := false
@@ -39,7 +39,7 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose
 				if entry.Size == 0 {
 					continue
 				}
-				fh, err := fg.Get(entry.Name)
+				fh, err := fg.Get(entry.GetName())
 				if err != nil {
 					pw.CloseWithError(err)
 					return
@@ -56,7 +56,7 @@ func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadClose
 					// but since it's coming through the PipeReader, the context of
 					// _which_ file would be lost...
 					fh.Close()
-					pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.Name))
+					pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.GetName()))
 					return
 				}
 				fh.Close()
@@ -92,13 +92,16 @@ func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io
 				}
 			}
 
-			// File entries added, regardless of size
-			_, err = p.AddEntry(storage.Entry{
+			entry := storage.Entry{
 				Type:    storage.FileType,
-				Name:    hdr.Name,
 				Size:    hdr.Size,
 				Payload: csum,
-			})
+			}
+			// For proper marshalling of non-utf8 characters
+			entry.SetName(hdr.Name)
+
+			// File entries added, regardless of size
+			_, err = p.AddEntry(entry)
 			if err != nil {
 				pW.CloseWithError(err)
 				return
@@ -1,5 +1,7 @@
 package storage
 
+import "unicode/utf8"
+
 // Entries is for sorting by Position
 type Entries []Entry
 
@@ -33,7 +35,44 @@ const (
 type Entry struct {
 	Type     Type   `json:"type"`
 	Name     string `json:"name,omitempty"`
+	NameRaw  []byte `json:"name_raw,omitempty"`
 	Size     int64  `json:"size,omitempty"`
 	Payload  []byte `json:"payload"` // SegmentType stores payload here; FileType stores crc64 checksum here;
 	Position int    `json:"position"`
 }
+
+// SetName will check name for valid UTF-8 string, and set the appropriate
+// field. See https://github.com/vbatts/tar-split/issues/17
+func (e *Entry) SetName(name string) {
+	if utf8.ValidString(name) {
+		e.Name = name
+	} else {
+		e.NameRaw = []byte(name)
+	}
+}
+
+// SetNameBytes will check name for valid UTF-8 string, and set the appropriate
+// field
+func (e *Entry) SetNameBytes(name []byte) {
+	if utf8.Valid(name) {
+		e.Name = string(name)
+	} else {
+		e.NameRaw = name
+	}
+}
+
+// GetName returns the string for the entry's name, regardless of the field stored in
+func (e *Entry) GetName() string {
+	if len(e.NameRaw) > 0 {
+		return string(e.NameRaw)
+	}
+	return e.Name
+}
+
+// GetNameBytes returns the bytes for the entry's name, regardless of the field stored in
+func (e *Entry) GetNameBytes() []byte {
+	if len(e.NameRaw) > 0 {
+		return e.NameRaw
+	}
+	return []byte(e.Name)
+}
@@ -6,6 +6,7 @@ import (
 	"errors"
 	"io"
 	"path/filepath"
+	"unicode/utf8"
 )
 
 // ErrDuplicatePath occurs when a tar archive has more than one entry for the
@@ -61,7 +62,7 @@ func (jup *jsonUnpacker) Next() (*Entry, error) {
 
 	// check for dup name
 	if e.Type == FileType {
-		cName := filepath.Clean(e.Name)
+		cName := filepath.Clean(e.GetName())
 		if _, ok := jup.seen[cName]; ok {
 			return nil, ErrDuplicatePath
 		}
@@ -93,9 +94,17 @@ type jsonPacker struct {
 type seenNames map[string]struct{}
 
 func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
+	// if Name is not valid utf8, switch it to raw first.
+	if e.Name != "" {
+		if !utf8.ValidString(e.Name) {
+			e.NameRaw = []byte(e.Name)
+			e.Name = ""
+		}
+	}
+
 	// check early for dup name
 	if e.Type == FileType {
-		cName := filepath.Clean(e.Name)
+		cName := filepath.Clean(e.GetName())
 		if _, ok := jp.seen[cName]; ok {
 			return -1, ErrDuplicatePath
 		}
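
For background (also not part of the patch): the separate NameRaw field exists because encoding/json coerces string values to valid UTF-8, replacing invalid bytes with the Unicode replacement character, so an ISO-8859-1 name kept only in Name would be silently mangled by the jsonPacker above. A standalone sketch of that failure mode, using only the standard library:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    func main() {
        // ISO-8859-1 "é" is the single byte 0xE9, which is not valid UTF-8.
        name := string([]byte{'h', 0xe9, '.', 't', 'x', 't'})

        b, _ := json.Marshal(map[string]string{"name": name}) // errors ignored in this sketch
        var out map[string]string
        _ = json.Unmarshal(b, &out)

        // The 0xE9 byte was replaced with U+FFFD during marshalling, so the
        // round-tripped name no longer matches the original bytes.
        fmt.Println(out["name"] == name) // false
        fmt.Printf("%+q\n", out["name"]) // "h\ufffd.txt"
    }

Storing the raw bytes in NameRaw ([]byte, which encoding/json base64-encodes) avoids that lossy coercion, and GetName()/GetNameBytes() hand the original bytes back to consumers.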