Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(131)

Delta Between Two Patch Sets: src/pkg/archive/tar/writer.go

Issue 6700047: code review 6700047: archive/tar: read/write extended pax/gnu tar archives (Closed)
Left Patch Set: diff -r 39b31d81b947 https://code.google.com/p/go Created 11 years, 5 months ago
Right Patch Set: diff -r 439cb8bad388 https://code.google.com/p/go Created 11 years, 1 month ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « src/pkg/archive/tar/testdata/ustar.tar ('k') | src/pkg/archive/tar/writer_test.go » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 // Copyright 2009 The Go Authors. All rights reserved. 1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 package tar 5 package tar
6 6
7 // TODO(dsymonds): 7 // TODO(dsymonds):
8 // - catch more errors (no first header, etc.) 8 // - catch more errors (no first header, etc.)
9 9
10 import ( 10 import (
11 "bytes" 11 "bytes"
12 "errors" 12 "errors"
13 "fmt" 13 "fmt"
14 "io" 14 "io"
15 "os" 15 "os"
16 » "path/filepath" 16 » "path"
17 "strconv" 17 "strconv"
18 "strings"
19 "time"
18 ) 20 )
19 21
20 var ( 22 var (
21 ErrWriteTooLong = errors.New("archive/tar: write too long") 23 ErrWriteTooLong = errors.New("archive/tar: write too long")
22 ErrFieldTooLong = errors.New("archive/tar: header field too long") 24 ErrFieldTooLong = errors.New("archive/tar: header field too long")
23 ErrWriteAfterClose = errors.New("archive/tar: write after close") 25 ErrWriteAfterClose = errors.New("archive/tar: write after close")
26 errNameTooLong = errors.New("archive/tar: name too long")
24 ) 27 )
25
26 // The pid of the current process. Assumes go doesn't fork.
dsymonds 2012/10/31 02:44:37 Don't make this assumption. Just call os.Getpid()
shanemhansen 2012/10/31 04:26:31 No problem. Someone asked me to cache the value, I
27 var currentPid int
28
29 func cachedPid() int {
30 if currentPid == 0 {
31 currentPid = os.Getpid()
32 }
33 return currentPid
34 }
35 28
36 // A Writer provides sequential writing of a tar archive in POSIX.1 format. 29 // A Writer provides sequential writing of a tar archive in POSIX.1 format.
37 // A tar archive consists of a sequence of files. 30 // A tar archive consists of a sequence of files.
38 // Call WriteHeader to begin a new file, and then call Write to supply that file's data, 31 // Call WriteHeader to begin a new file, and then call Write to supply that file's data,
39 // writing at most hdr.Size bytes in total. 32 // writing at most hdr.Size bytes in total.
40 //
41 // Example:
42 // tw := tar.NewWriter(w)
43 // hdr := new(tar.Header)
44 // hdr.Size = length of data in bytes
45 // // populate other hdr fields as desired
46 // if err := tw.WriteHeader(hdr); err != nil {
47 // // handle error
48 // }
49 // io.Copy(tw, data)
50 // tw.Close()
51 type Writer struct { 33 type Writer struct {
52 w io.Writer 34 w io.Writer
53 err error 35 err error
54 nb int64 // number of unwritten bytes for current file entry 36 nb int64 // number of unwritten bytes for current file entry
55 pad int64 // amount of padding to write after current file entry 37 pad int64 // amount of padding to write after current file entry
56 closed bool 38 closed bool
57 usedBinary bool // whether the binary numeric field extension was used 39 usedBinary bool // whether the binary numeric field extension was used
58 } 40 }
59 41
60 // NewWriter creates a new Writer writing to w. 42 // NewWriter creates a new Writer writing to w.
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
116 } 98 }
117 // Too big: use binary (big-endian). 99 // Too big: use binary (big-endian).
118 tw.usedBinary = true 100 tw.usedBinary = true
119 for i := len(b) - 1; x > 0 && i >= 0; i-- { 101 for i := len(b) - 1; x > 0 && i >= 0; i-- {
120 b[i] = byte(x) 102 b[i] = byte(x)
121 x >>= 8 103 x >>= 8
122 } 104 }
123 b[0] |= 0x80 // highest bit indicates binary format 105 b[0] |= 0x80 // highest bit indicates binary format
124 } 106 }
125 107
108 var (
109 minTime = time.Unix(0, 0)
110 // There is room for 11 octal digits (33 bits) of mtime.
111 maxTime = minTime.Add((1<<33 - 1) * time.Second)
112 )
113
126 // WriteHeader writes hdr and prepares to accept the file's contents. 114 // WriteHeader writes hdr and prepares to accept the file's contents.
127 // WriteHeader calls Flush if it is not the first header. 115 // WriteHeader calls Flush if it is not the first header.
128 // Calling after a Close will return ErrWriteAfterClose. 116 // Calling after a Close will return ErrWriteAfterClose.
129 func (tw *Writer) WriteHeader(hdr *Header) error { 117 func (tw *Writer) WriteHeader(hdr *Header) error {
130 if tw.closed { 118 if tw.closed {
131 return ErrWriteAfterClose 119 return ErrWriteAfterClose
132 } 120 }
133 if tw.err == nil { 121 if tw.err == nil {
134 tw.Flush() 122 tw.Flush()
135 } 123 }
136 if tw.err != nil { 124 if tw.err != nil {
137 return tw.err 125 return tw.err
138 } 126 }
139 // Decide whether or not to use PAX extensions 127 // Decide whether or not to use PAX extensions
140 // TODO(shanemhansen): we might want to use PAX headers for 128 // TODO(shanemhansen): we might want to use PAX headers for
141 // subsecond time resolution, but for now let's just capture 129 // subsecond time resolution, but for now let's just capture
142 // the long name/long symlink use case. 130 // the long name/long symlink use case.
143 » use_pax_header := len(hdr.Name) > 100 || len(hdr.Linkname) > 100 131 » suffix := hdr.Name
dsymonds 2012/10/31 02:44:37 these are not Go names. Drop your underscores and
shanemhansen 2012/10/31 04:26:31 Done.
144 » if use_pax_header { 132 » prefix := ""
dsymonds 2012/10/31 02:44:37 I think the contents of this if could be its own m
shanemhansen 2012/10/31 04:26:31 Done.
145 » » // Slip an extra header in there. 133 » if len(hdr.Name) > fileNameSize || len(hdr.Linkname) > fileNameSize {
146 » » extra_header := new(Header) 134 » » var err error
147 » » extra_header.Typeflag = TypeXHeader 135 » » prefix, suffix, err = tw.splitUSTARLongName(hdr.Name)
148 » » // Setting ModTime is required for reader parsing to 136 » » // Either we were unable to pack the long name into ustar format
149 » » // succeed, and seems harmless enough. 137 » » // or the link name is too long; use PAX headers.
150 » » extra_header.ModTime = hdr.ModTime 138 » » if err == errNameTooLong || len(hdr.Linkname) > fileNameSize {
151 » » // The spec asks that we namespace our psuedo files 139 » » » if err := tw.writePAXHeader(hdr); err != nil {
152 » » // with the current pid. 140 » » » » return err
153 » » pid := cachedPid() 141 » » » }
154 » » dir, file := filepath.Split(hdr.Name) 142 » » } else if err != nil {
155 » » extra_header.Name = filepath.Join(dir,
156 » » » fmt.Sprintf("PaxHeaders.%d", pid), file)[0:100]
157 » » // Construct the body
158 » » var buf bytes.Buffer
159 » » if len(hdr.Name) > 100 {
160 » » » msg := fmt.Sprintf(" path=%s\n", hdr.Name)
161 » » » size := len(msg) + 1
162 » » » length := string(size)
163 » » » size += len(length)
164 » » » fmt.Fprintf(&buf, "%d%s", size, msg)
165 » » }
166 » » if len(hdr.Linkname) > 100 {
167 » » » msg := fmt.Sprintf(" linkpath=%s\n", hdr.Name)
168 » » » size := len(msg) + 1
169 » » » length := string(size)
170 » » » size += len(length)
171 » » » fmt.Fprintf(&buf, "%d%s", size, msg)
172 » » }
173 » » extra_header.Size = int64(len(buf.Bytes()))
174 » » err := tw.WriteHeader(extra_header)
175 » » if err != nil {
176 return err 143 return err
177 } 144 }
178 _, err = tw.Write(buf.Bytes())
179 if err != nil {
180 return err
181 }
182 err = tw.Flush()
183 if err != nil {
184 return err
185 }
186 } 145 }
187 tw.nb = int64(hdr.Size) 146 tw.nb = int64(hdr.Size)
188 tw.pad = -tw.nb & (blockSize - 1) // blockSize is a power of two 147 tw.pad = -tw.nb & (blockSize - 1) // blockSize is a power of two
189 148
190 header := make([]byte, blockSize) 149 header := make([]byte, blockSize)
191 s := slicer(header) 150 s := slicer(header)
192 » copy(s.next(100), []byte(hdr.Name)) 151 » tw.cString(s.next(fileNameSize), suffix)
193 152
194 » tw.octal(s.next(8), hdr.Mode) // 100:108 153 » // Handle out of range ModTime carefully.
195 » tw.numeric(s.next(8), int64(hdr.Uid)) // 108:116 154 » var modTime int64
196 » tw.numeric(s.next(8), int64(hdr.Gid)) // 116:124 155 » if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
197 » tw.numeric(s.next(12), hdr.Size) // 124:136 156 » » modTime = hdr.ModTime.Unix()
198 » tw.numeric(s.next(12), hdr.ModTime.Unix()) // 136:148 157 » }
199 » s.next(8) // chksum (148:156) 158
200 » s.next(1)[0] = hdr.Typeflag // 156:157 159 » tw.octal(s.next(8), hdr.Mode) // 100:108
201 » tw.cString(s.next(100), hdr.Linkname) // linkname (157:257) 160 » tw.numeric(s.next(8), int64(hdr.Uid)) // 108:116
202 » copy(s.next(8), []byte("ustar\x0000")) // 257:265 161 » tw.numeric(s.next(8), int64(hdr.Gid)) // 116:124
203 » tw.cString(s.next(32), hdr.Uname) // 265:297 162 » tw.numeric(s.next(12), hdr.Size) // 124:136
204 » tw.cString(s.next(32), hdr.Gname) // 297:329 163 » tw.numeric(s.next(12), modTime) // 136:148
205 » tw.numeric(s.next(8), hdr.Devmajor) // 329:337 164 » s.next(8) // chksum (148:156)
206 » tw.numeric(s.next(8), hdr.Devminor) // 337:345 165 » s.next(1)[0] = hdr.Typeflag // 156:157
207 166 » tw.cString(s.next(100), hdr.Linkname) // linkname (157:257)
167 » copy(s.next(8), []byte("ustar\x0000")) // 257:265
168 » tw.cString(s.next(32), hdr.Uname) // 265:297
169 » tw.cString(s.next(32), hdr.Gname) // 297:329
170 » tw.numeric(s.next(8), hdr.Devmajor) // 329:337
171 » tw.numeric(s.next(8), hdr.Devminor) // 337:345
172 » tw.cString(s.next(155), prefix) // 345:500
208 // Use the GNU magic instead of POSIX magic if we used any GNU extensions. 173 // Use the GNU magic instead of POSIX magic if we used any GNU extensions.
209 if tw.usedBinary { 174 if tw.usedBinary {
210 copy(header[257:265], []byte("ustar \x00")) 175 copy(header[257:265], []byte("ustar \x00"))
176 }
177 // Use the ustar magic if we used ustar long names.
178 if len(prefix) > 0 {
179 copy(header[257:265], []byte("ustar\000"))
211 } 180 }
212 181
213 // The chksum field is terminated by a NUL and a space. 182 // The chksum field is terminated by a NUL and a space.
214 // This is different from the other octal fields. 183 // This is different from the other octal fields.
215 chksum, _ := checksum(header) 184 chksum, _ := checksum(header)
216 tw.octal(header[148:155], chksum) 185 tw.octal(header[148:155], chksum)
217 header[155] = ' ' 186 header[155] = ' '
218 187
219 if tw.err != nil { 188 if tw.err != nil {
220 // problem with header; probably integer too big for a field. 189 // problem with header; probably integer too big for a field.
221 return tw.err 190 return tw.err
222 } 191 }
223 192
224 _, tw.err = tw.w.Write(header) 193 _, tw.err = tw.w.Write(header)
225 194
226 return tw.err 195 return tw.err
196 }
197
198 // splitUSTARLongName splits a USTAR long name hdr.Name.
199 // name must be < 256 characters. errNameTooLong is returned
200 // if hdr.Name can't be split. The splitting heuristic
201 // is compatible with gnu tar.
202 func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) {
203 length := len(name)
204 if length > fileNamePrefixSize+1 {
205 length = fileNamePrefixSize + 1
206 } else if name[length-1] == '/' {
207 length--
208 }
209 i := strings.LastIndex(name[:length], "/")
210 nlen := length - i - 1
211 if i <= 0 || nlen > fileNameSize || nlen == 0 {
212 err = errNameTooLong
213 return
214 }
215 prefix, suffix = name[:i], name[i+1:]
216 return
217 }
218
219 // writePAXHeader writes an extended pax header to the
220 // archive.
221 func (tw *Writer) writePAXHeader(hdr *Header) error {
222 // Prepare extended header
223 ext := new(Header)
224 ext.Typeflag = TypeXHeader
225 // Setting ModTime is required for reader parsing to
226 // succeed, and seems harmless enough.
227 ext.ModTime = hdr.ModTime
228 // The spec asks that we namespace our pseudo files
229 // with the current pid.
230 pid := os.Getpid()
231 dir, file := path.Split(hdr.Name)
232 ext.Name = path.Join(dir,
233 fmt.Sprintf("PaxHeaders.%d", pid), file)[0:100]
234 // Construct the body
235 var buf bytes.Buffer
236 if len(hdr.Name) > fileNameSize {
237 fmt.Fprint(&buf, paxHeader("path="+hdr.Name))
238 }
239 if len(hdr.Linkname) > fileNameSize {
240 fmt.Fprint(&buf, paxHeader("linkpath="+hdr.Linkname))
241 }
242 ext.Size = int64(len(buf.Bytes()))
243 if err := tw.WriteHeader(ext); err != nil {
244 return err
245 }
246 if _, err := tw.Write(buf.Bytes()); err != nil {
247 return err
248 }
249 if err := tw.Flush(); err != nil {
250 return err
251 }
252 return nil
253 }
254
255 // paxHeader formats a single pax record, prefixing it with the appropriate length
256 func paxHeader(msg string) string {
257 const padding = 2 // Extra padding for space and newline
258 size := len(msg) + padding
259 size += len(strconv.Itoa(size))
260 record := fmt.Sprintf("%d %s\n", size, msg)
261 if len(record) != size {
262 // Final adjustment if adding size increased
263 // the number of digits in size
264 size = len(record)
265 record = fmt.Sprintf("%d %s\n", size, msg)
266 }
267 return record
227 } 268 }
228 269
229 // Write writes to the current entry in the tar archive. 270 // Write writes to the current entry in the tar archive.
230 // Write returns the error ErrWriteTooLong if more than 271 // Write returns the error ErrWriteTooLong if more than
231 // hdr.Size bytes are written after WriteHeader. 272 // hdr.Size bytes are written after WriteHeader.
232 func (tw *Writer) Write(b []byte) (n int, err error) { 273 func (tw *Writer) Write(b []byte) (n int, err error) {
233 if tw.closed { 274 if tw.closed {
234 err = ErrWriteTooLong 275 err = ErrWriteTooLong
235 return 276 return
236 } 277 }
(...skipping 26 matching lines...) Expand all
263 304
264 // trailer: two zero blocks 305 // trailer: two zero blocks
265 for i := 0; i < 2; i++ { 306 for i := 0; i < 2; i++ {
266 _, tw.err = tw.w.Write(zeroBlock) 307 _, tw.err = tw.w.Write(zeroBlock)
267 if tw.err != nil { 308 if tw.err != nil {
268 break 309 break
269 } 310 }
270 } 311 }
271 return tw.err 312 return tw.err
272 } 313 }
LEFTRIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b