Source file src/archive/zip/struct.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package zip provides support for reading and writing ZIP archives.
     7  
     8  See the [ZIP specification] for details.
     9  
    10  This package does not support disk spanning.
    11  
    12  A note about ZIP64:
    13  
    14  To be backwards compatible the FileHeader has both 32 and 64 bit Size
    15  fields. The 64 bit fields will always contain the correct value and
    16  for normal archives both fields will be the same. For files requiring
    17  the ZIP64 format the 32 bit fields will be 0xffffffff and the 64 bit
    18  fields must be used instead.
    19  
    20  [ZIP specification]: https://www.pkware.com/appnote
    21  */
    22  package zip
    23  
    24  import (
    25  	"io/fs"
    26  	"path"
    27  	"time"
    28  )
    29  
    30  // Compression methods, for use as FileHeader.Method values.
    31  const (
    32  	Store   uint16 = 0 // no compression
    33  	Deflate uint16 = 8 // DEFLATE compressed
    34  )
    35  
    36  const (
    37  	fileHeaderSignature      = 0x04034b50
    38  	directoryHeaderSignature = 0x02014b50
    39  	directoryEndSignature    = 0x06054b50
    40  	directory64LocSignature  = 0x07064b50
    41  	directory64EndSignature  = 0x06064b50
    42  	dataDescriptorSignature  = 0x08074b50 // de-facto standard; required by OS X Finder
    43  	fileHeaderLen            = 30         // + filename + extra
    44  	directoryHeaderLen       = 46         // + filename + extra + comment
    45  	directoryEndLen          = 22         // + comment
    46  	dataDescriptorLen        = 16         // four uint32: descriptor signature, crc32, compressed size, size
    47  	dataDescriptor64Len      = 24         // two uint32: signature, crc32 | two uint64: compressed size, size
    48  	directory64LocLen        = 20         //
    49  	directory64EndLen        = 56         // + extra
    50  
    51  	// Constants for the upper byte of CreatorVersion (CreatorVersion >> 8).
    52  	creatorFAT    = 0
    53  	creatorUnix   = 3
    54  	creatorNTFS   = 11
    55  	creatorVFAT   = 14
    56  	creatorMacOSX = 19
    57  
    58  	// Version numbers.
    59  	zipVersion20 = 20 // 2.0
    60  	zipVersion45 = 45 // 4.5 (reads and writes zip64 archives)
    61  
    62  	// Limits for non zip64 files.
    63  	uint16max = (1 << 16) - 1
    64  	uint32max = (1 << 32) - 1
    65  
    66  	// Extra header IDs.
    67  	//
    68  	// IDs 0..31 are reserved for official use by PKWARE.
    69  	// IDs above that range are defined by third-party vendors.
    70  	// Since ZIP lacked high precision timestamps (nor an official specification
    71  	// of the timezone used for the date fields), many competing extra fields
    72  	// have been invented. Pervasive use effectively makes them "official".
    73  	//
    74  	// See http://mdfs.net/Docs/Comp/Archiving/Zip/ExtraField
    75  	zip64ExtraID       = 0x0001 // Zip64 extended information
    76  	ntfsExtraID        = 0x000a // NTFS
    77  	unixExtraID        = 0x000d // UNIX
    78  	extTimeExtraID     = 0x5455 // Extended timestamp
    79  	infoZipUnixExtraID = 0x5855 // Info-ZIP Unix extension
    80  )
    81  
    82  // FileHeader describes a file within a ZIP file.
    83  // See the [ZIP specification] for details.
    84  //
    85  // [ZIP specification]: https://www.pkware.com/appnote
    86  type FileHeader struct {
    87  	// Name is the name of the file.
    88  	//
    89  	// It must be a relative path, not start with a drive letter (such as "C:"),
    90  	// and must use forward slashes instead of back slashes. A trailing slash
    91  	// indicates that this file is a directory and should have no data.
    92  	Name string
    93  
    94  	// Comment is any arbitrary user-defined string shorter than 64KiB.
    95  	Comment string
    96  
    97  	// NonUTF8 indicates that Name and Comment are not encoded in UTF-8.
    98  	//
    99  	// By specification, the only other encoding permitted should be CP-437,
   100  	// but historically many ZIP readers interpret Name and Comment as whatever
   101  	// the system's local character encoding happens to be.
   102  	//
   103  	// This flag should only be set if the user intends to encode a non-portable
   104  	// ZIP file for a specific localized region. Otherwise, the Writer
   105  	// automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings.
   106  	NonUTF8 bool
   107  
   108  	CreatorVersion uint16 // upper byte selects how Mode interprets ExternalAttrs; SetMode sets it to Unix
   109  	ReaderVersion  uint16
   110  	Flags          uint16 // bit 3 (0x8) is the bit tested by hasDataDescriptor
   111  
   112  	// Method is the compression method. If zero, Store is used.
   113  	Method uint16
   114  
   115  	// Modified is the modified time of the file.
   116  	//
   117  	// When reading, an extended timestamp is preferred over the legacy MS-DOS
   118  	// date field, and the offset between the times is used as the timezone.
   119  	// If only the MS-DOS date is present, the timezone is assumed to be UTC.
   120  	//
   121  	// When writing, an extended timestamp (which is timezone-agnostic) is
   122  	// always emitted. The legacy MS-DOS date field is encoded according to the
   123  	// location of the Modified time.
   124  	Modified time.Time
   125  
   126  	// ModifiedTime is an MS-DOS-encoded time.
   127  	//
   128  	// Deprecated: Use Modified instead.
   129  	ModifiedTime uint16
   130  
   131  	// ModifiedDate is an MS-DOS-encoded date.
   132  	//
   133  	// Deprecated: Use Modified instead.
   134  	ModifiedDate uint16
   135  
   136  	// CRC32 is the CRC32 checksum of the file content.
   137  	CRC32 uint32
   138  
   139  	// CompressedSize is the compressed size of the file in bytes.
   140  	// If either the uncompressed or compressed size of the file
   141  	// does not fit in 32 bits, CompressedSize is set to ^uint32(0).
   142  	//
   143  	// Deprecated: Use CompressedSize64 instead.
   144  	CompressedSize uint32
   145  
   146  	// UncompressedSize is the uncompressed size of the file in bytes.
   147  	// If either the uncompressed or compressed size of the file
   148  	// does not fit in 32 bits, UncompressedSize is set to ^uint32(0).
   149  	//
   150  	// Deprecated: Use UncompressedSize64 instead.
   151  	UncompressedSize uint32
   152  
   153  	// CompressedSize64 is the compressed size of the file in bytes.
   154  	CompressedSize64 uint64
   155  
   156  	// UncompressedSize64 is the uncompressed size of the file in bytes.
   157  	UncompressedSize64 uint64
   158  
   159  	Extra         []byte
   160  	ExternalAttrs uint32 // Meaning depends on CreatorVersion
   161  }
   162  
   163  // FileInfo returns an fs.FileInfo for the FileHeader.
   164  func (h *FileHeader) FileInfo() fs.FileInfo {
   165  	return headerFileInfo{h}
   166  }
   167  
   168  // headerFileInfo implements fs.FileInfo.
   169  type headerFileInfo struct {
   170  	fh *FileHeader
   171  }
   172  
   173  func (fi headerFileInfo) Name() string { return path.Base(fi.fh.Name) }
   174  func (fi headerFileInfo) Size() int64 {
   175  	if fi.fh.UncompressedSize64 > 0 {
   176  		return int64(fi.fh.UncompressedSize64)
   177  	}
   178  	return int64(fi.fh.UncompressedSize)
   179  }
   180  func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() }
   181  func (fi headerFileInfo) ModTime() time.Time {
   182  	if fi.fh.Modified.IsZero() {
   183  		return fi.fh.ModTime()
   184  	}
   185  	return fi.fh.Modified.UTC()
   186  }
   187  func (fi headerFileInfo) Mode() fs.FileMode { return fi.fh.Mode() }
   188  func (fi headerFileInfo) Type() fs.FileMode { return fi.fh.Mode().Type() }
   189  func (fi headerFileInfo) Sys() any          { return fi.fh }
   190  
   191  func (fi headerFileInfo) Info() (fs.FileInfo, error) { return fi, nil }
   192  
   193  // FileInfoHeader creates a partially-populated FileHeader from an
   194  // fs.FileInfo.
   195  // Because fs.FileInfo's Name method returns only the base name of
   196  // the file it describes, it may be necessary to modify the Name field
   197  // of the returned header to provide the full path name of the file.
   198  // If compression is desired, callers should set the FileHeader.Method
   199  // field; it is unset by default.
   200  func FileInfoHeader(fi fs.FileInfo) (*FileHeader, error) {
   201  	size := fi.Size()
   202  	fh := &FileHeader{
   203  		Name:               fi.Name(),
   204  		UncompressedSize64: uint64(size),
   205  	}
   206  	fh.SetModTime(fi.ModTime())
   207  	fh.SetMode(fi.Mode())
   208  	if fh.UncompressedSize64 > uint32max {
   209  		fh.UncompressedSize = uint32max
   210  	} else {
   211  		fh.UncompressedSize = uint32(fh.UncompressedSize64)
   212  	}
   213  	return fh, nil
   214  }
   215  
   216  type directoryEnd struct { // NOTE(review): name suggests the end-of-central-directory record (cf. directoryEndSignature) — confirm
   217  	diskNbr            uint32 // unused
   218  	dirDiskNbr         uint32 // unused
   219  	dirRecordsThisDisk uint64 // unused
   220  	directoryRecords   uint64 // NOTE(review): presumably the number of central directory records — confirm
   221  	directorySize      uint64 // NOTE(review): presumably the central directory size in bytes — confirm
   222  	directoryOffset    uint64 // relative to file
   223  	commentLen         uint16 // NOTE(review): presumably the byte length of comment — confirm
   224  	comment            string
   225  }
   226  
   227  // timeZone returns a *time.Location based on the provided offset.
   228  // If the offset is non-sensible, then this uses an offset of zero.
   229  func timeZone(offset time.Duration) *time.Location {
   230  	const (
   231  		minOffset   = -12 * time.Hour  // E.g., Baker island at -12:00
   232  		maxOffset   = +14 * time.Hour  // E.g., Line island at +14:00
   233  		offsetAlias = 15 * time.Minute // E.g., Nepal at +5:45
   234  	)
   235  	offset = offset.Round(offsetAlias)
   236  	if offset < minOffset || maxOffset < offset {
   237  		offset = 0
   238  	}
   239  	return time.FixedZone("", int(offset/time.Second))
   240  }
   241  
   242  // msDosTimeToTime converts an MS-DOS date and time into a time.Time.
   243  // The resolution is 2s.
   244  // See: https://msdn.microsoft.com/en-us/library/ms724247(v=VS.85).aspx
   245  func msDosTimeToTime(dosDate, dosTime uint16) time.Time {
   246  	return time.Date(
   247  		// date bits 0-4: day of month; 5-8: month; 9-15: years since 1980
   248  		int(dosDate>>9+1980),
   249  		time.Month(dosDate>>5&0xf),
   250  		int(dosDate&0x1f),
   251  
   252  		// time bits 0-4: second/2; 5-10: minute; 11-15: hour
   253  		int(dosTime>>11),
   254  		int(dosTime>>5&0x3f),
   255  		int(dosTime&0x1f*2),
   256  		0, // nanoseconds
   257  
   258  		time.UTC,
   259  	)
   260  }
   261  
   262  // timeToMsDosTime converts a time.Time to an MS-DOS date and time.
   263  // The resolution is 2s.
   264  // See: https://msdn.microsoft.com/en-us/library/ms724274(v=VS.85).aspx
   265  func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) {
   266  	fDate = uint16(t.Day() + int(t.Month())<<5 + (t.Year()-1980)<<9)
   267  	fTime = uint16(t.Second()/2 + t.Minute()<<5 + t.Hour()<<11)
   268  	return
   269  }
   270  
   271  // ModTime returns the modification time in UTC using the legacy
   272  // ModifiedDate and ModifiedTime fields.
   273  //
   274  // Deprecated: Use Modified instead.
   275  func (h *FileHeader) ModTime() time.Time {
   276  	return msDosTimeToTime(h.ModifiedDate, h.ModifiedTime)
   277  }
   278  
   279  // SetModTime sets the Modified, ModifiedTime, and ModifiedDate fields
   280  // to the given time in UTC.
   281  //
   282  // Deprecated: Use Modified instead.
   283  func (h *FileHeader) SetModTime(t time.Time) {
   284  	t = t.UTC() // Convert to UTC for compatibility
   285  	h.Modified = t
   286  	h.ModifiedDate, h.ModifiedTime = timeToMsDosTime(t)
   287  }
   288  
   289  const (
   290  	// Unix constants. The specification doesn't mention them,
   291  	// but these seem to be the values agreed on by tools.
   292  	s_IFMT   = 0xf000 // mask selecting the file type bits
   293  	s_IFSOCK = 0xc000 // socket (fs.ModeSocket)
   294  	s_IFLNK  = 0xa000 // symbolic link (fs.ModeSymlink)
   295  	s_IFREG  = 0x8000 // regular file
   296  	s_IFBLK  = 0x6000 // block device (fs.ModeDevice)
   297  	s_IFDIR  = 0x4000 // directory (fs.ModeDir)
   298  	s_IFCHR  = 0x2000 // character device (fs.ModeDevice | fs.ModeCharDevice)
   299  	s_IFIFO  = 0x1000 // named pipe (fs.ModeNamedPipe)
   300  	s_ISUID  = 0x800 // setuid (fs.ModeSetuid)
   301  	s_ISGID  = 0x400 // setgid (fs.ModeSetgid)
   302  	s_ISVTX  = 0x200 // sticky (fs.ModeSticky)
   303  
   304  	msdosDir      = 0x10 // MS-DOS attribute bit mapped to fs.ModeDir
   305  	msdosReadOnly = 0x01 // MS-DOS attribute bit that clears the write permission bits
   306  )
   307  
   308  // Mode returns the permission and mode bits for the FileHeader.
   309  func (h *FileHeader) Mode() (mode fs.FileMode) {
   310  	switch h.CreatorVersion >> 8 {
   311  	case creatorUnix, creatorMacOSX:
   312  		mode = unixModeToFileMode(h.ExternalAttrs >> 16)
   313  	case creatorNTFS, creatorVFAT, creatorFAT:
   314  		mode = msdosModeToFileMode(h.ExternalAttrs)
   315  	}
   316  	if len(h.Name) > 0 && h.Name[len(h.Name)-1] == '/' {
   317  		mode |= fs.ModeDir
   318  	}
   319  	return mode
   320  }
   321  
   322  // SetMode changes the permission and mode bits for the FileHeader.
   323  func (h *FileHeader) SetMode(mode fs.FileMode) {
   324  	h.CreatorVersion = h.CreatorVersion&0xff | creatorUnix<<8
   325  	h.ExternalAttrs = fileModeToUnixMode(mode) << 16
   326  
   327  	// set MSDOS attributes too, as the original zip does.
   328  	if mode&fs.ModeDir != 0 {
   329  		h.ExternalAttrs |= msdosDir
   330  	}
   331  	if mode&0200 == 0 {
   332  		h.ExternalAttrs |= msdosReadOnly
   333  	}
   334  }
   335  
   336  // isZip64 reports whether the file size exceeds the 32 bit limit
   337  func (h *FileHeader) isZip64() bool {
   338  	return h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max
   339  }
   340  
   341  func (f *FileHeader) hasDataDescriptor() bool {
   342  	return f.Flags&0x8 != 0
   343  }
   344  
   345  func msdosModeToFileMode(m uint32) (mode fs.FileMode) {
   346  	if m&msdosDir != 0 {
   347  		mode = fs.ModeDir | 0777
   348  	} else {
   349  		mode = 0666
   350  	}
   351  	if m&msdosReadOnly != 0 {
   352  		mode &^= 0222
   353  	}
   354  	return mode
   355  }
   356  
   357  func fileModeToUnixMode(mode fs.FileMode) uint32 {
   358  	var m uint32
   359  	switch mode & fs.ModeType {
   360  	default:
   361  		m = s_IFREG
   362  	case fs.ModeDir:
   363  		m = s_IFDIR
   364  	case fs.ModeSymlink:
   365  		m = s_IFLNK
   366  	case fs.ModeNamedPipe:
   367  		m = s_IFIFO
   368  	case fs.ModeSocket:
   369  		m = s_IFSOCK
   370  	case fs.ModeDevice:
   371  		m = s_IFBLK
   372  	case fs.ModeDevice | fs.ModeCharDevice:
   373  		m = s_IFCHR
   374  	}
   375  	if mode&fs.ModeSetuid != 0 {
   376  		m |= s_ISUID
   377  	}
   378  	if mode&fs.ModeSetgid != 0 {
   379  		m |= s_ISGID
   380  	}
   381  	if mode&fs.ModeSticky != 0 {
   382  		m |= s_ISVTX
   383  	}
   384  	return m | uint32(mode&0777)
   385  }
   386  
   387  func unixModeToFileMode(m uint32) fs.FileMode {
   388  	mode := fs.FileMode(m & 0777)
   389  	switch m & s_IFMT {
   390  	case s_IFBLK:
   391  		mode |= fs.ModeDevice
   392  	case s_IFCHR:
   393  		mode |= fs.ModeDevice | fs.ModeCharDevice
   394  	case s_IFDIR:
   395  		mode |= fs.ModeDir
   396  	case s_IFIFO:
   397  		mode |= fs.ModeNamedPipe
   398  	case s_IFLNK:
   399  		mode |= fs.ModeSymlink
   400  	case s_IFREG:
   401  		// nothing to do
   402  	case s_IFSOCK:
   403  		mode |= fs.ModeSocket
   404  	}
   405  	if m&s_ISGID != 0 {
   406  		mode |= fs.ModeSetgid
   407  	}
   408  	if m&s_ISUID != 0 {
   409  		mode |= fs.ModeSetuid
   410  	}
   411  	if m&s_ISVTX != 0 {
   412  		mode |= fs.ModeSticky
   413  	}
   414  	return mode
   415  }
   416  

View as plain text