Source file src/compress/lzw/writer.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package lzw
     6  
     7  import (
     8  	"bufio"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  )
    13  
    14  // A writer is a buffered, flushable writer.
    15  type writer interface {
    16  	io.ByteWriter
    17  	Flush() error
    18  }
    19  
    20  const (
    21  	// A code is a 12 bit value, stored as a uint32 when encoding to avoid
    22  	// type conversions when shifting bits.
    23  	maxCode     = 1<<12 - 1
    24  	invalidCode = 1<<32 - 1
    25  	// There are 1<<12 possible codes, which is an upper bound on the number of
    26  	// valid hash table entries at any given point in time. tableSize is 4x that.
    27  	tableSize = 4 * 1 << 12
    28  	tableMask = tableSize - 1
    29  	// A hash table entry is a uint32. Zero is an invalid entry since the
    30  	// lower 12 bits of a valid entry must be a non-literal code.
    31  	invalidEntry = 0
    32  )
    33  
    34  // Writer is an LZW compressor. It writes the compressed form of the data
    35  // to an underlying writer (see [NewWriter]).
    36  type Writer struct {
    37  	// w is the writer that compressed bytes are written to.
    38  	w writer
    39  	// litWidth is the width in bits of literal codes.
    40  	litWidth uint
    41  	// order, write, bits, nBits and width are the state for
    42  	// converting a code stream into a byte stream.
    43  	order Order
    44  	write func(*Writer, uint32) error
    45  	nBits uint
    46  	width uint
    47  	bits  uint32
    48  	// hi is the code implied by the next code emission.
    49  	// overflow is the code at which hi overflows the code width.
    50  	hi, overflow uint32
    51  	// savedCode is the accumulated code at the end of the most recent Write
    52  	// call. It is equal to invalidCode if there was no such call.
    53  	savedCode uint32
    54  	// err is the first error encountered during writing. Closing the writer
    55  	// will make any future Write calls return errClosed
    56  	err error
    57  	// table is the hash table from 20-bit keys to 12-bit values. Each table
    58  	// entry contains key<<12|val and collisions resolve by linear probing.
    59  	// The keys consist of a 12-bit code prefix and an 8-bit byte suffix.
    60  	// The values are a 12-bit code.
    61  	table [tableSize]uint32
    62  }
    63  
    64  // writeLSB writes the code c for "Least Significant Bits first" data.
    65  func (w *Writer) writeLSB(c uint32) error {
    66  	w.bits |= c << w.nBits
    67  	w.nBits += w.width
    68  	for w.nBits >= 8 {
    69  		if err := w.w.WriteByte(uint8(w.bits)); err != nil {
    70  			return err
    71  		}
    72  		w.bits >>= 8
    73  		w.nBits -= 8
    74  	}
    75  	return nil
    76  }
    77  
    78  // writeMSB writes the code c for "Most Significant Bits first" data.
    79  func (w *Writer) writeMSB(c uint32) error {
    80  	w.bits |= c << (32 - w.width - w.nBits)
    81  	w.nBits += w.width
    82  	for w.nBits >= 8 {
    83  		if err := w.w.WriteByte(uint8(w.bits >> 24)); err != nil {
    84  			return err
    85  		}
    86  		w.bits <<= 8
    87  		w.nBits -= 8
    88  	}
    89  	return nil
    90  }
    91  
    92  // errOutOfCodes is an internal error that means that the writer has run out
    93  // of unused codes and a clear code needs to be sent next.
    94  var errOutOfCodes = errors.New("lzw: out of codes")
    95  
    96  // incHi increments e.hi and checks for both overflow and running out of
    97  // unused codes. In the latter case, incHi sends a clear code, resets the
    98  // writer state and returns errOutOfCodes.
    99  func (w *Writer) incHi() error {
   100  	w.hi++
   101  	if w.hi == w.overflow {
   102  		w.width++
   103  		w.overflow <<= 1
   104  	}
   105  	if w.hi == maxCode {
   106  		clear := uint32(1) << w.litWidth
   107  		if err := w.write(w, clear); err != nil {
   108  			return err
   109  		}
   110  		w.width = w.litWidth + 1
   111  		w.hi = clear + 1
   112  		w.overflow = clear << 1
   113  		for i := range w.table {
   114  			w.table[i] = invalidEntry
   115  		}
   116  		return errOutOfCodes
   117  	}
   118  	return nil
   119  }
   120  
   121  // Write writes a compressed representation of p to w's underlying writer.
   122  func (w *Writer) Write(p []byte) (n int, err error) {
   123  	if w.err != nil {
   124  		return 0, w.err
   125  	}
   126  	if len(p) == 0 {
   127  		return 0, nil
   128  	}
   129  	if maxLit := uint8(1<<w.litWidth - 1); maxLit != 0xff {
   130  		for _, x := range p {
   131  			if x > maxLit {
   132  				w.err = errors.New("lzw: input byte too large for the litWidth")
   133  				return 0, w.err
   134  			}
   135  		}
   136  	}
   137  	n = len(p)
   138  	code := w.savedCode
   139  	if code == invalidCode {
   140  		// This is the first write; send a clear code.
   141  		// https://www.w3.org/Graphics/GIF/spec-gif89a.txt Appendix F
   142  		// "Variable-Length-Code LZW Compression" says that "Encoders should
   143  		// output a Clear code as the first code of each image data stream".
   144  		//
   145  		// LZW compression isn't only used by GIF, but it's cheap to follow
   146  		// that directive unconditionally.
   147  		clear := uint32(1) << w.litWidth
   148  		if err := w.write(w, clear); err != nil {
   149  			return 0, err
   150  		}
   151  		// After the starting clear code, the next code sent (for non-empty
   152  		// input) is always a literal code.
   153  		code, p = uint32(p[0]), p[1:]
   154  	}
   155  loop:
   156  	for _, x := range p {
   157  		literal := uint32(x)
   158  		key := code<<8 | literal
   159  		// If there is a hash table hit for this key then we continue the loop
   160  		// and do not emit a code yet.
   161  		hash := (key>>12 ^ key) & tableMask
   162  		for h, t := hash, w.table[hash]; t != invalidEntry; {
   163  			if key == t>>12 {
   164  				code = t & maxCode
   165  				continue loop
   166  			}
   167  			h = (h + 1) & tableMask
   168  			t = w.table[h]
   169  		}
   170  		// Otherwise, write the current code, and literal becomes the start of
   171  		// the next emitted code.
   172  		if w.err = w.write(w, code); w.err != nil {
   173  			return 0, w.err
   174  		}
   175  		code = literal
   176  		// Increment e.hi, the next implied code. If we run out of codes, reset
   177  		// the writer state (including clearing the hash table) and continue.
   178  		if err1 := w.incHi(); err1 != nil {
   179  			if err1 == errOutOfCodes {
   180  				continue
   181  			}
   182  			w.err = err1
   183  			return 0, w.err
   184  		}
   185  		// Otherwise, insert key -> e.hi into the map that e.table represents.
   186  		for {
   187  			if w.table[hash] == invalidEntry {
   188  				w.table[hash] = (key << 12) | w.hi
   189  				break
   190  			}
   191  			hash = (hash + 1) & tableMask
   192  		}
   193  	}
   194  	w.savedCode = code
   195  	return n, nil
   196  }
   197  
   198  // Close closes the [Writer], flushing any pending output. It does not close
   199  // w's underlying writer.
   200  func (w *Writer) Close() error {
   201  	if w.err != nil {
   202  		if w.err == errClosed {
   203  			return nil
   204  		}
   205  		return w.err
   206  	}
   207  	// Make any future calls to Write return errClosed.
   208  	w.err = errClosed
   209  	// Write the savedCode if valid.
   210  	if w.savedCode != invalidCode {
   211  		if err := w.write(w, w.savedCode); err != nil {
   212  			return err
   213  		}
   214  		if err := w.incHi(); err != nil && err != errOutOfCodes {
   215  			return err
   216  		}
   217  	} else {
   218  		// Write the starting clear code, as w.Write did not.
   219  		clear := uint32(1) << w.litWidth
   220  		if err := w.write(w, clear); err != nil {
   221  			return err
   222  		}
   223  	}
   224  	// Write the eof code.
   225  	eof := uint32(1)<<w.litWidth + 1
   226  	if err := w.write(w, eof); err != nil {
   227  		return err
   228  	}
   229  	// Write the final bits.
   230  	if w.nBits > 0 {
   231  		if w.order == MSB {
   232  			w.bits >>= 24
   233  		}
   234  		if err := w.w.WriteByte(uint8(w.bits)); err != nil {
   235  			return err
   236  		}
   237  	}
   238  	return w.w.Flush()
   239  }
   240  
   241  // Reset clears the [Writer]'s state and allows it to be reused again
   242  // as a new [Writer].
   243  func (w *Writer) Reset(dst io.Writer, order Order, litWidth int) {
   244  	*w = Writer{}
   245  	w.init(dst, order, litWidth)
   246  }
   247  
   248  // NewWriter creates a new [io.WriteCloser].
   249  // Writes to the returned [io.WriteCloser] are compressed and written to w.
   250  // It is the caller's responsibility to call Close on the WriteCloser when
   251  // finished writing.
   252  // The number of bits to use for literal codes, litWidth, must be in the
   253  // range [2,8] and is typically 8. Input bytes must be less than 1<<litWidth.
   254  //
   255  // It is guaranteed that the underlying type of the returned [io.WriteCloser]
   256  // is a *[Writer].
   257  func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser {
   258  	return newWriter(w, order, litWidth)
   259  }
   260  
   261  func newWriter(dst io.Writer, order Order, litWidth int) *Writer {
   262  	w := new(Writer)
   263  	w.init(dst, order, litWidth)
   264  	return w
   265  }
   266  
   267  func (w *Writer) init(dst io.Writer, order Order, litWidth int) {
   268  	switch order {
   269  	case LSB:
   270  		w.write = (*Writer).writeLSB
   271  	case MSB:
   272  		w.write = (*Writer).writeMSB
   273  	default:
   274  		w.err = errors.New("lzw: unknown order")
   275  		return
   276  	}
   277  	if litWidth < 2 || 8 < litWidth {
   278  		w.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth)
   279  		return
   280  	}
   281  	bw, ok := dst.(writer)
   282  	if !ok && dst != nil {
   283  		bw = bufio.NewWriter(dst)
   284  	}
   285  	w.w = bw
   286  	lw := uint(litWidth)
   287  	w.order = order
   288  	w.width = 1 + lw
   289  	w.litWidth = lw
   290  	w.hi = 1<<lw + 1
   291  	w.overflow = 1 << (lw + 1)
   292  	w.savedCode = invalidCode
   293  }
   294  

View as plain text