// Forked from gitea/gitea. 278 lines, 6.5 KiB, Go.
// Copyright 2014-2019 Ulrich Kunitz. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package lzma
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
)
|
|
|
|
// decoder decodes a raw LZMA stream without any header.
|
|
type decoder struct {
|
|
// dictionary; the rear pointer of the buffer will be used for
|
|
// reading the data.
|
|
Dict *decoderDict
|
|
// decoder state
|
|
State *state
|
|
// range decoder
|
|
rd *rangeDecoder
|
|
// start stores the head value of the dictionary for the LZMA
|
|
// stream
|
|
start int64
|
|
// size of uncompressed data
|
|
size int64
|
|
// end-of-stream encountered
|
|
eos bool
|
|
// EOS marker found
|
|
eosMarker bool
|
|
}
|
|
|
|
// newDecoder creates a new decoder instance. The parameter size provides
|
|
// the expected byte size of the decompressed data. If the size is
|
|
// unknown use a negative value. In that case the decoder will look for
|
|
// a terminating end-of-stream marker.
|
|
func newDecoder(br io.ByteReader, state *state, dict *decoderDict, size int64) (d *decoder, err error) {
|
|
rd, err := newRangeDecoder(br)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
d = &decoder{
|
|
State: state,
|
|
Dict: dict,
|
|
rd: rd,
|
|
size: size,
|
|
start: dict.pos(),
|
|
}
|
|
return d, nil
|
|
}
|
|
|
|
// Reopen restarts the decoder with a new byte reader and a new size. Reopen
|
|
// resets the Decompressed counter to zero.
|
|
func (d *decoder) Reopen(br io.ByteReader, size int64) error {
|
|
var err error
|
|
if d.rd, err = newRangeDecoder(br); err != nil {
|
|
return err
|
|
}
|
|
d.start = d.Dict.pos()
|
|
d.size = size
|
|
d.eos = false
|
|
return nil
|
|
}
|
|
|
|
// decodeLiteral decodes a single literal from the LZMA stream.
|
|
func (d *decoder) decodeLiteral() (op operation, err error) {
|
|
litState := d.State.litState(d.Dict.byteAt(1), d.Dict.head)
|
|
match := d.Dict.byteAt(int(d.State.rep[0]) + 1)
|
|
s, err := d.State.litCodec.Decode(d.rd, d.State.state, match, litState)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return lit{s}, nil
|
|
}
|
|
|
|
// errEOS is the sentinel error signalling that an end-of-stream (EOS)
// marker has been found in the compressed stream.
var (
	errEOS = errors.New("EOS marker found")
)
|
|
|
|
// readOp decodes the next operation from the compressed stream. It
|
|
// returns the operation. If an explicit end of stream marker is
|
|
// identified the eos error is returned.
|
|
func (d *decoder) readOp() (op operation, err error) {
|
|
// Value of the end of stream (EOS) marker
|
|
const eosDist = 1<<32 - 1
|
|
|
|
state, state2, posState := d.State.states(d.Dict.head)
|
|
|
|
b, err := d.State.isMatch[state2].Decode(d.rd)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if b == 0 {
|
|
// literal
|
|
op, err := d.decodeLiteral()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
d.State.updateStateLiteral()
|
|
return op, nil
|
|
}
|
|
b, err = d.State.isRep[state].Decode(d.rd)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if b == 0 {
|
|
// simple match
|
|
d.State.rep[3], d.State.rep[2], d.State.rep[1] =
|
|
d.State.rep[2], d.State.rep[1], d.State.rep[0]
|
|
|
|
d.State.updateStateMatch()
|
|
// The length decoder returns the length offset.
|
|
n, err := d.State.lenCodec.Decode(d.rd, posState)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// The dist decoder returns the distance offset. The actual
|
|
// distance is 1 higher.
|
|
d.State.rep[0], err = d.State.distCodec.Decode(d.rd, n)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if d.State.rep[0] == eosDist {
|
|
d.eosMarker = true
|
|
return nil, errEOS
|
|
}
|
|
op = match{n: int(n) + minMatchLen,
|
|
distance: int64(d.State.rep[0]) + minDistance}
|
|
return op, nil
|
|
}
|
|
b, err = d.State.isRepG0[state].Decode(d.rd)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
dist := d.State.rep[0]
|
|
if b == 0 {
|
|
// rep match 0
|
|
b, err = d.State.isRepG0Long[state2].Decode(d.rd)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if b == 0 {
|
|
d.State.updateStateShortRep()
|
|
op = match{n: 1, distance: int64(dist) + minDistance}
|
|
return op, nil
|
|
}
|
|
} else {
|
|
b, err = d.State.isRepG1[state].Decode(d.rd)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if b == 0 {
|
|
dist = d.State.rep[1]
|
|
} else {
|
|
b, err = d.State.isRepG2[state].Decode(d.rd)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if b == 0 {
|
|
dist = d.State.rep[2]
|
|
} else {
|
|
dist = d.State.rep[3]
|
|
d.State.rep[3] = d.State.rep[2]
|
|
}
|
|
d.State.rep[2] = d.State.rep[1]
|
|
}
|
|
d.State.rep[1] = d.State.rep[0]
|
|
d.State.rep[0] = dist
|
|
}
|
|
n, err := d.State.repLenCodec.Decode(d.rd, posState)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
d.State.updateStateRep()
|
|
op = match{n: int(n) + minMatchLen, distance: int64(dist) + minDistance}
|
|
return op, nil
|
|
}
|
|
|
|
// apply takes the operation and transforms the decoder dictionary accordingly.
|
|
func (d *decoder) apply(op operation) error {
|
|
var err error
|
|
switch x := op.(type) {
|
|
case match:
|
|
err = d.Dict.writeMatch(x.distance, x.n)
|
|
case lit:
|
|
err = d.Dict.WriteByte(x.b)
|
|
default:
|
|
panic("op is neither a match nor a literal")
|
|
}
|
|
return err
|
|
}
|
|
|
|
// decompress fills the dictionary unless no space for new data is
|
|
// available. If the end of the LZMA stream has been reached io.EOF will
|
|
// be returned.
|
|
func (d *decoder) decompress() error {
|
|
if d.eos {
|
|
return io.EOF
|
|
}
|
|
for d.Dict.Available() >= maxMatchLen {
|
|
op, err := d.readOp()
|
|
switch err {
|
|
case nil:
|
|
break
|
|
case errEOS:
|
|
d.eos = true
|
|
if !d.rd.possiblyAtEnd() {
|
|
return errDataAfterEOS
|
|
}
|
|
if d.size >= 0 && d.size != d.Decompressed() {
|
|
return errSize
|
|
}
|
|
return io.EOF
|
|
case io.EOF:
|
|
d.eos = true
|
|
return io.ErrUnexpectedEOF
|
|
default:
|
|
return err
|
|
}
|
|
if err = d.apply(op); err != nil {
|
|
return err
|
|
}
|
|
if d.size >= 0 && d.Decompressed() >= d.size {
|
|
d.eos = true
|
|
if d.Decompressed() > d.size {
|
|
return errSize
|
|
}
|
|
if !d.rd.possiblyAtEnd() {
|
|
switch _, err = d.readOp(); err {
|
|
case nil:
|
|
return errSize
|
|
case io.EOF:
|
|
return io.ErrUnexpectedEOF
|
|
case errEOS:
|
|
break
|
|
default:
|
|
return err
|
|
}
|
|
}
|
|
return io.EOF
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Errors that may be returned while decoding data.

// errDataAfterEOS reports compressed data following the end-of-stream
// marker.
var errDataAfterEOS = errors.New("lzma: data after end of stream marker")

// errSize reports that the amount of decompressed data does not match
// the expected uncompressed size.
var errSize = errors.New("lzma: wrong uncompressed data size")
|
|
|
|
// Read reads data from the buffer. If no more data is available io.EOF is
|
|
// returned.
|
|
func (d *decoder) Read(p []byte) (n int, err error) {
|
|
var k int
|
|
for {
|
|
// Read of decoder dict never returns an error.
|
|
k, err = d.Dict.Read(p[n:])
|
|
if err != nil {
|
|
panic(fmt.Errorf("dictionary read error %s", err))
|
|
}
|
|
if k == 0 && d.eos {
|
|
return n, io.EOF
|
|
}
|
|
n += k
|
|
if n >= len(p) {
|
|
return n, nil
|
|
}
|
|
if err = d.decompress(); err != nil && err != io.EOF {
|
|
return n, err
|
|
}
|
|
}
|
|
}
|
|
|
|
// Decompressed returns the number of bytes decompressed by the decoder.
|
|
func (d *decoder) Decompressed() int64 {
|
|
return d.Dict.pos() - d.start
|
|
}
|