forked from gitea/gitea
336 lines
9.1 KiB
Go
336 lines
9.1 KiB
Go
// Copyright 2015, Joe Tsai. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE.md file.
|
|
|
|
package prefix
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"encoding/binary"
|
|
"io"
|
|
"strings"
|
|
|
|
"github.com/dsnet/compress"
|
|
"github.com/dsnet/compress/internal"
|
|
"github.com/dsnet/compress/internal/errors"
|
|
)
|
|
|
|
// Reader implements a prefix decoder. If the input io.Reader satisfies the
|
|
// compress.ByteReader or compress.BufferedReader interface, then it also
|
|
// guarantees that it will never read more bytes than is necessary.
|
|
//
|
|
// For high performance, provide an io.Reader that satisfies the
|
|
// compress.BufferedReader interface. If the input does not satisfy either
|
|
// compress.ByteReader or compress.BufferedReader, then it will be internally
|
|
// wrapped with a bufio.Reader.
|
|
type Reader struct {
|
|
Offset int64 // Number of bytes read from the underlying io.Reader
|
|
|
|
rd io.Reader
|
|
byteRd compress.ByteReader // Set if rd is a ByteReader
|
|
bufRd compress.BufferedReader // Set if rd is a BufferedReader
|
|
|
|
bufBits uint64 // Buffer to hold some bits
|
|
numBits uint // Number of valid bits in bufBits
|
|
bigEndian bool // Do we treat input bytes as big endian?
|
|
|
|
// These fields are only used if rd is a compress.BufferedReader.
|
|
bufPeek []byte // Buffer for the Peek data
|
|
discardBits int // Number of bits to discard from reader
|
|
fedBits uint // Number of bits fed in last call to PullBits
|
|
|
|
// These fields are used to reduce allocations.
|
|
bb *buffer
|
|
br *bytesReader
|
|
sr *stringReader
|
|
bu *bufio.Reader
|
|
}
|
|
|
|
// Init initializes the bit Reader to read from r. If bigEndian is true, then
|
|
// bits will be read starting from the most-significant bits of a byte
|
|
// (as done in bzip2), otherwise it will read starting from the
|
|
// least-significant bits of a byte (such as for deflate and brotli).
|
|
func (pr *Reader) Init(r io.Reader, bigEndian bool) {
|
|
*pr = Reader{
|
|
rd: r,
|
|
bigEndian: bigEndian,
|
|
|
|
bb: pr.bb,
|
|
br: pr.br,
|
|
sr: pr.sr,
|
|
bu: pr.bu,
|
|
}
|
|
switch rr := r.(type) {
|
|
case *bytes.Buffer:
|
|
if pr.bb == nil {
|
|
pr.bb = new(buffer)
|
|
}
|
|
*pr.bb = buffer{Buffer: rr}
|
|
pr.bufRd = pr.bb
|
|
case *bytes.Reader:
|
|
if pr.br == nil {
|
|
pr.br = new(bytesReader)
|
|
}
|
|
*pr.br = bytesReader{Reader: rr}
|
|
pr.bufRd = pr.br
|
|
case *strings.Reader:
|
|
if pr.sr == nil {
|
|
pr.sr = new(stringReader)
|
|
}
|
|
*pr.sr = stringReader{Reader: rr}
|
|
pr.bufRd = pr.sr
|
|
case compress.BufferedReader:
|
|
pr.bufRd = rr
|
|
case compress.ByteReader:
|
|
pr.byteRd = rr
|
|
default:
|
|
if pr.bu == nil {
|
|
pr.bu = bufio.NewReader(nil)
|
|
}
|
|
pr.bu.Reset(r)
|
|
pr.rd, pr.bufRd = pr.bu, pr.bu
|
|
}
|
|
}
|
|
|
|
// BitsRead reports the total number of bits emitted from any Read method.
|
|
func (pr *Reader) BitsRead() int64 {
|
|
offset := 8*pr.Offset - int64(pr.numBits)
|
|
if pr.bufRd != nil {
|
|
discardBits := pr.discardBits + int(pr.fedBits-pr.numBits)
|
|
offset = 8*pr.Offset + int64(discardBits)
|
|
}
|
|
return offset
|
|
}
|
|
|
|
// IsBufferedReader reports whether the underlying io.Reader is also a
|
|
// compress.BufferedReader.
|
|
func (pr *Reader) IsBufferedReader() bool {
|
|
return pr.bufRd != nil
|
|
}
|
|
|
|
// ReadPads reads 0-7 bits from the bit buffer to achieve byte-alignment.
|
|
func (pr *Reader) ReadPads() uint {
|
|
nb := pr.numBits % 8
|
|
val := uint(pr.bufBits & uint64(1<<nb-1))
|
|
pr.bufBits >>= nb
|
|
pr.numBits -= nb
|
|
return val
|
|
}
|
|
|
|
// Read reads bytes into buf.
|
|
// The bit-ordering mode does not affect this method.
|
|
func (pr *Reader) Read(buf []byte) (cnt int, err error) {
|
|
if pr.numBits > 0 {
|
|
if pr.numBits%8 != 0 {
|
|
return 0, errorf(errors.Invalid, "non-aligned bit buffer")
|
|
}
|
|
for cnt = 0; len(buf) > cnt && pr.numBits > 0; cnt++ {
|
|
if pr.bigEndian {
|
|
buf[cnt] = internal.ReverseLUT[byte(pr.bufBits)]
|
|
} else {
|
|
buf[cnt] = byte(pr.bufBits)
|
|
}
|
|
pr.bufBits >>= 8
|
|
pr.numBits -= 8
|
|
}
|
|
return cnt, nil
|
|
}
|
|
if _, err := pr.Flush(); err != nil {
|
|
return 0, err
|
|
}
|
|
cnt, err = pr.rd.Read(buf)
|
|
pr.Offset += int64(cnt)
|
|
return cnt, err
|
|
}
|
|
|
|
// ReadOffset reads an offset value using the provided RangeCodes indexed by
|
|
// the symbol read.
|
|
func (pr *Reader) ReadOffset(pd *Decoder, rcs RangeCodes) uint {
|
|
rc := rcs[pr.ReadSymbol(pd)]
|
|
return uint(rc.Base) + pr.ReadBits(uint(rc.Len))
|
|
}
|
|
|
|
// TryReadBits attempts to read nb bits using the contents of the bit buffer
|
|
// alone. It returns the value and whether it succeeded.
|
|
//
|
|
// This method is designed to be inlined for performance reasons.
|
|
func (pr *Reader) TryReadBits(nb uint) (uint, bool) {
|
|
if pr.numBits < nb {
|
|
return 0, false
|
|
}
|
|
val := uint(pr.bufBits & uint64(1<<nb-1))
|
|
pr.bufBits >>= nb
|
|
pr.numBits -= nb
|
|
return val, true
|
|
}
|
|
|
|
// ReadBits reads nb bits in from the underlying reader.
|
|
func (pr *Reader) ReadBits(nb uint) uint {
|
|
if err := pr.PullBits(nb); err != nil {
|
|
errors.Panic(err)
|
|
}
|
|
val := uint(pr.bufBits & uint64(1<<nb-1))
|
|
pr.bufBits >>= nb
|
|
pr.numBits -= nb
|
|
return val
|
|
}
|
|
|
|
// TryReadSymbol attempts to decode the next symbol using the contents of the
|
|
// bit buffer alone. It returns the decoded symbol and whether it succeeded.
|
|
//
|
|
// This method is designed to be inlined for performance reasons.
|
|
func (pr *Reader) TryReadSymbol(pd *Decoder) (uint, bool) {
|
|
if pr.numBits < uint(pd.MinBits) || len(pd.chunks) == 0 {
|
|
return 0, false
|
|
}
|
|
chunk := pd.chunks[uint32(pr.bufBits)&pd.chunkMask]
|
|
nb := uint(chunk & countMask)
|
|
if nb > pr.numBits || nb > uint(pd.chunkBits) {
|
|
return 0, false
|
|
}
|
|
pr.bufBits >>= nb
|
|
pr.numBits -= nb
|
|
return uint(chunk >> countBits), true
|
|
}
|
|
|
|
// ReadSymbol reads the next symbol using the provided prefix Decoder.
|
|
func (pr *Reader) ReadSymbol(pd *Decoder) uint {
|
|
if len(pd.chunks) == 0 {
|
|
panicf(errors.Invalid, "decode with empty prefix tree")
|
|
}
|
|
|
|
nb := uint(pd.MinBits)
|
|
for {
|
|
if err := pr.PullBits(nb); err != nil {
|
|
errors.Panic(err)
|
|
}
|
|
chunk := pd.chunks[uint32(pr.bufBits)&pd.chunkMask]
|
|
nb = uint(chunk & countMask)
|
|
if nb > uint(pd.chunkBits) {
|
|
linkIdx := chunk >> countBits
|
|
chunk = pd.links[linkIdx][uint32(pr.bufBits>>pd.chunkBits)&pd.linkMask]
|
|
nb = uint(chunk & countMask)
|
|
}
|
|
if nb <= pr.numBits {
|
|
pr.bufBits >>= nb
|
|
pr.numBits -= nb
|
|
return uint(chunk >> countBits)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Flush updates the read offset of the underlying ByteReader.
|
|
// If reader is a compress.BufferedReader, then this calls Discard to update
|
|
// the read offset.
|
|
func (pr *Reader) Flush() (int64, error) {
|
|
if pr.bufRd == nil {
|
|
return pr.Offset, nil
|
|
}
|
|
|
|
// Update the number of total bits to discard.
|
|
pr.discardBits += int(pr.fedBits - pr.numBits)
|
|
pr.fedBits = pr.numBits
|
|
|
|
// Discard some bytes to update read offset.
|
|
var err error
|
|
nd := (pr.discardBits + 7) / 8 // Round up to nearest byte
|
|
nd, err = pr.bufRd.Discard(nd)
|
|
pr.discardBits -= nd * 8 // -7..0
|
|
pr.Offset += int64(nd)
|
|
|
|
// These are invalid after Discard.
|
|
pr.bufPeek = nil
|
|
return pr.Offset, err
|
|
}
|
|
|
|
// PullBits ensures that at least nb bits exist in the bit buffer.
|
|
// If the underlying reader is a compress.BufferedReader, then this will fill
|
|
// the bit buffer with as many bits as possible, relying on Peek and Discard to
|
|
// properly advance the read offset. Otherwise, it will use ReadByte to fill the
|
|
// buffer with just the right number of bits.
|
|
func (pr *Reader) PullBits(nb uint) error {
|
|
if pr.bufRd != nil {
|
|
pr.discardBits += int(pr.fedBits - pr.numBits)
|
|
for {
|
|
if len(pr.bufPeek) == 0 {
|
|
pr.fedBits = pr.numBits // Don't discard bits just added
|
|
if _, err := pr.Flush(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Peek no more bytes than necessary.
|
|
// The computation for cntPeek computes the minimum number of
|
|
// bytes to Peek to fill nb bits.
|
|
var err error
|
|
cntPeek := int(nb+(-nb&7)) / 8
|
|
if cntPeek < pr.bufRd.Buffered() {
|
|
cntPeek = pr.bufRd.Buffered()
|
|
}
|
|
pr.bufPeek, err = pr.bufRd.Peek(cntPeek)
|
|
pr.bufPeek = pr.bufPeek[int(pr.numBits/8):] // Skip buffered bits
|
|
if len(pr.bufPeek) == 0 {
|
|
if pr.numBits >= nb {
|
|
break
|
|
}
|
|
if err == io.EOF {
|
|
err = io.ErrUnexpectedEOF
|
|
}
|
|
return err
|
|
}
|
|
}
|
|
|
|
n := int(64-pr.numBits) / 8 // Number of bytes to copy to bit buffer
|
|
if len(pr.bufPeek) >= 8 {
|
|
// Starting with Go 1.7, the compiler should use a wide integer
|
|
// load here if the architecture supports it.
|
|
u := binary.LittleEndian.Uint64(pr.bufPeek)
|
|
if pr.bigEndian {
|
|
// Swap all the bits within each byte.
|
|
u = (u&0xaaaaaaaaaaaaaaaa)>>1 | (u&0x5555555555555555)<<1
|
|
u = (u&0xcccccccccccccccc)>>2 | (u&0x3333333333333333)<<2
|
|
u = (u&0xf0f0f0f0f0f0f0f0)>>4 | (u&0x0f0f0f0f0f0f0f0f)<<4
|
|
}
|
|
|
|
pr.bufBits |= u << pr.numBits
|
|
pr.numBits += uint(n * 8)
|
|
pr.bufPeek = pr.bufPeek[n:]
|
|
break
|
|
} else {
|
|
if n > len(pr.bufPeek) {
|
|
n = len(pr.bufPeek)
|
|
}
|
|
for _, c := range pr.bufPeek[:n] {
|
|
if pr.bigEndian {
|
|
c = internal.ReverseLUT[c]
|
|
}
|
|
pr.bufBits |= uint64(c) << pr.numBits
|
|
pr.numBits += 8
|
|
}
|
|
pr.bufPeek = pr.bufPeek[n:]
|
|
if pr.numBits > 56 {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
pr.fedBits = pr.numBits
|
|
} else {
|
|
for pr.numBits < nb {
|
|
c, err := pr.byteRd.ReadByte()
|
|
if err != nil {
|
|
if err == io.EOF {
|
|
err = io.ErrUnexpectedEOF
|
|
}
|
|
return err
|
|
}
|
|
if pr.bigEndian {
|
|
c = internal.ReverseLUT[c]
|
|
}
|
|
pr.bufBits |= uint64(c) << pr.numBits
|
|
pr.numBits += 8
|
|
pr.Offset++
|
|
}
|
|
}
|
|
return nil
|
|
}
|