use VarInt encoding for backrefs and sequence lengths

This commit is contained in:
Arya Irani 2018-06-11 18:19:22 -04:00
parent e7db5d98ec
commit dcd15f8c73
3 changed files with 46 additions and 8 deletions

View File

@ -5,6 +5,10 @@ module Unison.Codecs where
import Data.Text (Text)
import Control.Arrow (second)
import Control.Monad.State
import Data.Bits (Bits)
import qualified Data.Bytes.Serial as BS
import Data.Bytes.Signed (Unsigned)
import Data.Bytes.VarInt (VarInt(..))
import qualified Data.ByteString as B
import Data.ByteString.Builder (doubleBE, int64BE, toLazyByteString)
import qualified Data.ByteString.Lazy as BL
@ -257,10 +261,12 @@ serializeCase1 (MatchCase p guard body) = do
pure $ MatchCase p posg posb
-- | Serialize a 'Pos' back-reference.
-- Two bodies are listed because this view shows the line the commit replaced
-- (fixed-width 'putWord64be') directly above its replacement, which wraps the
-- position in 'VarInt' and serializes that instead.
putBackref :: MonadPut m => Pos -> m ()
putBackref = putWord64be
putBackref = BS.serialize . VarInt
-- | Serialize a length value.
-- The first signature/body pair is the replaced version: it converts the
-- argument with 'fromIntegral' and writes it through 'putWord64be'.
putLength :: (MonadPut m, Integral n) => n -> m ()
putLength = putWord64be . fromIntegral
-- The second pair is the replacement: the argument is wrapped in 'VarInt' and
-- serialized as such, which is why the signature gains the 'Bits' constraints
-- and the constraints on 'Unsigned n'.
putLength :: (MonadPut m, Integral n, Integral (Unsigned n),
Bits n, Bits (Unsigned n))
=> n -> m ()
putLength = BS.serialize . VarInt
serializeMaybe :: (MonadPut m) => (a -> m ()) -> Maybe a -> m ()
serializeMaybe f b = case b of
@ -302,4 +308,4 @@ serializeFile (UnisonFile dataDecls effectDecls body) = do
serializeFoldable (uncurry serializeConstructorArities) effectDecls'
pos <- serializeTerm body
putWord8 0
putWord64be pos
putBackref pos

View File

@ -1,6 +1,7 @@
package org.unisonweb.util
import java.nio.{ByteBuffer,BufferOverflowException}
import java.lang.Long.{compareUnsigned}
import Text.Text
/**
@ -15,8 +16,24 @@ trait Sink {
def putLong(n: Long): Unit
// todo: use a UTF-8-style variable-length encoding for Long, taking a single byte where possible
def putVarLong(n: Long): Unit = {
  // No variable-length packing in this version: the value is forwarded to putLong unchanged.
  putLong(n)
}
// Uses the little-endian variable length encoding of unsigned integers:
// https://developers.google.com/protocol-buffers/docs/encoding#varints
def putVarLong(n: Long): Unit = {
  // Each emitted byte carries seven payload bits; bit 7 flags that more bytes follow.
  val payload = (n & 0x7f).toInt
  // Unsigned comparison so that a negative n (top bit set) is treated as a large value.
  val needsMoreBytes = compareUnsigned(n, 0x80) >= 0
  if (needsMoreBytes) {
    putByte((payload | 0x80).toByte)
    // Logical shift: zero-fills from the left, so the remaining value shrinks to 0.
    putVarLong(n >>> 7)
  } else {
    putByte(payload.toByte)
  }
}
// Uses the zigzag encoding for variable-length signed numbers, described at:
// https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
// https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L949-L952
def putVarSignedLong(n: Long): Unit = {
  // Arithmetic shift replicates the sign bit: all ones for negative n, zero otherwise.
  val signMask = n >> 63
  // Zigzag: the sign ends up in bit 0, so values of small magnitude stay small.
  val zigzag = (n << 1) ^ signMask
  putVarLong(zigzag)
}
def putDouble(n: Double): Unit
def putString(s: String): Unit
def putText(txt: Text): Unit

View File

@ -17,8 +17,23 @@ trait Source { self =>
def getByte: Byte
def getInt: Int
def getLong: Long
// todo: use a UTF-8-style variable-length encoding for Long, reading a single byte where possible
def getVarLong: Long = {
  // No variable-length decoding in this version: the result is whatever getLong yields.
  val fixedWidth = getLong
  fixedWidth
}
// Uses the little-endian variable length encoding of unsigned integers:
// https://developers.google.com/protocol-buffers/docs/encoding#varints
def getVarLong: Long = {
  val octet = getByte
  // Low seven bits are payload; bit 7 set means at least one more byte follows.
  val payload = octet & 0x7f
  val hasMore = (octet & 0x80) != 0
  // Bytes arrive least-significant group first, so the rest of the value
  // (decoded recursively) sits seven bits above this byte's payload.
  if (hasMore) (getVarLong << 7) | payload
  else payload.toLong
}
// Uses the zigzag encoding for variable-length signed numbers, described at:
// https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
// https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedInputStream.java#L557-L568
def getVarSignedLong: Long = {
  val zigzag = getVarLong
  // Bit 0 holds the sign; negating it gives all ones for negatives, zero otherwise.
  val signMask = -(zigzag & 1)
  // The remaining bits hold the magnitude, recovered with a logical shift.
  val magnitude = zigzag >>> 1
  magnitude ^ signMask
}
def getDouble: Double
def position: Long
def getFramed: Array[Byte] = {
  // The frame is prefixed by its length, read with getInt, followed by that many bytes.
  val frameLength = getInt
  get(frameLength)
}