mirror of
https://github.com/unisonweb/unison.git
synced 2024-11-11 17:16:30 +03:00
use VarInt encoding for backrefs and sequence lengths
This commit is contained in:
parent
e7db5d98ec
commit
dcd15f8c73
@ -5,6 +5,10 @@ module Unison.Codecs where
|
||||
import Data.Text (Text)
|
||||
import Control.Arrow (second)
|
||||
import Control.Monad.State
|
||||
import Data.Bits (Bits)
|
||||
import qualified Data.Bytes.Serial as BS
|
||||
import Data.Bytes.Signed (Unsigned)
|
||||
import Data.Bytes.VarInt (VarInt(..))
|
||||
import qualified Data.ByteString as B
|
||||
import Data.ByteString.Builder (doubleBE, int64BE, toLazyByteString)
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
@ -257,10 +261,12 @@ serializeCase1 (MatchCase p guard body) = do
|
||||
pure $ MatchCase p posg posb
|
||||
|
||||
-- | Writes a back-reference position ('Pos') to the output.
-- NOTE(review): this diff view shows two bodies for the same name; per the
-- commit message the 'putWord64be' body is presumably the earlier version and
-- the 'BS.serialize . VarInt' body its replacement -- confirm against the repo.
putBackref :: MonadPut m => Pos -> m ()
|
||||
putBackref = putWord64be
|
||||
putBackref = BS.serialize . VarInt
|
||||
|
||||
-- | Writes a length value of any 'Integral' type to the output.
-- NOTE(review): this diff view shows two signature/body pairs for the same
-- name. The first converts with 'fromIntegral' and writes via 'putWord64be';
-- the second wraps the value in 'VarInt' and serializes it, which is why its
-- signature carries the extra 'Bits' / 'Unsigned' constraints. Presumably the
-- first pair is the pre-change version -- confirm against the repo.
putLength :: (MonadPut m, Integral n) => n -> m ()
|
||||
putLength = putWord64be . fromIntegral
|
||||
putLength :: (MonadPut m, Integral n, Integral (Unsigned n),
|
||||
Bits n, Bits (Unsigned n))
|
||||
=> n -> m ()
|
||||
putLength = BS.serialize . VarInt
|
||||
|
||||
serializeMaybe :: (MonadPut m) => (a -> m ()) -> Maybe a -> m ()
|
||||
serializeMaybe f b = case b of
|
||||
@ -302,4 +308,4 @@ serializeFile (UnisonFile dataDecls effectDecls body) = do
|
||||
serializeFoldable (uncurry serializeConstructorArities) effectDecls'
|
||||
pos <- serializeTerm body
|
||||
putWord8 0
|
||||
putWord64be pos
|
||||
putBackref pos
|
||||
|
@ -1,6 +1,7 @@
|
||||
package org.unisonweb.util
|
||||
|
||||
import java.nio.{ByteBuffer,BufferOverflowException}
|
||||
import java.lang.Long.{compareUnsigned}
|
||||
import Text.Text
|
||||
|
||||
/**
|
||||
@ -15,8 +16,24 @@ trait Sink {
|
||||
def putLong(n: Long): Unit
|
||||
|
||||
// todo: the UTF-8 of Long encoding, use a single byte if possible
|
||||
// NOTE(review): this looks like the pre-change side of the diff hunk. The body
// only forwards `n` to putLong, so no variable-length encoding happens here.
def putVarLong(n: Long): Unit =
|
||||
putLong(n)
|
||||
/**
 * Writes `n` as a little-endian, variable-length unsigned integer:
 * https://developers.google.com/protocol-buffers/docs/encoding#varints
 *
 * The value is emitted seven bits at a time, least-significant group first.
 * Every byte except the last carries the continuation flag `0x80`.
 * `n` is interpreted as unsigned (unsigned comparison and logical shift).
 *
 * @param n the value to encode
 */
def putVarLong(n: Long): Unit = {
  // Low seven payload bits of the current group.
  val low7 = (n & 0x7fL).toInt
  if (compareUnsigned(n, 0x80) >= 0) {
    // More groups follow: flag this byte, then encode the remaining bits.
    putByte((low7 | 0x80).toByte)
    putVarLong(n >>> 7)
  }
  else putByte(low7.toByte)
}
|
||||
|
||||
/**
 * Writes a signed `n` using zigzag encoding followed by the variable-length
 * unsigned encoding of `putVarLong`. Described at:
 * https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
 * https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L949-L952
 *
 * @param n the signed value to encode
 */
def putVarSignedLong(n: Long): Unit = {
  // Arithmetic shift yields all ones for negative n and all zeros otherwise.
  val signMask = n >> 63
  val zigzag = (n << 1) ^ signMask
  putVarLong(zigzag)
}
|
||||
|
||||
def putDouble(n: Double): Unit
|
||||
def putString(s: String): Unit
|
||||
def putText(txt: Text): Unit
|
||||
|
@ -17,8 +17,23 @@ trait Source { self =>
|
||||
def getByte: Byte
|
||||
def getInt: Int
|
||||
def getLong: Long
|
||||
// todo: The UTF-8 of Long encodings, uses a single byte where possible
|
||||
// NOTE(review): this looks like the pre-change side of the diff hunk. It
// only returns getLong, so no variable-length decoding happens here.
def getVarLong: Long = getLong
|
||||
|
||||
/**
 * Reads a little-endian, variable-length unsigned integer:
 * https://developers.google.com/protocol-buffers/docs/encoding#varints
 *
 * Bytes are consumed until one without the continuation flag `0x80` is seen.
 * Each byte contributes its low seven bits, and the first byte read holds the
 * least-significant group.
 *
 * @return the decoded value
 */
def getVarLong: Long = {
  val next = getByte
  val payload = next & 0x7f
  // Continuation flag set: decode the higher groups first, then place this
  // byte's seven bits beneath them.
  if ((next & 0x80) != 0) (getVarLong << 7) | payload
  else payload.toLong
}
|
||||
|
||||
/**
 * Reads a signed value stored as a zigzag-encoded variable-length integer.
 * Described at:
 * https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
 * https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedInputStream.java#L557-L568
 *
 * @return the decoded signed value
 */
def getVarSignedLong: Long = {
  val zigzag = getVarLong
  // The lowest bit carries the sign; the remaining bits carry the magnitude.
  val magnitude = zigzag >>> 1
  val signMask = -(zigzag & 1L)
  magnitude ^ signMask
}
|
||||
|
||||
def getDouble: Double
|
||||
def position: Long
|
||||
def getFramed: Array[Byte] = get(getInt)
|
||||
|
Loading…
Reference in New Issue
Block a user