From dcd15f8c735e199abb63a0f9f68317533a73844c Mon Sep 17 00:00:00 2001 From: Arya Irani Date: Mon, 11 Jun 2018 18:19:22 -0400 Subject: [PATCH] use VarInt encoding for backrefs and sequence lengths --- parser-typechecker/src/Unison/Codecs.hs | 14 +++++++++---- .../main/src/main/scala/util/Sink.scala | 21 +++++++++++++++++-- .../main/src/main/scala/util/Source.scala | 19 +++++++++++++++-- 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/parser-typechecker/src/Unison/Codecs.hs b/parser-typechecker/src/Unison/Codecs.hs index 1f8c2a496..af6597faf 100644 --- a/parser-typechecker/src/Unison/Codecs.hs +++ b/parser-typechecker/src/Unison/Codecs.hs @@ -5,6 +5,10 @@ module Unison.Codecs where import Data.Text (Text) import Control.Arrow (second) import Control.Monad.State +import Data.Bits (Bits) +import qualified Data.Bytes.Serial as BS +import Data.Bytes.Signed (Unsigned) +import Data.Bytes.VarInt (VarInt(..)) import qualified Data.ByteString as B import Data.ByteString.Builder (doubleBE, int64BE, toLazyByteString) import qualified Data.ByteString.Lazy as BL @@ -257,10 +261,12 @@ serializeCase1 (MatchCase p guard body) = do pure $ MatchCase p posg posb putBackref :: MonadPut m => Pos -> m () -putBackref = putWord64be +putBackref = BS.serialize . VarInt -putLength :: (MonadPut m, Integral n) => n -> m () -putLength = putWord64be . fromIntegral +putLength :: (MonadPut m, Integral n, Integral (Unsigned n), + Bits n, Bits (Unsigned n)) + => n -> m () +putLength = BS.serialize . VarInt serializeMaybe :: (MonadPut m) => (a -> m ()) -> Maybe a -> m () serializeMaybe f b = case b of @@ -302,4 +308,4 @@ serializeFile (UnisonFile dataDecls effectDecls body) = do serializeFoldable (uncurry serializeConstructorArities) effectDecls' pos <- serializeTerm body putWord8 0 - putWord64be pos + putBackref pos diff --git a/runtime-jvm/main/src/main/scala/util/Sink.scala b/runtime-jvm/main/src/main/scala/util/Sink.scala index 553221abc..8e9756063 100644 --- a/runtime-jvm/main/src/main/scala/util/Sink.scala +++ b/runtime-jvm/main/src/main/scala/util/Sink.scala @@ -1,6 +1,7 @@ package org.unisonweb.util import java.nio.{ByteBuffer,BufferOverflowException} +import java.lang.Long.{compareUnsigned} import Text.Text /** @@ -15,8 +16,24 @@ trait Sink { def putLong(n: Long): Unit // todo: the UTF-8 of Long encoding, use a single byte if possible - def putVarLong(n: Long): Unit = - putLong(n) + // Uses the little-endian variable length encoding of unsigned integers: + // https://developers.google.com/protocol-buffers/docs/encoding#varints + def putVarLong(n: Long): Unit = { + val lsb = n.toShort & 0xff + if (compareUnsigned(n, 0x80) < 0) putByte(lsb.toByte) + else { + putByte((lsb | 0x80).toByte) + putVarLong(n >>> 7) + } + } + + // Uses the zigzag encoding for variable-length signed numbers, described at: + // https://developers.google.com/protocol-buffers/docs/encoding#signed-integers + // https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L949-L952 + def putVarSignedLong(n: Long): Unit = { + putVarLong((n << 1) ^ (n >> 63)) + } + def putDouble(n: Double): Unit def putString(s: String): Unit def putText(txt: Text): Unit diff --git a/runtime-jvm/main/src/main/scala/util/Source.scala b/runtime-jvm/main/src/main/scala/util/Source.scala index 4b638e15a..aa24d6942 100644 --- a/runtime-jvm/main/src/main/scala/util/Source.scala +++ b/runtime-jvm/main/src/main/scala/util/Source.scala @@ -17,8 +17,23 @@ trait Source { self => def getByte: Byte def getInt: Int def getLong: Long - // todo: The UTF-8 of Long encodings, uses a single byte where possible - def getVarLong: Long = getLong + + // Uses the little-endian variable length encoding of unsigned integers: + // https://developers.google.com/protocol-buffers/docs/encoding#varints + def getVarLong: Long = { + val b = getByte + if ((b & 0x80) == 0) b + else (getVarLong << 7) | (b & 0x7f) + } + + // Uses the zigzag encoding for variable-length signed numbers, described at: + // https://developers.google.com/protocol-buffers/docs/encoding#signed-integers + // https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedInputStream.java#L557-L568 + def getVarSignedLong: Long = { + val n = getVarLong + (n >>> 1) ^ -(n & 1) + } + def getDouble: Double def position: Long def getFramed: Array[Byte] = get(getInt)