Merge pull request #190 from unisonweb/topic/varlong-arrays

use VarLong for lengths and backref Pos
This commit is contained in:
Paul Chiusano 2018-06-15 10:32:30 -05:00 committed by GitHub
commit 80e71a987a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 62 additions and 13 deletions

View File

@ -5,6 +5,10 @@ module Unison.Codecs where
import Data.Text (Text)
import Control.Arrow (second)
import Control.Monad.State
import Data.Bits (Bits)
import qualified Data.Bytes.Serial as BS
import Data.Bytes.Signed (Unsigned)
import Data.Bytes.VarInt (VarInt(..))
import qualified Data.ByteString as B
import Data.ByteString.Builder (doubleBE, int64BE, toLazyByteString)
import qualified Data.ByteString.Lazy as BL
@ -257,10 +261,12 @@ serializeCase1 (MatchCase p guard body) = do
pure $ MatchCase p posg posb
putBackref :: MonadPut m => Pos -> m ()
putBackref = putWord64be
putBackref = BS.serialize . VarInt
putLength :: (MonadPut m, Integral n) => n -> m ()
putLength = putWord64be . fromIntegral
-- | Write a length by wrapping it in 'VarInt' and delegating to that
-- wrapper's 'BS.Serial' instance, so the number of bytes emitted depends on
-- the magnitude of the value rather than being a fixed width.
--
-- NOTE(review): callers in this module pass non-negative lengths
-- (e.g. @B.length@); behaviour for negative inputs is whatever the
-- 'VarInt' instance defines -- confirm before relying on it.
putLength :: (MonadPut m, Integral n, Integral (Unsigned n),
              Bits n, Bits (Unsigned n))
          => n -> m ()
putLength n = BS.serialize (VarInt n)
serializeMaybe :: (MonadPut m) => (a -> m ()) -> Maybe a -> m ()
serializeMaybe f b = case b of
@ -270,7 +276,7 @@ serializeMaybe f b = case b of
lengthEncode :: MonadPut m => Text -> m ()
lengthEncode text = do
let bs = encodeUtf8 text
putWord32be . fromIntegral $ B.length bs
putLength $ B.length bs
putByteString bs
serializeFoldable :: (MonadPut m, Foldable f) => (a -> m ()) -> f a -> m ()
@ -286,7 +292,7 @@ serializeReference ref = case ref of
Derived hash -> do
putWord8 1
let bs = Hash.toBytes hash
putWord32be . fromIntegral $ B.length bs
putLength $ B.length bs
putByteString bs
serializeConstructorArities :: MonadPut m => Reference -> [Int] -> m ()
@ -302,4 +308,4 @@ serializeFile (UnisonFile dataDecls effectDecls body) = do
serializeFoldable (uncurry serializeConstructorArities) effectDecls'
pos <- serializeTerm body
putWord8 0
putWord64be pos
putBackref pos

View File

@ -1,6 +1,7 @@
package org.unisonweb.util
import java.nio.{ByteBuffer,BufferOverflowException}
import java.lang.Long.{compareUnsigned}
import Text.Text
/**
@ -14,15 +15,34 @@ trait Sink {
def putInt(n: Int): Unit
def putLong(n: Long): Unit
// todo: the UTF-8 of Long encoding, use a single byte if possible
def putVarLong(n: Long): Unit =
putLong(n)
/**
 * Writes `n`, treated as an unsigned 64-bit value, as a little-endian
 * base-128 varint: seven payload bits per byte, least-significant group
 * first, with the high bit of a byte set when further bytes follow.
 * https://developers.google.com/protocol-buffers/docs/encoding#varints
 */
def putVarLong(n: Long): Unit = {
  val low7 = (n & 0x7f).toInt
  // Unsigned comparison so that values with bit 63 set are treated as large.
  val isFinalGroup = compareUnsigned(n, 0x80) < 0
  if (isFinalGroup)
    putByte(low7.toByte)
  else {
    // Mark this byte as "more to come", then emit the remaining high bits.
    putByte((low7 | 0x80).toByte)
    putVarLong(n >>> 7)
  }
}
/**
 * Writes a signed value using zigzag encoding followed by `putVarLong`, so
 * that values of small magnitude (positive or negative) map to small
 * unsigned numbers:
 * https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
 * https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L949-L952
 */
def putVarSignedLong(n: Long): Unit = {
  // Arithmetic shift: all ones when `n` is negative, all zeros otherwise.
  val signMask = n >> 63
  val zigzag = (n << 1) ^ signMask
  putVarLong(zigzag)
}
def putDouble(n: Double): Unit
def putString(s: String): Unit
def putText(txt: Text): Unit
def position: Long
def putFramed(bs: Array[Byte]): Unit = {
putInt(bs.length)
putVarLong(bs.length)
put(bs)
}
def putFramedSeq[A](seq: Seq[A])(f: (Sink,A) => Unit): Unit =

View File

@ -12,17 +12,40 @@ import scala.reflect.ClassTag
* The cursor position can be accessed via the `position` method.
*/
trait Source { self =>
// todo: use a representation that supports 64-bit lengths, unlike Array
def get(n: Int): Array[Byte]
def getBoolean: Boolean = getByte != 0
def getByte: Byte
def getInt: Int
def getLong: Long
// todo: The UTF-8 of Long encodings, uses a single byte where possible
def getVarLong: Long = getLong
/**
 * Reads a little-endian base-128 varint: bytes are consumed until one with
 * a clear high bit is seen, and the low seven bits of each byte are
 * assembled least-significant group first.
 * https://developers.google.com/protocol-buffers/docs/encoding#varints
 */
def getVarLong: Long = {
  val byte = getByte
  val payload = (byte & 0x7f).toLong
  val isFinalGroup = (byte & 0x80) == 0
  if (isFinalGroup) payload
  // The remaining bytes hold the higher-order groups.
  else payload | (getVarLong << 7)
}
/**
 * Reads a varint via `getVarLong` and undoes the zigzag mapping to recover
 * the signed value:
 * https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
 * https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L949-L952
 */
def getVarSignedLong: Long = {
  val zigzag = getVarLong
  val magnitudeBits = zigzag >>> 1
  // The lowest bit carries the sign: 0 -> mask of zeros, 1 -> mask of ones.
  val signMask = -(zigzag & 1)
  magnitudeBits ^ signMask
}
def getDouble: Double
def position: Long
def getFramed: Array[Byte] = get(getInt)
// todo: use a representation that supports 64-bit lengths, unlike Array
/**
 * Reads a frame: a varint length (decoded with `getVarLong`) followed by
 * that many bytes, which are returned.
 *
 * Throws `IllegalArgumentException` when the decoded length does not fit in
 * a non-negative `Int`. Narrowing such a length with `.toInt` would silently
 * read the wrong number of bytes and leave the cursor in the middle of the
 * frame, corrupting every subsequent read.
 */
def getFramed: Array[Byte] = {
  val length = getVarLong
  // A decoded value with bit 63 set shows up here as a negative Long.
  if (length < 0 || length > Int.MaxValue)
    throw new IllegalArgumentException(
      "frame length out of range for Array: " + java.lang.Long.toUnsignedString(length))
  get(length.toInt)
}
// todo: use a representation that supports 64-bit lengths, unlike String
final def getString: String = {
val bytes = getFramed
new String(bytes, java.nio.charset.StandardCharsets.UTF_8)