Merge pull request #190 from unisonweb/topic/varlong-arrays

use VarLong for lengths and backref Pos
This commit is contained in:
Paul Chiusano 2018-06-15 10:32:30 -05:00 committed by GitHub
commit 80e71a987a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 62 additions and 13 deletions

View File

@ -5,6 +5,10 @@ module Unison.Codecs where
import Data.Text (Text) import Data.Text (Text)
import Control.Arrow (second) import Control.Arrow (second)
import Control.Monad.State import Control.Monad.State
import Data.Bits (Bits)
import qualified Data.Bytes.Serial as BS
import Data.Bytes.Signed (Unsigned)
import Data.Bytes.VarInt (VarInt(..))
import qualified Data.ByteString as B import qualified Data.ByteString as B
import Data.ByteString.Builder (doubleBE, int64BE, toLazyByteString) import Data.ByteString.Builder (doubleBE, int64BE, toLazyByteString)
import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy as BL
@ -257,10 +261,12 @@ serializeCase1 (MatchCase p guard body) = do
pure $ MatchCase p posg posb pure $ MatchCase p posg posb
putBackref :: MonadPut m => Pos -> m () putBackref :: MonadPut m => Pos -> m ()
putBackref = putWord64be putBackref = BS.serialize . VarInt
putLength :: (MonadPut m, Integral n) => n -> m () putLength :: (MonadPut m, Integral n, Integral (Unsigned n),
putLength = putWord64be . fromIntegral Bits n, Bits (Unsigned n))
=> n -> m ()
putLength = BS.serialize . VarInt
serializeMaybe :: (MonadPut m) => (a -> m ()) -> Maybe a -> m () serializeMaybe :: (MonadPut m) => (a -> m ()) -> Maybe a -> m ()
serializeMaybe f b = case b of serializeMaybe f b = case b of
@ -270,7 +276,7 @@ serializeMaybe f b = case b of
lengthEncode :: MonadPut m => Text -> m () lengthEncode :: MonadPut m => Text -> m ()
lengthEncode text = do lengthEncode text = do
let bs = encodeUtf8 text let bs = encodeUtf8 text
putWord32be . fromIntegral $ B.length bs putLength $ B.length bs
putByteString bs putByteString bs
serializeFoldable :: (MonadPut m, Foldable f) => (a -> m ()) -> f a -> m () serializeFoldable :: (MonadPut m, Foldable f) => (a -> m ()) -> f a -> m ()
@ -286,7 +292,7 @@ serializeReference ref = case ref of
Derived hash -> do Derived hash -> do
putWord8 1 putWord8 1
let bs = Hash.toBytes hash let bs = Hash.toBytes hash
putWord32be . fromIntegral $ B.length bs putLength $ B.length bs
putByteString bs putByteString bs
serializeConstructorArities :: MonadPut m => Reference -> [Int] -> m () serializeConstructorArities :: MonadPut m => Reference -> [Int] -> m ()
@ -302,4 +308,4 @@ serializeFile (UnisonFile dataDecls effectDecls body) = do
serializeFoldable (uncurry serializeConstructorArities) effectDecls' serializeFoldable (uncurry serializeConstructorArities) effectDecls'
pos <- serializeTerm body pos <- serializeTerm body
putWord8 0 putWord8 0
putWord64be pos putBackref pos

View File

@ -1,6 +1,7 @@
package org.unisonweb.util package org.unisonweb.util
import java.nio.{ByteBuffer,BufferOverflowException} import java.nio.{ByteBuffer,BufferOverflowException}
import java.lang.Long.{compareUnsigned}
import Text.Text import Text.Text
/** /**
@ -14,15 +15,34 @@ trait Sink {
def putInt(n: Int): Unit def putInt(n: Int): Unit
def putLong(n: Long): Unit def putLong(n: Long): Unit
// todo: the UTF-8 of Long encoding, use a single byte if possible /**
def putVarLong(n: Long): Unit = * Uses the little-endian variable length encoding of unsigned integers:
putLong(n) * https://developers.google.com/protocol-buffers/docs/encoding#varints
*/
def putVarLong(n: Long): Unit = {
  // Low seven payload bits of `n`; the eighth bit of each emitted byte is
  // reserved as the "more bytes follow" continuation flag.
  val low7 = (n & 0x7fL).toInt
  // `n` is treated as unsigned: it fits in one byte exactly when no bit
  // above the low seven is set.
  val fitsInOneByte = (n & ~0x7fL) == 0L
  if (fitsInOneByte)
    putByte(low7.toByte)
  else {
    putByte((low7 | 0x80).toByte)
    // Logical (unsigned) shift so the sign bit is not smeared into the
    // remaining groups.
    putVarLong(n >>> 7)
  }
}
/**
 * Writes a signed `Long` as a variable-length value by zigzag-mapping it
 * onto an unsigned number and handing the result to `putVarLong`.
 *
 * The zigzag mapping is described at:
 * https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
 * https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L949-L952
 */
def putVarSignedLong(n: Long): Unit = {
  // Arithmetic shift yields all-ones for negative `n` and zero otherwise.
  val signMask = n >> 63
  val zigzag = (n << 1) ^ signMask
  putVarLong(zigzag)
}
def putDouble(n: Double): Unit def putDouble(n: Double): Unit
def putString(s: String): Unit def putString(s: String): Unit
def putText(txt: Text): Unit def putText(txt: Text): Unit
def position: Long def position: Long
def putFramed(bs: Array[Byte]): Unit = { def putFramed(bs: Array[Byte]): Unit = {
putInt(bs.length) putVarLong(bs.length)
put(bs) put(bs)
} }
def putFramedSeq[A](seq: Seq[A])(f: (Sink,A) => Unit): Unit = def putFramedSeq[A](seq: Seq[A])(f: (Sink,A) => Unit): Unit =

View File

@ -12,17 +12,40 @@ import scala.reflect.ClassTag
* The cursor position can be accessed via the `position` method. * The cursor position can be accessed via the `position` method.
*/ */
trait Source { self => trait Source { self =>
// todo: use a representation that supports 64-bit lengths, unlike Array
def get(n: Int): Array[Byte] def get(n: Int): Array[Byte]
def getBoolean: Boolean = getByte != 0 def getBoolean: Boolean = getByte != 0
def getByte: Byte def getByte: Byte
def getInt: Int def getInt: Int
def getLong: Long def getLong: Long
// todo: The UTF-8 of Long encodings, uses a single byte where possible
def getVarLong: Long = getLong /**
* Uses the little-endian variable length encoding of unsigned integers:
* https://developers.google.com/protocol-buffers/docs/encoding#varints
*/
def getVarLong: Long = {
  val current = getByte
  // Seven payload bits carried by this byte (always non-negative).
  val payload: Long = current & 0x7f
  // High bit set means further, more significant groups follow.
  val hasMore = (current & 0x80) != 0
  if (hasMore) (getVarLong << 7) | payload
  else payload
}
/**
 * Reads a variable-length value via `getVarLong` and undoes the zigzag
 * mapping to recover a signed `Long`.
 *
 * The zigzag mapping is described at:
 * https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
 * https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L949-L952
 */
def getVarSignedLong: Long = {
  val encoded = getVarLong
  // Lowest bit holds the sign; negating it gives an all-ones mask for
  // negative values and zero for non-negative ones.
  val signMask = -(encoded & 1)
  val magnitude = encoded >>> 1
  magnitude ^ signMask
}
def getDouble: Double def getDouble: Double
def position: Long def position: Long
def getFramed: Array[Byte] = get(getInt)
// todo: use a representation that supports 64-bit lengths, unlike Array
/**
 * Reads a length prefix with `getVarLong`, then reads that many bytes
 * with `get`.
 *
 * The prefix is decoded as a 64-bit value but `get` takes an `Int`, so a
 * prefix outside `0 .. Int.MaxValue` is rejected instead of being silently
 * truncated by `.toInt` (which could otherwise turn an oversized or corrupt
 * length into a small or negative byte count).
 *
 * @throws IllegalArgumentException if the decoded length is negative or
 *                                  larger than `Int.MaxValue`
 */
def getFramed: Array[Byte] = {
  val length = getVarLong
  if (length < 0L || length > Int.MaxValue.toLong)
    throw new IllegalArgumentException(
      s"framed length $length does not fit in an Array[Byte]")
  get(length.toInt)
}
// todo: use a representation that supports 64-bit lengths, unlike String
final def getString: String = { final def getString: String = {
val bytes = getFramed val bytes = getFramed
new String(bytes, java.nio.charset.StandardCharsets.UTF_8) new String(bytes, java.nio.charset.StandardCharsets.UTF_8)