mirror of
https://github.com/unisonweb/unison.git
synced 2024-09-23 08:18:04 +03:00
Merge pull request #190 from unisonweb/topic/varlong-arrays
use VarLong for lengths and backref Pos
This commit is contained in:
commit
80e71a987a
@ -5,6 +5,10 @@ module Unison.Codecs where
|
|||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Control.Arrow (second)
|
import Control.Arrow (second)
|
||||||
import Control.Monad.State
|
import Control.Monad.State
|
||||||
|
import Data.Bits (Bits)
|
||||||
|
import qualified Data.Bytes.Serial as BS
|
||||||
|
import Data.Bytes.Signed (Unsigned)
|
||||||
|
import Data.Bytes.VarInt (VarInt(..))
|
||||||
import qualified Data.ByteString as B
|
import qualified Data.ByteString as B
|
||||||
import Data.ByteString.Builder (doubleBE, int64BE, toLazyByteString)
|
import Data.ByteString.Builder (doubleBE, int64BE, toLazyByteString)
|
||||||
import qualified Data.ByteString.Lazy as BL
|
import qualified Data.ByteString.Lazy as BL
|
||||||
@ -257,10 +261,12 @@ serializeCase1 (MatchCase p guard body) = do
|
|||||||
pure $ MatchCase p posg posb
|
pure $ MatchCase p posg posb
|
||||||
|
|
||||||
putBackref :: MonadPut m => Pos -> m ()
|
putBackref :: MonadPut m => Pos -> m ()
|
||||||
putBackref = putWord64be
|
putBackref = BS.serialize . VarInt
|
||||||
|
|
||||||
putLength :: (MonadPut m, Integral n) => n -> m ()
|
putLength :: (MonadPut m, Integral n, Integral (Unsigned n),
|
||||||
putLength = putWord64be . fromIntegral
|
Bits n, Bits (Unsigned n))
|
||||||
|
=> n -> m ()
|
||||||
|
putLength = BS.serialize . VarInt
|
||||||
|
|
||||||
serializeMaybe :: (MonadPut m) => (a -> m ()) -> Maybe a -> m ()
|
serializeMaybe :: (MonadPut m) => (a -> m ()) -> Maybe a -> m ()
|
||||||
serializeMaybe f b = case b of
|
serializeMaybe f b = case b of
|
||||||
@ -270,7 +276,7 @@ serializeMaybe f b = case b of
|
|||||||
lengthEncode :: MonadPut m => Text -> m ()
|
lengthEncode :: MonadPut m => Text -> m ()
|
||||||
lengthEncode text = do
|
lengthEncode text = do
|
||||||
let bs = encodeUtf8 text
|
let bs = encodeUtf8 text
|
||||||
putWord32be . fromIntegral $ B.length bs
|
putLength $ B.length bs
|
||||||
putByteString bs
|
putByteString bs
|
||||||
|
|
||||||
serializeFoldable :: (MonadPut m, Foldable f) => (a -> m ()) -> f a -> m ()
|
serializeFoldable :: (MonadPut m, Foldable f) => (a -> m ()) -> f a -> m ()
|
||||||
@ -286,7 +292,7 @@ serializeReference ref = case ref of
|
|||||||
Derived hash -> do
|
Derived hash -> do
|
||||||
putWord8 1
|
putWord8 1
|
||||||
let bs = Hash.toBytes hash
|
let bs = Hash.toBytes hash
|
||||||
putWord32be . fromIntegral $ B.length bs
|
putLength $ B.length bs
|
||||||
putByteString bs
|
putByteString bs
|
||||||
|
|
||||||
serializeConstructorArities :: MonadPut m => Reference -> [Int] -> m ()
|
serializeConstructorArities :: MonadPut m => Reference -> [Int] -> m ()
|
||||||
@ -302,4 +308,4 @@ serializeFile (UnisonFile dataDecls effectDecls body) = do
|
|||||||
serializeFoldable (uncurry serializeConstructorArities) effectDecls'
|
serializeFoldable (uncurry serializeConstructorArities) effectDecls'
|
||||||
pos <- serializeTerm body
|
pos <- serializeTerm body
|
||||||
putWord8 0
|
putWord8 0
|
||||||
putWord64be pos
|
putBackref pos
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package org.unisonweb.util
|
package org.unisonweb.util
|
||||||
|
|
||||||
import java.nio.{ByteBuffer,BufferOverflowException}
|
import java.nio.{ByteBuffer,BufferOverflowException}
|
||||||
|
import java.lang.Long.{compareUnsigned}
|
||||||
import Text.Text
|
import Text.Text
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -14,15 +15,34 @@ trait Sink {
|
|||||||
def putInt(n: Int): Unit
|
def putInt(n: Int): Unit
|
||||||
def putLong(n: Long): Unit
|
def putLong(n: Long): Unit
|
||||||
|
|
||||||
// todo: the UTF-8 of Long encoding, use a single byte if possible
|
/**
|
||||||
def putVarLong(n: Long): Unit =
|
* Uses the little-endian variable length encoding of unsigned integers:
|
||||||
putLong(n)
|
* https://developers.google.com/protocol-buffers/docs/encoding#varints
|
||||||
|
*/
|
||||||
|
def putVarLong(n: Long): Unit = {
  // The low seven bits are the payload of the byte written by this call.
  val payload = (n & 0x7fL).toInt
  // `n` is treated as unsigned, so a "negative" Long never fits in one byte.
  val fitsInOneByte = compareUnsigned(n, 0x80L) < 0
  if (!fitsInOneByte) {
    // More groups follow: set the continuation bit, then encode the remaining
    // bits (logical shift, so the sign bit is not propagated).
    putByte((payload | 0x80).toByte)
    putVarLong(n >>> 7)
  } else putByte(payload.toByte)
}
|
||||||
|
|
||||||
|
/**
 * Writes a signed value by zigzag-mapping it onto an unsigned one and then
 * emitting that with `putVarLong`, so numbers of small magnitude (positive or
 * negative) take few bytes. The mapping is described at:
 * https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
 * https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L949-L952
 */
def putVarSignedLong(n: Long): Unit = {
  // Arithmetic shift smears the sign bit over all 64 bits: 0 for n >= 0, -1 otherwise.
  val signMask = n >> 63
  // Magnitude moves up one bit; the sign ends up in the lowest bit.
  val zigzag = (n << 1) ^ signMask
  putVarLong(zigzag)
}
|
||||||
|
|
||||||
def putDouble(n: Double): Unit
|
def putDouble(n: Double): Unit
|
||||||
def putString(s: String): Unit
|
def putString(s: String): Unit
|
||||||
def putText(txt: Text): Unit
|
def putText(txt: Text): Unit
|
||||||
def position: Long
|
def position: Long
|
||||||
def putFramed(bs: Array[Byte]): Unit = {
|
def putFramed(bs: Array[Byte]): Unit = {
|
||||||
putInt(bs.length)
|
putVarLong(bs.length)
|
||||||
put(bs)
|
put(bs)
|
||||||
}
|
}
|
||||||
def putFramedSeq[A](seq: Seq[A])(f: (Sink,A) => Unit): Unit =
|
def putFramedSeq[A](seq: Seq[A])(f: (Sink,A) => Unit): Unit =
|
||||||
|
@ -12,17 +12,40 @@ import scala.reflect.ClassTag
|
|||||||
* The cursor position can be accessed via the `position` method.
|
* The cursor position can be accessed via the `position` method.
|
||||||
*/
|
*/
|
||||||
trait Source { self =>
|
trait Source { self =>
|
||||||
|
// todo: use a representation that supports 64-bit lengths, unlike Array
|
||||||
def get(n: Int): Array[Byte]
|
def get(n: Int): Array[Byte]
|
||||||
def getBoolean: Boolean = getByte != 0
|
def getBoolean: Boolean = getByte != 0
|
||||||
def getByte: Byte
|
def getByte: Byte
|
||||||
def getInt: Int
|
def getInt: Int
|
||||||
def getLong: Long
|
def getLong: Long
|
||||||
// todo: The UTF-8 of Long encodings, uses a single byte where possible
|
|
||||||
def getVarLong: Long = getLong
|
/**
 * Reads an unsigned integer in the little-endian variable length encoding:
 * https://developers.google.com/protocol-buffers/docs/encoding#varints
 *
 * Each byte obtained from `getByte` contributes its low seven bits, least
 * significant group first; a set high bit means another byte follows.
 *
 * Decoding is iterative and limited to ten bytes (the most a 64-bit value can
 * occupy), so a malformed run of continuation bytes is rejected instead of
 * growing the call stack without bound.
 *
 * @return the decoded value; interpret the `Long` as unsigned
 * @throws IllegalArgumentException if the encoding is longer than ten bytes
 */
def getVarLong: Long = {
  // ceil(64 / 7) = 10 groups of seven bits cover every 64-bit value.
  val maxBytes = 10
  var result = 0L
  var shift = 0
  var bytesRead = 0
  var more = true
  while (more) {
    if (bytesRead == maxBytes)
      throw new IllegalArgumentException(
        s"malformed varint: encoding is longer than $maxBytes bytes")
    val b = getByte
    // `shift` never exceeds 63 here, so the shift amount is not wrapped mod 64.
    result |= (b & 0x7fL) << shift
    shift += 7
    bytesRead += 1
    more = (b & 0x80) != 0
  }
  result
}
|
||||||
|
|
||||||
|
/**
 * Reads a signed value written in zigzag form: an unsigned varint is read with
 * `getVarLong` and then mapped back to a signed number. The mapping is
 * described at:
 * https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
 * https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L949-L952
 */
def getVarSignedLong: Long = {
  val zigzag = getVarLong
  // Everything above the lowest bit is the magnitude part.
  val magnitude = zigzag >>> 1
  // The lowest bit is the sign: 0 stays 0, 1 becomes -1 (all bits set).
  val signMask = -(zigzag & 1L)
  magnitude ^ signMask
}
|
||||||
|
|
||||||
def getDouble: Double
|
def getDouble: Double
|
||||||
def position: Long
|
def position: Long
|
||||||
def getFramed: Array[Byte] = get(getInt)
|
|
||||||
|
|
||||||
|
// todo: use a representation that supports 64-bit lengths, unlike Array
|
||||||
|
def getFramed: Array[Byte] = {
  // The frame starts with its payload length as an unsigned varint.
  val length = getVarLong
  // An Array cannot hold more than Int.MaxValue elements. A negative Long here
  // is an unsigned value above Long.MaxValue. Fail loudly rather than letting
  // `.toInt` truncate to an unrelated length.
  if (length < 0L || length > Int.MaxValue)
    throw new IllegalArgumentException(
      s"framed length ${java.lang.Long.toUnsignedString(length)} exceeds the maximum array size")
  get(length.toInt)
}
|
||||||
|
|
||||||
|
// todo: use a representation that supports 64-bit lengths, unlike String
|
||||||
final def getString: String = {
|
final def getString: String = {
|
||||||
val bytes = getFramed
|
val bytes = getFramed
|
||||||
new String(bytes, java.nio.charset.StandardCharsets.UTF_8)
|
new String(bytes, java.nio.charset.StandardCharsets.UTF_8)
|
||||||
|
Loading…
Reference in New Issue
Block a user