mirror of
https://github.com/unisonweb/unison.git
synced 2024-09-23 08:18:04 +03:00
Merge pull request #190 from unisonweb/topic/varlong-arrays
use VarLong for lengths and backref Pos
This commit is contained in:
commit
80e71a987a
@ -5,6 +5,10 @@ module Unison.Codecs where
|
||||
import Data.Text (Text)
|
||||
import Control.Arrow (second)
|
||||
import Control.Monad.State
|
||||
import Data.Bits (Bits)
|
||||
import qualified Data.Bytes.Serial as BS
|
||||
import Data.Bytes.Signed (Unsigned)
|
||||
import Data.Bytes.VarInt (VarInt(..))
|
||||
import qualified Data.ByteString as B
|
||||
import Data.ByteString.Builder (doubleBE, int64BE, toLazyByteString)
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
@ -257,10 +261,12 @@ serializeCase1 (MatchCase p guard body) = do
|
||||
pure $ MatchCase p posg posb
|
||||
|
||||
-- | Write a back-reference position using the variable-length integer
-- encoding provided by the 'BS.Serial' instance of 'VarInt'.
--
-- Only one equation is kept: the fixed-width 'putWord64be' form and the
-- 'VarInt' form cannot both define the same name.
putBackref :: MonadPut m => Pos -> m ()
putBackref = BS.serialize . VarInt
|
||||
|
||||
-- | Write a length as a variable-length integer by wrapping it in 'VarInt'
-- and delegating to 'BS.serialize'.
--
-- The extra 'Bits' / 'Unsigned' constraints are what the 'VarInt'
-- serialisation needs in order to work on the value's bit pattern.
putLength :: (MonadPut m, Integral n, Integral (Unsigned n),
              Bits n, Bits (Unsigned n))
          => n -> m ()
putLength = BS.serialize . VarInt
|
||||
|
||||
serializeMaybe :: (MonadPut m) => (a -> m ()) -> Maybe a -> m ()
|
||||
serializeMaybe f b = case b of
|
||||
@ -270,7 +276,7 @@ serializeMaybe f b = case b of
|
||||
-- | Write a 'Text' value as its UTF-8 bytes, preceded by the byte count.
--
-- The count is written exactly once, via 'putLength'; emitting an
-- additional fixed-width 32-bit prefix would produce two length headers.
lengthEncode :: MonadPut m => Text -> m ()
lengthEncode text = do
  let bs = encodeUtf8 text
  putLength $ B.length bs
  putByteString bs
|
||||
|
||||
serializeFoldable :: (MonadPut m, Foldable f) => (a -> m ()) -> f a -> m ()
|
||||
@ -286,7 +292,7 @@ serializeReference ref = case ref of
|
||||
Derived hash -> do
|
||||
putWord8 1
|
||||
let bs = Hash.toBytes hash
|
||||
putWord32be . fromIntegral $ B.length bs
|
||||
putLength $ B.length bs
|
||||
putByteString bs
|
||||
|
||||
serializeConstructorArities :: MonadPut m => Reference -> [Int] -> m ()
|
||||
@ -302,4 +308,4 @@ serializeFile (UnisonFile dataDecls effectDecls body) = do
|
||||
serializeFoldable (uncurry serializeConstructorArities) effectDecls'
|
||||
pos <- serializeTerm body
|
||||
putWord8 0
|
||||
putWord64be pos
|
||||
putBackref pos
|
||||
|
@ -1,6 +1,7 @@
|
||||
package org.unisonweb.util
|
||||
|
||||
import java.nio.{ByteBuffer,BufferOverflowException}
|
||||
import java.lang.Long.{compareUnsigned}
|
||||
import Text.Text
|
||||
|
||||
/**
|
||||
@ -14,15 +15,34 @@ trait Sink {
|
||||
def putInt(n: Int): Unit
|
||||
def putLong(n: Long): Unit
|
||||
|
||||
/**
 * Uses the little-endian variable length encoding of unsigned integers:
 * https://developers.google.com/protocol-buffers/docs/encoding#varints
 *
 * `n` is treated as an unsigned 64-bit value and written 7 bits at a time,
 * least-significant group first. Every byte except the last has its high
 * bit set to signal that more bytes follow.
 */
def putVarLong(n: Long): Unit = {
  // Low 7 payload bits of the current group.
  val low7 = (n & 0x7f).toInt
  // Unsigned comparison so that negative longs count as large values.
  if (compareUnsigned(n, 0x80) < 0) putByte(low7.toByte)
  else {
    putByte((low7 | 0x80).toByte) // continuation bit set
    putVarLong(n >>> 7)           // logical shift: no sign extension
  }
}
|
||||
|
||||
/**
|
||||
* Uses the zigzag encoding for variable-length signed numbers, described at:
|
||||
* https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
|
||||
* https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L949-L952
|
||||
*/
|
||||
def putVarSignedLong(n: Long): Unit = {
  // Arithmetic shift yields all ones for negative n and all zeros otherwise.
  val signMask = n >> 63
  // Zigzag: the sign ends up in bit 0, so small magnitudes give small values.
  val zigzag = (n << 1) ^ signMask
  putVarLong(zigzag)
}
|
||||
|
||||
def putDouble(n: Double): Unit
|
||||
def putString(s: String): Unit
|
||||
def putText(txt: Text): Unit
|
||||
def position: Long
|
||||
def putFramed(bs: Array[Byte]): Unit = {
  // Write the length prefix exactly once, as a varint. `getFramed` reads
  // the length with `getVarLong`, so also writing a fixed-width Int would
  // desynchronise reader and writer.
  putVarLong(bs.length)
  put(bs)
}
|
||||
def putFramedSeq[A](seq: Seq[A])(f: (Sink,A) => Unit): Unit =
|
||||
|
@ -12,17 +12,40 @@ import scala.reflect.ClassTag
|
||||
* The cursor position can be accessed via the `position` method.
|
||||
*/
|
||||
trait Source { self =>
|
||||
// todo: use a representation that supports 64-bit lengths, unlike Array
|
||||
def get(n: Int): Array[Byte]
|
||||
def getBoolean: Boolean = getByte != 0
|
||||
def getByte: Byte
|
||||
def getInt: Int
|
||||
def getLong: Long
|
||||
/**
 * Uses the little-endian variable length encoding of unsigned integers:
 * https://developers.google.com/protocol-buffers/docs/encoding#varints
 *
 * Bytes are read until one without the continuation bit (0x80) is seen;
 * each byte contributes its low 7 bits, least-significant group first.
 *
 * A 64-bit value needs at most 10 groups. Input that still has the
 * continuation bit set after 10 bytes is rejected with an
 * `IllegalStateException` rather than being decoded with silently
 * discarded bits.
 */
def getVarLong: Long = {
  // Tail-recursive so stack depth does not grow with the encoded length.
  @scala.annotation.tailrec
  def go(acc: Long, shift: Int): Long = {
    if (shift >= 64)
      throw new IllegalStateException("malformed varint: more than 10 bytes")
    val b = getByte
    // Mask with a Long so the shift happens in 64-bit arithmetic.
    val next = acc | ((b & 0x7fL) << shift)
    if ((b & 0x80) == 0) next
    else go(next, shift + 7)
  }
  go(0L, 0)
}
|
||||
|
||||
/**
|
||||
* Uses the zigzag encoding for variable-length signed numbers, described at:
|
||||
* https://developers.google.com/protocol-buffers/docs/encoding#signed-integers
|
||||
* https://github.com/google/protobuf/blob/0400cca/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L949-L952
|
||||
*/
|
||||
def getVarSignedLong: Long = {
  val zigzag = getVarLong
  // Bit 0 carries the sign; negating it gives all ones (negative) or zero.
  val signMask = -(zigzag & 1)
  // The remaining bits carry the magnitude; the shift must be logical.
  val magnitude = zigzag >>> 1
  magnitude ^ signMask
}
|
||||
|
||||
def getDouble: Double
|
||||
def position: Long
|
||||
// todo: use a representation that supports 64-bit lengths, unlike Array
def getFramed: Array[Byte] = {
  // The length prefix is a varint and may decode to any 64-bit value.
  val len = getVarLong
  // `get` takes an Int, so reject lengths that would be silently
  // truncated or turned negative by `.toInt`.
  if (len < 0L || len > Int.MaxValue)
    throw new IllegalStateException(s"framed length out of range: $len")
  get(len.toInt)
}
|
||||
|
||||
// todo: use a representation that supports 64-bit lengths, unlike String
|
||||
final def getString: String = {
|
||||
val bytes = getFramed
|
||||
new String(bytes, java.nio.charset.StandardCharsets.UTF_8)
|
||||
|
Loading…
Reference in New Issue
Block a user