Add Binary module

The Binary module implements functions for interpreting byte sequences
as int16, int32, or int64 values depending on a given endianness (the
Byte module implements support for interpreting bytes as int8 values).

It'd be nice if we could implement these functions in pure Carp, using
the Byte module--but unfortunately we need to rely on some type
conversions in C, which are only possible by registering C functions for
performing the necessary conversions.

At the moment, all of these functions are unsafe (they access byte
arrays using unsafe-nth).
This commit is contained in:
Scott Olsen 2020-02-16 15:15:54 -05:00
parent 117583c836
commit ea1505779d
2 changed files with 131 additions and 0 deletions

117
core/Binary.carp Normal file
View File

@ -0,0 +1,117 @@
(system-include "carp_binary.h")
(defmodule Binary
;; Byte-order selector taken as the first argument by the multi-byte
;; conversion functions in this module. Both constructors take no arguments
;; and are matched as (Order.LittleEndian) / (Order.BigEndian).
(doc Order
"The type of byte orders.
LittleEndian designates the little endian ordering, and indicates the least
significant byte appears first in a given byte sequence.
BigEndian designates the big endian ordering, and indicates the most
significant byte occurs first in a given byte sequence.")
(deftype Order LittleEndian BigEndian)
;; Bindings to the C helpers defined in carp_binary.h. Each helper receives
;; its bytes least-significant first: the first argument becomes the
;; low-order byte of the result and the last argument the high-order byte.
;; NOTE(review): all three are declared as returning Int, while the C
;; helpers return uint16_t, uint32_t and uint64_t respectively. If Int is
;; narrower than 64 bits the to-int64 result would be truncated -- confirm,
;; and consider a wider return type.
(register to-int16 (λ [Byte Byte] Int))
(register to-int32 (λ [Byte Byte Byte Byte] Int))
(register to-int64 (λ [Byte Byte Byte Byte Byte Byte Byte Byte] Int))
(doc bytes->int16-unsafe
  "Reads the first two bytes of `bs` as one 16-bit value, with `order`
deciding which of the two bytes is the most significant one.

**This operation is unsafe.** Both bytes are fetched with `Array.unsafe-nth`.")
(sig bytes->int16-unsafe (Fn [Order (Ref (Array Byte) a)] Int))
(defn bytes->int16-unsafe [order bs]
  (let [first-byte @(Array.unsafe-nth bs 0)
        second-byte @(Array.unsafe-nth bs 1)]
    ;; to-int16 takes the low-order byte as its first argument.
    (match order
      (Order.LittleEndian) (to-int16 first-byte second-byte)
      (Order.BigEndian) (to-int16 second-byte first-byte))))
(doc bytes->int32-unsafe
  "Interprets the first four bytes in a byte sequence as an int32 value.

`order` states whether the first byte of `bs` is the least significant
(`LittleEndian`) or the most significant (`BigEndian`) byte of the result.

**This operation is unsafe.** The bytes are fetched with `Array.unsafe-nth`.")
;; The Ref carries an explicit lifetime variable, like every other signature
;; in this module.
(sig bytes->int32-unsafe (Fn [Order (Ref (Array Byte) a)] Int))
(defn bytes->int32-unsafe [order bs]
  ;; to-int32 takes its bytes least-significant first, so the big endian case
  ;; passes the bytes in reverse index order.
  (match order
    (Order.LittleEndian)
      (to-int32 @(Array.unsafe-nth bs 0) @(Array.unsafe-nth bs 1)
                @(Array.unsafe-nth bs 2) @(Array.unsafe-nth bs 3))
    (Order.BigEndian)
      (to-int32 @(Array.unsafe-nth bs 3) @(Array.unsafe-nth bs 2)
                @(Array.unsafe-nth bs 1) @(Array.unsafe-nth bs 0))))
(doc bytes->int64-unsafe
  "Reads the first eight bytes of `bs` as one 64-bit value, with `order`
deciding whether the first or the last of those bytes is the most
significant one.

**This operation is unsafe.** All bytes are fetched with `Array.unsafe-nth`.")
(sig bytes->int64-unsafe (Fn [Order (Ref (Array Byte) a)] Int))
(defn bytes->int64-unsafe [order bs]
  (let [b0 @(Array.unsafe-nth bs 0)
        b1 @(Array.unsafe-nth bs 1)
        b2 @(Array.unsafe-nth bs 2)
        b3 @(Array.unsafe-nth bs 3)
        b4 @(Array.unsafe-nth bs 4)
        b5 @(Array.unsafe-nth bs 5)
        b6 @(Array.unsafe-nth bs 6)
        b7 @(Array.unsafe-nth bs 7)]
    ;; to-int64 takes its bytes least-significant first.
    ;; NOTE(review): the result type is Int although the C helper returns a
    ;; 64-bit value -- confirm that Int is wide enough to hold it.
    (match order
      (Order.LittleEndian) (to-int64 b0 b1 b2 b3 b4 b5 b6 b7)
      (Order.BigEndian) (to-int64 b7 b6 b5 b4 b3 b2 b1 b0))))
(doc partition
  "Splits `arr` into consecutive chunks of `partition-len` elements, each
chunk being a fresh array produced by `Array.slice`.

When the length of `arr` is not a multiple of `partition-len`, the final chunk
holds the leftover elements and is therefore shorter than the others.
Returns an empty array when `arr` is empty or when `partition-len` is less
than 1.")
(sig partition (Fn [(Ref (Array a) b) Int] (Array (Array a))))
(defn partition [arr partition-len]
  (let [total (Array.length arr)]
    (let [len (if (< partition-len 1)
                ;; A non-positive chunk size cannot be satisfied (and would
                ;; divide by zero below), so produce no chunks at all.
                0
                ;; ceil(total / partition-len): one chunk per full group plus
                ;; a single extra chunk only when there is a remainder.
                (+ (/ total partition-len)
                   (if (= 0 (mod total partition-len)) 0 1)))]
      (let-do [x 0
               y 0
               a (Array.allocate len)]
        (for [i 0 len]
          (do
            (set! y (+ x partition-len))
            ;; Clamp the end of the last chunk to the end of the input.
            (when (> y total)
              (set! y total))
            (Array.aset-uninitialized! &a i (Array.slice arr x y))
            (set! x y)))
        a))))
;; The documentation is attached to the name that is actually defined below.
(doc byte-seq->int8-seq
  "Interprets a sequence of bytes as a sequence of int8 values.
Every byte of `bs` is converted on its own with `Byte.to-int`.")
(sig byte-seq->int8-seq (Fn [(Ref (Array Byte) a)] (Array Int)))
(defn byte-seq->int8-seq [bs]
  (let [f (fn [b] (Byte.to-int @b))]
    (Array.copy-map &f bs)))
;; The documentation is attached to the name that is actually defined below.
(doc byte-seq->int16-seq
  "Interprets a sequence of bytes as a sequence of int16 values.
Each byte at an even index is combined with the byte at the following odd
index; `order` decides which of the two is the most significant byte.")
(sig byte-seq->int16-seq (Fn [Order (Ref (Array Byte) a)] (Array Int)))
(defn byte-seq->int16-seq [order bs]
  ;; This is way less efficient than it could be.
  ;; Instead of allocating about 4 extra arrays, we can express this
  ;; as a fold.
  ;; NOTE(review): for an odd number of bytes `evens` is one element longer
  ;; than `odds`; the unpaired byte is presumably dropped by Array.zip --
  ;; confirm.
  (let [enum (Array.enumerated bs)]
    (let [evens (Array.copy-filter &(fn [p] (Int.even? @(Pair.a p))) &enum)
          odds (Array.copy-filter &(fn [p] (Int.odd? @(Pair.a p))) &enum)
          intf (fn [r1 r2] (to-int16 @(Pair.b r1) @(Pair.b r2)))]
      ;; to-int16 takes the low-order byte first, so the little endian case
      ;; passes the even-indexed (earlier) byte as the first argument.
      (match order
        (Order.LittleEndian) (Array.zip &intf &evens &odds)
        (Order.BigEndian) (Array.zip &intf &odds &evens)))))
;; The documentation is attached to the name that is actually defined below.
(doc byte-seq->int32-seq
  "Interprets a sequence of bytes as a sequence of int32 values.
The bytes of `bs` are consumed in groups of four, each group being decoded
according to `order`. A trailing group of fewer than four bytes is ignored.")
(sig byte-seq->int32-seq (Fn [Order (Ref (Array Byte) a)] (Array Int)))
(defn byte-seq->int32-seq [order bs]
  (let [partitions (partition bs 4)]
    ;; bytes->int32-unsafe reads four bytes without bounds checks, so only
    ;; groups that really contain four bytes may be handed to it.
    (let [complete (Array.copy-filter &(fn [p] (= 4 (Array.length p))) &partitions)
          f (fn [b] (bytes->int32-unsafe order b))]
      (Array.copy-map &f &complete))))
;; The documentation is attached to the name that is actually defined below.
(doc byte-seq->int64-seq
  "Interprets a sequence of bytes as a sequence of int64 values.
The bytes of `bs` are consumed in groups of eight, each group being decoded
according to `order`. A trailing group of fewer than eight bytes is ignored.")
(sig byte-seq->int64-seq (Fn [Order (Ref (Array Byte) a)] (Array Int)))
(defn byte-seq->int64-seq [order bs]
  (let [partitions (partition bs 8)]
    ;; bytes->int64-unsafe reads eight bytes without bounds checks, so only
    ;; groups that really contain eight bytes may be handed to it.
    (let [complete (Array.copy-filter &(fn [p] (= 8 (Array.length p))) &partitions)
          f (fn [b] (bytes->int64-unsafe order b))]
      (Array.copy-map &f &complete))))
)

14
core/carp_binary.h Normal file
View File

@ -0,0 +1,14 @@
#include <stdint.h>
/* Combines two bytes into one 16-bit value.
 * b1 supplies the low-order byte, b2 the high-order byte. */
uint16_t Binary_to_MINUS_int16(uint8_t b1, uint8_t b2) {
  uint16_t high = (uint16_t)(b2 << 8);
  uint16_t low = b1;
  return high | low;
}
/* Combines four bytes into one 32-bit value.
 * b1 supplies the least significant byte, b4 the most significant byte. */
uint32_t Binary_to_MINUS_int32(uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4) {
  /* Widen every byte to uint32_t before shifting: a bare uint8_t is promoted
   * to a signed int, and shifting a set bit into its sign position
   * (b4 >= 0x80) is undefined behaviour. */
  return (uint32_t)b1 |
         ((uint32_t)b2 << 8) |
         ((uint32_t)b3 << 16) |
         ((uint32_t)b4 << 24);
}
/* Combines eight bytes into one 64-bit value.
 * b1 supplies the least significant byte, b8 the most significant byte. */
uint64_t Binary_to_MINUS_int64(uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5, uint8_t b6, uint8_t b7, uint8_t b8) {
  /* Widen every byte to uint64_t before shifting. Without the cast the low
   * four bytes are shifted as signed int, so a b4 of 0x80 or more yields a
   * negative int that sign-extends and sets the upper 32 bits of the result. */
  return (uint64_t)b1 |
         ((uint64_t)b2 << 8) |
         ((uint64_t)b3 << 16) |
         ((uint64_t)b4 << 24) |
         ((uint64_t)b5 << 32) |
         ((uint64_t)b6 << 40) |
         ((uint64_t)b7 << 48) |
         ((uint64_t)b8 << 56);
}