Add Binary module

The Binary module implements functions for interpreting byte sequences as int16, int32, or int64 values according to a given endianness (the Byte module already supports interpreting single bytes as int8 values). It would be nice to implement these functions in pure Carp using the Byte module, but unfortunately we need to rely on some type conversions in C, which are only possible by registering C functions that perform the necessary conversions. At the moment, all of these functions are unsafe (they access byte arrays using unsafe-nth).
2024-09-11 05:25:28 +03:00 · 2020-02-16 15:15:54 -05:00 · 2020-02-16 15:15:54 -05:00 · ea1505779d
commit ea1505779d
parent 117583c836
2 changed files with 131 additions and 0 deletions
--- a/core/Binary.carp
+++ b/core/Binary.carp
@ -0,0 +1,117 @@
+(system-include "carp_binary.h")
+
+(defmodule Binary
+  (doc Order
+    "The type of byte orders.
+
+    LittleEndian designates the little endian ordering, and indicates the least
+    significant byte appears first in a given byte sequence.
+
+    BigEndian designates the big endian ordering, and indicates the most
+    significant byte occurs first in a given byte sequence.")
+  (deftype Order LittleEndian BigEndian)
+
+  ;; Bindings to the C helpers defined in carp_binary.h. Every helper takes its
+  ;; bytes least-significant first and ORs them into a single integer, so the
+  ;; callers below only have to reorder their arguments to honour an Order.
+  ;; NOTE(review): the C helpers return uint16_t/uint32_t/uint64_t but are
+  ;; registered here as returning Int; confirm that the implied conversion
+  ;; (in particular for to-int64) is what is wanted.
+  (register to-int16 (λ [Byte Byte] Int))
+  (register to-int32 (λ [Byte Byte Byte Byte] Int))
+  (register to-int64 (λ [Byte Byte Byte Byte Byte Byte Byte Byte] Int))
+
+  (doc bytes->int16-unsafe
+    "Interprets the first two bytes in a byte sequence as an int16 value.
+
+     `order` states which of the two bytes is the least significant one.
+
+     **This operation is unsafe.** The bytes are read with `Array.unsafe-nth`,
+     so `bs` must contain at least two bytes.")
+  (sig bytes->int16-unsafe (Fn [Order (Ref (Array Byte) a)] Int))
+  (defn bytes->int16-unsafe [order bs]
+    (match order
+      (Order.LittleEndian)
+        (to-int16 @(Array.unsafe-nth bs 0) @(Array.unsafe-nth bs 1))
+      (Order.BigEndian)
+        (to-int16 @(Array.unsafe-nth bs 1) @(Array.unsafe-nth bs 0))))
+
+  (doc bytes->int32-unsafe
+    "Interprets the first four bytes in a byte sequence as an int32 value.
+
+     `order` states whether the first or the last of the four bytes is the
+     least significant one.
+
+     **This operation is unsafe.** The bytes are read with `Array.unsafe-nth`,
+     so `bs` must contain at least four bytes.")
+  ;; The reference carries a lifetime variable, like the int16/int64 variants.
+  (sig bytes->int32-unsafe (Fn [Order (Ref (Array Byte) a)] Int))
+  (defn bytes->int32-unsafe [order bs]
+    (match order
+      (Order.LittleEndian)
+        (to-int32 @(Array.unsafe-nth bs 0) @(Array.unsafe-nth bs 1)
+                  @(Array.unsafe-nth bs 2) @(Array.unsafe-nth bs 3))
+      (Order.BigEndian)
+        (to-int32 @(Array.unsafe-nth bs 3) @(Array.unsafe-nth bs 2)
+                  @(Array.unsafe-nth bs 1) @(Array.unsafe-nth bs 0))))
+
+  (doc bytes->int64-unsafe
+    "Interprets the first eight bytes in a byte sequence as an int64 value.
+
+     `order` states whether the first or the last of the eight bytes is the
+     least significant one.
+
+     **This operation is unsafe.** The bytes are read with `Array.unsafe-nth`,
+     so `bs` must contain at least eight bytes.")
+  (sig bytes->int64-unsafe (Fn [Order (Ref (Array Byte) a)] Int))
+  (defn bytes->int64-unsafe [order bs]
+    (match order
+      (Order.LittleEndian)
+        (to-int64 @(Array.unsafe-nth bs 0) @(Array.unsafe-nth bs 1)
+                  @(Array.unsafe-nth bs 2) @(Array.unsafe-nth bs 3)
+                  @(Array.unsafe-nth bs 4) @(Array.unsafe-nth bs 5)
+                  @(Array.unsafe-nth bs 6) @(Array.unsafe-nth bs 7))
+      (Order.BigEndian)
+        (to-int64 @(Array.unsafe-nth bs 7) @(Array.unsafe-nth bs 6)
+                  @(Array.unsafe-nth bs 5) @(Array.unsafe-nth bs 4)
+                  @(Array.unsafe-nth bs 3) @(Array.unsafe-nth bs 2)
+                  @(Array.unsafe-nth bs 1) @(Array.unsafe-nth bs 0))))
+
+  (doc partition
+    "Splits an array into consecutive partitions of `partition-len` elements.
+
+     The final partition is shorter when the array length is not an exact
+     multiple of `partition-len`. Returns an empty array when `partition-len`
+     is less than 1.")
+  (sig partition (Fn [(Ref (Array a) b) Int] (Array (Array a))))
+  (defn partition [arr partition-len]
+    ;; A non-positive partition length would divide by zero below.
+    (if (< partition-len 1)
+      (Array.allocate 0)
+      (let [n (Array.length arr)]
+        ;; Number of partitions is ceil(n / partition-len): one per full
+        ;; partition plus one more if there is a remainder.
+        (let [len (+ (/ n partition-len)
+                     (if (= 0 (mod n partition-len)) 0 1))]
+          (let-do [x 0
+                   y 0
+                   a (Array.allocate len)]
+            (for [i 0 len]
+              (do
+                ;; [x, y) is the slice for partition i, clamped to the array end.
+                (set! y (+ x partition-len))
+                (when (> y n)
+                  (set! y n))
+                (Array.aset-uninitialized! &a i (Array.slice arr x y))
+                (set! x y)))
+            a)))))
+
+  (doc byte-seq->int8-seq
+    "Interprets a sequence of bytes as a sequence of int8 values.
+
+     Each byte is converted on its own with `Byte.to-int`; no unchecked
+     indexing is involved.")
+  (sig byte-seq->int8-seq (Fn [(Ref (Array Byte) a)] (Array Int)))
+  (defn byte-seq->int8-seq [bs]
+    (let [f (fn [b] (Byte.to-int @b))]
+    (Array.copy-map &f bs)))
+
+  (doc byte-seq->int16-seq
+    "Interprets a sequence of bytes as a sequence of int16 values.
+
+     Bytes at even indices are zipped with the bytes at the following odd
+     indices; `order` decides which byte of each pair is the least significant
+     one.")
+  (sig byte-seq->int16-seq (Fn [Order (Ref (Array Byte) a)] (Array Int)))
+  (defn byte-seq->int16-seq [order bs]
+    ;; This is way less efficient than it could be.
+    ;; Instead of allocating about 4 extra arrays, we can express this
+    ;; as a fold.
+    (let [enum (Array.enumerated bs)]
+      (let [evens (Array.copy-filter &(fn [p] (Int.even? @(Pair.a p))) &enum)
+            odds (Array.copy-filter &(fn [p] (Int.odd? @(Pair.a p))) &enum)
+            intf (fn [r1 r2] (to-int16 @(Pair.b r1) @(Pair.b r2)))]
+        ;; to-int16 takes the least significant byte first, so the argument
+        ;; order of the zip selects the endianness.
+        (match order
+          (Order.LittleEndian) (Array.zip &intf &evens &odds)
+          (Order.BigEndian) (Array.zip &intf &odds &evens)))))
+
+  (doc byte-seq->int32-seq
+    "Interprets a sequence of bytes as a sequence of int32 values.
+
+     The bytes are split into partitions of four and each partition is decoded
+     with `bytes->int32-unsafe`.
+
+     **This operation is unsafe.** When the number of bytes is not a multiple
+     of four, the final partition is shorter than four bytes and is still read
+     with unchecked indexing.")
+  (sig byte-seq->int32-seq (Fn [Order (Ref (Array Byte) a)] (Array Int)))
+  (defn byte-seq->int32-seq [order bs]
+    (let [partitions (partition bs 4)
+          f (fn [b] (bytes->int32-unsafe order b))]
+      (Array.copy-map &f &partitions)))
+
+  (doc byte-seq->int64-seq
+    "Interprets a sequence of bytes as a sequence of int64 values.
+
+     The bytes are split into partitions of eight and each partition is decoded
+     with `bytes->int64-unsafe`.
+
+     **This operation is unsafe.** When the number of bytes is not a multiple
+     of eight, the final partition is shorter than eight bytes and is still
+     read with unchecked indexing.")
+  (sig byte-seq->int64-seq (Fn [Order (Ref (Array Byte) a)] (Array Int)))
+  (defn byte-seq->int64-seq [order bs]
+    (let [partitions (partition bs 8)
+          f (fn [b] (bytes->int64-unsafe order b))]
+      (Array.copy-map &f &partitions)))
+)
--- a/core/carp_binary.h
+++ b/core/carp_binary.h
@ -0,0 +1,14 @@
+#include <stdint.h>
+
+/* Combines two bytes into a 16-bit value; b1 is the low byte, b2 the high byte. */
+uint16_t Binary_to_MINUS_int16(uint8_t b1, uint8_t b2) {
+  uint16_t high = (uint16_t)(b2 << 8);
+  return high | b1;
+}
+
+/*
+ * Combines four bytes into a 32-bit value; b1 is the least significant byte
+ * and b4 the most significant one.
+ *
+ * Each byte is widened to uint32_t before it is shifted: a bare uint8_t is
+ * promoted to (signed) int, and shifting a value >= 0x80 left by 24 would
+ * overflow that int, which is undefined behaviour.
+ */
+uint32_t Binary_to_MINUS_int32(uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4) {
+  return (uint32_t)b1 | ((uint32_t)b2 << 8) | ((uint32_t)b3 << 16) |
+    ((uint32_t)b4 << 24);
+}
+
+/*
+ * Combines eight bytes into a 64-bit value; b1 is the least significant byte
+ * and b8 the most significant one.
+ *
+ * Every byte is widened to uint64_t before it is shifted. Without the cast the
+ * low four bytes are combined as (signed) int; with b4 >= 0x80 that
+ * intermediate becomes negative and is sign-extended when it is ORed into the
+ * 64-bit result, setting all of the upper 32 bits.
+ */
+uint64_t Binary_to_MINUS_int64(uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5, uint8_t b6, uint8_t b7, uint8_t b8) {
+  return (uint64_t)b1 | ((uint64_t)b2 << 8) | ((uint64_t)b3 << 16) |
+    ((uint64_t)b4 << 24) | ((uint64_t)b5 << 32) | ((uint64_t)b6 << 40) |
+    ((uint64_t)b7 << 48) | ((uint64_t)b8 << 56);
+}