Optimize BigEndian class

Despite what the comment in the code claimed, gcc sometimes failed to optimize the loop into a single `bswap` instruction, so it looks like we should explicitly use the `__builtin_bswap` functions.
2024-10-05 00:57:08 +03:00 · 2022-02-28 16:14:47 +09:00 · 2022-02-28 16:14:47 +09:00 · c1c9bdf363
commit c1c9bdf363
parent d4bbf31db2
1 changed files with 16 additions and 14 deletions
--- a/byteorder.h
+++ b/byteorder.h
@ -1,31 +1,24 @@
 #pragma once

 #include <cstdint>
+#include <cstring>

 namespace mold {

 template <typename T>
 class BigEndian {
 public:
-  BigEndian() : BigEndian(0) {}
-
-  BigEndian(T x) {
-    *this = x;
-  }
+  BigEndian() = delete;

  operator T() const {
-    // We don't need to optimize this code because compilers are
-    // usually smart enough to compile this loop into a single
-    // byte-swap instruction such as x86's bswap.
-    T ret = 0;
-    for (int i = 0; i < sizeof(T); i++)
-      ret = (ret << 8) | val[i];
-    return ret;
+    T x;
+    memcpy(&x, val, sizeof(T));
+    return bswap(x);
  }

  BigEndian &operator=(T x) {
-    for (int i = 0; i < sizeof(T); i++)
-      val[sizeof(T) - 1 - i] = x >> (i * 8);
+    x = bswap(x);
+    memcpy(&val, &x, sizeof(T));
    return *this;
  }

@ -63,6 +56,15 @@ public:

 private:
  uint8_t val[sizeof(T)];
+
+  static T bswap(T x) {
+    if constexpr (sizeof(T) == 2)
+      return __builtin_bswap16(x);
+    else if constexpr (sizeof(T) == 4)
+      return __builtin_bswap32(x);
+    else
+      return __builtin_bswap64(x);
+  }
 };

 using ibig16 = BigEndian<int16_t>;