klubhaus-doorbell/libraries/FastLED/src/bitswap.h

#pragma once

#ifndef __INC_BITSWAP_H
#define __INC_BITSWAP_H

#include "FastLED.h"
#include "fl/force_inline.h"
#include "fl/int.h"

/// @file bitswap.h
/// Functions for doing a rotation of bits/bytes used by parallel output

FASTLED_NAMESPACE_BEGIN


#if defined(FASTLED_ARM) || defined(FASTLED_ESP8266) || defined(FASTLED_DOXYGEN)
/// Union giving both whole-byte and per-bit access to 8 bits.
/// The `a0`..`a7` bitfields can be written one at a time and the packed
/// result read back through `raw`, which is how swapbits8() uses it.
/// @note The bit values quoted on the fields assume the compiler allocates
/// bitfields starting at the least significant bit - TODO confirm for every
/// supported toolchain.
typedef union {
  fl::u8 raw;   ///< the entire byte
  struct {
  fl::u32 a0:1;  ///< bit 0 (0x01)
  fl::u32 a1:1;  ///< bit 1 (0x02)
  fl::u32 a2:1;  ///< bit 2 (0x04)
  fl::u32 a3:1;  ///< bit 3 (0x08)
  fl::u32 a4:1;  ///< bit 4 (0x10)
  fl::u32 a5:1;  ///< bit 5 (0x20)
  fl::u32 a6:1;  ///< bit 6 (0x40)
  fl::u32 a7:1;  ///< bit 7 (0x80)
  };
} just8bits;

/// Structure giving per-bit access to 32 bits, named as four bytes
/// ('a', 'b', 'c', 'd') of eight bits each.
/// Field `<byte><bit>` is a single-bit bitfield; the fields are declared in
/// order from `a0` to `d7`.
/// @note The masks quoted on the fields assume the compiler allocates
/// bitfields starting at the least significant bit - TODO confirm for every
/// supported toolchain.
typedef struct {
  fl::u32 a0:1;  ///< byte 'a', bit 0 (0x00000001)
  fl::u32 a1:1;  ///< byte 'a', bit 1 (0x00000002)
  fl::u32 a2:1;  ///< byte 'a', bit 2 (0x00000004)
  fl::u32 a3:1;  ///< byte 'a', bit 3 (0x00000008)
  fl::u32 a4:1;  ///< byte 'a', bit 4 (0x00000010)
  fl::u32 a5:1;  ///< byte 'a', bit 5 (0x00000020)
  fl::u32 a6:1;  ///< byte 'a', bit 6 (0x00000040)
  fl::u32 a7:1;  ///< byte 'a', bit 7 (0x00000080)
  fl::u32 b0:1;  ///< byte 'b', bit 0 (0x00000100)
  fl::u32 b1:1;  ///< byte 'b', bit 1 (0x00000200)
  fl::u32 b2:1;  ///< byte 'b', bit 2 (0x00000400)
  fl::u32 b3:1;  ///< byte 'b', bit 3 (0x00000800)
  fl::u32 b4:1;  ///< byte 'b', bit 4 (0x00001000)
  fl::u32 b5:1;  ///< byte 'b', bit 5 (0x00002000)
  fl::u32 b6:1;  ///< byte 'b', bit 6 (0x00004000)
  fl::u32 b7:1;  ///< byte 'b', bit 7 (0x00008000)
  fl::u32 c0:1;  ///< byte 'c', bit 0 (0x00010000)
  fl::u32 c1:1;  ///< byte 'c', bit 1 (0x00020000)
  fl::u32 c2:1;  ///< byte 'c', bit 2 (0x00040000)
  fl::u32 c3:1;  ///< byte 'c', bit 3 (0x00080000)
  fl::u32 c4:1;  ///< byte 'c', bit 4 (0x00100000)
  fl::u32 c5:1;  ///< byte 'c', bit 5 (0x00200000)
  fl::u32 c6:1;  ///< byte 'c', bit 6 (0x00400000)
  fl::u32 c7:1;  ///< byte 'c', bit 7 (0x00800000)
  fl::u32 d0:1;  ///< byte 'd', bit 0 (0x01000000)
  fl::u32 d1:1;  ///< byte 'd', bit 1 (0x02000000)
  fl::u32 d2:1;  ///< byte 'd', bit 2 (0x04000000)
  fl::u32 d3:1;  ///< byte 'd', bit 3 (0x08000000)
  fl::u32 d4:1;  ///< byte 'd', bit 4 (0x10000000)
  fl::u32 d5:1;  ///< byte 'd', bit 5 (0x20000000)
  fl::u32 d6:1;  ///< byte 'd', bit 6 (0x40000000)
  fl::u32 d7:1;  ///< byte 'd', bit 7 (0x80000000)
} sub4;

/// Union containing a full 8 bytes to swap the bit orientation on.
/// The same 8 bytes of storage can be viewed as two 32-bit words, as eight
/// individual bytes, or as two sub4 bit-access structs.
typedef union {
  fl::u32 word[2];  ///< two 32-bit values to load for swapping
  fl::u8 bytes[8];  ///< eight 8-bit values to load for swapping
  struct {
    sub4 a;  ///< 32-bit access struct for bit swapping, first four bytes (overlays word[0] / bytes[0-3])
    sub4 b;  ///< 32-bit access struct for bit swapping, second four bytes (overlays word[1] / bytes[4-7])
  };
} bitswap_type;


/// Set `out.X` bits 0, 1, 2, and 3 to bit N
/// of `in.a.a`, `in.a.b`, `in.a.c`, and `in.a.d`.
/// Expands to four assignment statements (the last one already ends in `;`)
/// and relies on variables literally named `in` and `out` being in scope.
/// The digit is token-pasted onto the end of X, so `SWAPSA(a.a,7)` assigns
/// `out.a.a0` .. `out.a.a3` from `in.a.a7`, `in.a.b7`, `in.a.c7`, `in.a.d7`.
/// @param X the member path in `out` to set, e.g. `a.a` (sub4 `a`, byte 'a')
/// @param N the bit of each byte to retrieve
/// @see bitswap_type
#define SWAPSA(X,N) out.  X ## 0 = in.a.a ## N; \
  out.  X ## 1 = in.a.b ## N; \
  out.  X ## 2 = in.a.c ## N; \
  out.  X ## 3 = in.a.d ## N;

/// Set `out.X` bits 0, 1, 2, and 3 to bit N
/// of `in.b.a`, `in.b.b`, `in.b.c`, and `in.b.d`.
/// Expands to four assignment statements (the last one already ends in `;`)
/// and relies on variables literally named `in` and `out` being in scope.
/// The digit is token-pasted onto the end of X, so `SWAPSB(a.a,7)` assigns
/// `out.a.a0` .. `out.a.a3` from `in.b.a7`, `in.b.b7`, `in.b.c7`, `in.b.d7`.
/// @param X the member path in `out` to set, e.g. `a.a` (sub4 `a`, byte 'a')
/// @param N the bit of each byte to retrieve
/// @see bitswap_type
#define SWAPSB(X,N) out.  X ## 0 = in.b.a ## N; \
  out.  X ## 1 = in.b.b ## N; \
  out.  X ## 2 = in.b.c ## N; \
  out.  X ## 3 = in.b.d ## N;

/// Set `out.X` bits 0 through 7 to bit N of the four bytes of `in.a`
/// followed by the four bytes of `in.b`, in order
/// (`in.a.a`, `in.a.b`, `in.a.c`, `in.a.d`, `in.b.a`, `in.b.b`, `in.b.c`, `in.b.d`).
/// Expands to eight assignment statements (the last one already ends in `;`)
/// and relies on variables literally named `in` and `out` being in scope.
/// The digit is token-pasted onto the end of X, so `SWAPS(a.a,7)` fills
/// `out.a.a0` .. `out.a.a7`.
/// @param X the member path in `out` to set, e.g. `a.a` (sub4 `a`, byte 'a')
/// @param N the bit of each byte to retrieve
/// @see bitswap_type
#define SWAPS(X,N) out.  X ## 0 = in.a.a ## N; \
  out.  X ## 1 = in.a.b ## N; \
  out.  X ## 2 = in.a.c ## N; \
  out.  X ## 3 = in.a.d ## N; \
  out.  X ## 4 = in.b.a ## N; \
  out.  X ## 5 = in.b.b ## N; \
  out.  X ## 6 = in.b.c ## N; \
  out.  X ## 7 = in.b.d ## N;


/// Do an 8-byte by 8-bit rotation.
/// Runs eight passes. On each pass, bits 7, 15, 23 and 31 of `in.word[0]`
/// and of `in.word[1]` (the top bit of each of the four byte lanes of the
/// word) are gathered into the `a0`..`a7` fields of a just8bits, and the
/// packed byte is stored to `out.bytes[pass]`. Both words are then shifted
/// left by one so the next pass sees the next lower bit of every lane.
/// @param in the 8 bytes to rotate; taken by value, so the caller's copy is
/// not modified by the shifting done here
/// @param out receives the 8 rotated bytes in `out.bytes[0..7]`
/// @note Which input byte each lane corresponds to, and which bit of the
/// output byte each field lands in, depend on target endianness and
/// bitfield allocation order - TODO confirm for each supported toolchain.
FASTLED_FORCE_INLINE void swapbits8(bitswap_type in, bitswap_type & out) {
  // Local aliases for the two words of the by-value copy.
  fl::u32 & first = in.word[0];
  fl::u32 & second = in.word[1];

  int pass = 0;
  while(pass < 8) {
    just8bits gathered;

    // Each field is one bit wide, so only the lowest bit of the shifted
    // value is kept.
    gathered.a0 = first >> 7;
    gathered.a1 = first >> 15;
    gathered.a2 = first >> 23;
    gathered.a3 = first >> 31;

    gathered.a4 = second >> 7;
    gathered.a5 = second >> 15;
    gathered.a6 = second >> 23;
    gathered.a7 = second >> 31;

    out.bytes[pass] = gathered.raw;

    // Expose the next lower bit of every byte lane for the following pass.
    first <<= 1;
    second <<= 1;
    ++pass;
  }
}

/// Slow version of the 8 byte by 8 bit rotation.
/// For every input row `r` (0..7) and every output byte `k` (0..7), bit `r`
/// of `B[k]` is set or cleared so that it equals bit `7 - k` of `A[r]`.
/// Every bit of `B[0..7]` is overwritten, so `B` does not need to be
/// initialised by the caller.
/// @param A the 8 input bytes (all 8 are read)
/// @param B the 8 output bytes (all 8 are written)
FASTLED_FORCE_INLINE void slowswap(unsigned char *A, unsigned char *B) {

  // All 8 rows must be visited: stopping at 7 would ignore A[7] and leave
  // bit 7 of every output byte holding whatever was in B beforehand.
  for(int row = 0; row < 8; ++row) {
    const fl::u8 x = A[row];

    // The bit position inside each output byte that this row controls.
    const fl::u8 bit = (1<<row);
    unsigned char *p = B;
    // Walk the input byte from its MSB (-> B[0]) down to its LSB (-> B[7]).
    for(fl::u32 mask = 1<<7 ; mask ; mask >>= 1) {
      if(x & mask) {
        *p++ |= bit;
      } else {
        *p++ &= ~bit;
      }
    }
  }
}

/// Simplified form of bits rotating function.
/// This rotates data into LSB for a faster write (the code using this data can happily walk the array backwards).
/// Based on code found here: https://web.archive.org/web/20190108225554/http://www.hackersdelight.org/hdcodetxt/transpose8.c.txt
/// Only declared in this header; the definition lives elsewhere.
/// @param A input bytes to rotate - presumably 8 bytes, as read by transpose8x1(); TODO confirm against the definition
/// @param B output buffer for the rotated bytes - presumably 8 bytes, as written by transpose8x1(); TODO confirm against the definition
void transpose8x1_noinline(unsigned char *A, unsigned char *B);

/// Simplified form of bits rotating function (force-inlined variant).
/// Reads 8 bytes from `A` as two 32-bit words, runs the three mask-and-shift
/// swap stages of the Hacker's Delight transpose8 routine on them, and writes
/// the two resulting words to `B`.
/// Based on code found here: https://web.archive.org/web/20190108225554/http://www.hackersdelight.org/hdcodetxt/transpose8.c.txt
/// @param A the 8 input bytes; read through 32-bit pointers
/// @param B the 8 output bytes; written through 32-bit pointers
/// @note Because both buffers are accessed as 32-bit words, the byte order
/// seen by the algorithm follows the target's endianness, and callers
/// presumably need to pass suitably aligned buffers on targets that fault
/// on unaligned word access - NOTE(review): confirm.
/// @see transpose8x1_noinline()
FASTLED_FORCE_INLINE void transpose8x1(unsigned char *A, unsigned char *B) {
  // Bring all eight input bytes in as two native 32-bit words.
  fl::u32 lo = *(unsigned int*)(A);
  fl::u32 hi = *(unsigned int*)(A+4);
  fl::u32 delta;

  // Stage 1 (shift 7): the two words are independent, so do both here.
  delta = (hi ^ (hi >> 7)) & 0x00AA00AA;
  hi ^= delta ^ (delta << 7);
  delta = (lo ^ (lo >> 7)) & 0x00AA00AA;
  lo ^= delta ^ (delta << 7);

  // Stage 2 (shift 14).
  delta = (hi ^ (hi >> 14)) & 0x0000CCCC;
  hi ^= delta ^ (delta << 14);
  delta = (lo ^ (lo >> 14)) & 0x0000CCCC;
  lo ^= delta ^ (delta << 14);

  // Stage 3: exchange nibbles between the two words.
  const fl::u32 mergedHi = (hi & 0xF0F0F0F0) | ((lo >> 4) & 0x0F0F0F0F);
  const fl::u32 mergedLo = ((hi << 4) & 0xF0F0F0F0) | (lo & 0x0F0F0F0F);

  // Store both words straight back out.
  *((uint32_t*)B) = mergedLo;
  *((uint32_t*)(B+4)) = mergedHi;
}

/// Simplified form of bits rotating function, byte-wise output variant.
/// Reads 8 bytes from `A` as two 32-bit words, runs the same three swap
/// stages as transpose8x1(), then writes the result one byte at a time:
/// the word loaded from `A[0..3]` ends up in `B[7]` (lowest byte) up to `B[4]`
/// (highest byte), and the word loaded from `A[4..7]` ends up in `B[3]` up to `B[0]`.
/// Based on code found here: https://web.archive.org/web/20190108225554/http://www.hackersdelight.org/hdcodetxt/transpose8.c.txt
/// @param A the 8 input bytes; read through 32-bit pointers
/// @param B the 8 output bytes; written individually
/// @note The input is read as 32-bit words, so the byte order seen by the
/// algorithm follows the target's endianness, and callers presumably need
/// to pass a suitably aligned `A` on targets that fault on unaligned word
/// access - NOTE(review): confirm.
FASTLED_FORCE_INLINE void transpose8x1_MSB(unsigned char *A, unsigned char *B) {
  // Bring all eight input bytes in as two native 32-bit words.
  fl::u32 lo = *(unsigned int*)(A);
  fl::u32 hi = *(unsigned int*)(A+4);
  fl::u32 delta;

  // Stage 1 (shift 7): the two words are independent, so do both here.
  delta = (hi ^ (hi >> 7)) & 0x00AA00AA;
  hi ^= delta ^ (delta << 7);
  delta = (lo ^ (lo >> 7)) & 0x00AA00AA;
  lo ^= delta ^ (delta << 7);

  // Stage 2 (shift 14).
  delta = (hi ^ (hi >> 14)) & 0x0000CCCC;
  hi ^= delta ^ (delta << 14);
  delta = (lo ^ (lo >> 14)) & 0x0000CCCC;
  lo ^= delta ^ (delta << 14);

  // Stage 3: exchange nibbles between the two words.
  const fl::u32 mergedHi = (hi & 0xF0F0F0F0) | ((lo >> 4) & 0x0F0F0F0F);
  const fl::u32 mergedLo = ((hi << 4) & 0xF0F0F0F0) | (lo & 0x0F0F0F0F);

  // Emit byte by byte; each store keeps only the low 8 bits of its value.
  B[7] = mergedLo;
  B[6] = mergedLo >> 8;
  B[5] = mergedLo >> 16;
  B[4] = mergedLo >> 24;

  B[3] = mergedHi;
  B[2] = mergedHi >> 8;
  B[1] = mergedHi >> 16;
  B[0] = mergedHi >> 24;
}

/// Templated bit-rotating function.
/// Gathers 8 input bytes from `A` (spaced `m` bytes apart), runs the three
/// mask-and-shift swap stages of the Hacker's Delight transpose8 routine,
/// and scatters the 8 result bytes into `B` (spaced `n` bytes apart).
/// Based on code found here: https://web.archive.org/web/20190108225554/http://www.hackersdelight.org/hdcodetxt/transpose8.c.txt
/// @tparam m stride, in bytes, between consecutive input bytes in `A`
/// @tparam n stride, in bytes, between consecutive output bytes in `B`
/// @param A input; bytes `A[0]`, `A[m]`, ... `A[7*m]` are read
/// @param B output; bytes `B[0]`, `B[n]`, ... `B[7*n]` are written, anything
/// in between is left untouched
/// @note NOTE(review): the `m == 1` fast path loads `A[0..3]` into `y` and
/// `A[4..7]` into `x` with native 32-bit reads, while the general path packs
/// `A[0]` into the top byte of `x`. On a little-endian target the two paths
/// therefore consume the input rows in opposite order - confirm callers
/// expect this. The fast path also reads `A` through 32-bit pointers, so it
/// presumably needs a suitably aligned buffer on targets that fault on
/// unaligned word access.
template<int m, int n>
FASTLED_FORCE_INLINE void transpose8(unsigned char *A, unsigned char *B) {
  fl::u32 x, y, t;

  // Load the array and pack it into x and y.
  if(m == 1) {
    y = *(unsigned int*)(A);
    x = *(unsigned int*)(A+4);
  } else {
    // Widen each byte to 32 bits *before* shifting: an unsigned char is
    // promoted to signed int, and shifting a value >= 0x80 left by 24 would
    // push a bit into the sign bit of that int.
    x = (static_cast<fl::u32>(A[0])   << 24) | (static_cast<fl::u32>(A[m])   << 16)
      | (static_cast<fl::u32>(A[2*m]) << 8)  |  static_cast<fl::u32>(A[3*m]);
    y = (static_cast<fl::u32>(A[4*m]) << 24) | (static_cast<fl::u32>(A[5*m]) << 16)
      | (static_cast<fl::u32>(A[6*m]) << 8)  |  static_cast<fl::u32>(A[7*m]);
  }

  // pre-transform x
  t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
  t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);

  // pre-transform y
  t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
  t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);

  // final transform: exchange nibbles between the two words
  t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
  y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
  x = t;

  // Scatter the result: y supplies B[7*n] (its lowest byte) up to B[4*n],
  // x supplies B[3*n] (its lowest byte) up to B[0]. Each store keeps only
  // the low 8 bits of the word.
  B[7*n] = y; y >>= 8;
  B[6*n] = y; y >>= 8;
  B[5*n] = y; y >>= 8;
  B[4*n] = y;

  B[3*n] = x; x >>= 8;
  B[2*n] = x; x >>= 8;
  B[n] = x; x >>= 8;
  B[0] = x;
}

#endif

FASTLED_NAMESPACE_END

#endif