initial commit

2026-02-12 00:45:31 -08:00
commit 5f168f370b
3024 changed files with 804889 additions and 0 deletions
--- a/libraries/FastLED/src/lib8tion/config.h
+++ b/libraries/FastLED/src/lib8tion/config.h
@@ -0,0 +1,161 @@
+#pragma once
+
+// Per-architecture switchboard for lib8tion: each primitive NAME gets NAME_C (C code),
+// and where available NAME_AVRASM or NAME_ARM_DSP_ASM (inline asm), set to 1 or 0.
+#if defined(__arm__)
+
+#if defined(FASTLED_TEENSY3)
+// Can use Cortex M4 DSP instructions for the two saturating adds
+#define QADD8_C 0
+#define QADD7_C 0
+#define QADD8_ARM_DSP_ASM 1
+#define QADD7_ARM_DSP_ASM 1
+#else
+// Generic ARM: the saturating adds use the C versions too
+#define QADD8_C 1
+#define QADD7_C 1
+#endif // end of defined(FASTLED_TEENSY3)
+
+#define QSUB8_C 1
+#define SCALE8_C 1
+#define SCALE16BY8_C 1
+#define SCALE16_C 1
+#define ABS8_C 1
+#define MUL8_C 1
+#define QMUL8_C 1
+#define ADD8_C 1
+#define SUB8_C 1
+#define EASE8_C 1
+#define AVG8_C 1
+#define AVG8R_C 1
+#define AVG7_C 1
+#define AVG16_C 1
+#define AVG16R_C 1
+#define AVG15_C 1
+#define BLEND8_C 1
+
+// end of #if defined(__arm__)
+
+#elif defined(ARDUINO_ARCH_APOLLO3)
+
+// Default to using the standard C functions for now
+#define QADD8_C 1
+#define QADD7_C 1
+#define QSUB8_C 1
+#define SCALE8_C 1
+#define SCALE16BY8_C 1
+#define SCALE16_C 1
+#define ABS8_C 1
+#define MUL8_C 1
+#define QMUL8_C 1
+#define ADD8_C 1
+#define SUB8_C 1
+#define EASE8_C 1
+#define AVG8_C 1
+#define AVG8R_C 1
+#define AVG7_C 1
+#define AVG16_C 1
+#define AVG16R_C 1
+#define AVG15_C 1
+#define BLEND8_C 1
+
+// end of #elif defined(ARDUINO_ARCH_APOLLO3)
+
+#elif defined(__AVR__)
+
+// AVR ATmega and friends Arduino
+
+#define QADD8_C 0
+#define QADD7_C 0
+#define QSUB8_C 0
+#define ABS8_C 0
+#define ADD8_C 0
+#define SUB8_C 0
+#define AVG8_C 0
+#define AVG8R_C 0
+#define AVG7_C 0
+#define AVG16_C 0
+#define AVG16R_C 0
+#define AVG15_C 0
+
+#define QADD8_AVRASM 1
+#define QADD7_AVRASM 1
+#define QSUB8_AVRASM 1
+#define ABS8_AVRASM 1
+#define ADD8_AVRASM 1
+#define SUB8_AVRASM 1
+#define AVG8_AVRASM 1
+#define AVG8R_AVRASM 1
+#define AVG7_AVRASM 1
+#define AVG16_AVRASM 1
+#define AVG16R_AVRASM 1
+#define AVG15_AVRASM 1
+
+// Note: these require hardware MUL instruction
+//       -- sorry, ATtiny!
+#if !defined(LIB8_ATTINY)
+#define SCALE8_C 0
+#define SCALE16BY8_C 0
+#define SCALE16_C 0
+#define MUL8_C 0
+#define QMUL8_C 0
+#define EASE8_C 0
+#define BLEND8_C 0
+#define SCALE8_AVRASM 1
+#define SCALE16BY8_AVRASM 1
+#define SCALE16_AVRASM 1
+#define MUL8_AVRASM 1
+#define QMUL8_AVRASM 1
+#define EASE8_AVRASM 1
+#define CLEANUP_R1_AVRASM 1 // only defined here; the ATtiny branch below has no counterpart
+#define BLEND8_AVRASM 1
+#else
+// On ATtiny, we just use C implementations
+#define SCALE8_C 1
+#define SCALE16BY8_C 1
+#define SCALE16_C 1
+#define MUL8_C 1
+#define QMUL8_C 1
+#define EASE8_C 1
+#define BLEND8_C 1
+#define SCALE8_AVRASM 0
+#define SCALE16BY8_AVRASM 0
+#define SCALE16_AVRASM 0
+#define MUL8_AVRASM 0
+#define QMUL8_AVRASM 0
+#define EASE8_AVRASM 0
+#define BLEND8_AVRASM 0
+#endif // end of !defined(LIB8_ATTINY)
+
+// end of #elif defined(__AVR__)
+
+#else
+
+// Doxygen: ignore these macros
+/// @cond
+
+// unspecified architecture, so
+// no ASM, everything in C
+#define QADD8_C 1
+#define QADD7_C 1
+#define QSUB8_C 1
+#define SCALE8_C 1
+#define SCALE16BY8_C 1
+#define SCALE16_C 1
+#define ABS8_C 1
+#define MUL8_C 1
+#define QMUL8_C 1
+#define ADD8_C 1
+#define SUB8_C 1
+#define EASE8_C 1
+#define AVG8_C 1
+#define AVG8R_C 1
+#define AVG7_C 1
+#define AVG16_C 1
+#define AVG16R_C 1
+#define AVG15_C 1
+#define BLEND8_C 1
+
+/// @endcond
+
+#endif
--- a/libraries/FastLED/src/lib8tion/intmap.h
+++ b/libraries/FastLED/src/lib8tion/intmap.h
@@ -0,0 +1,66 @@
+/// @file intmap.h
+/// Defines integer mapping functions
+
+#pragma once
+
+#include "fl/namespace.h"
+#include "lib8static.h"
+#include "fl/stdint.h"
+
+FASTLED_NAMESPACE_BEGIN
+
+/// @addtogroup lib8tion
+/// @{
+
+/// @defgroup intmap Integer Mapping Functions
+/// Maps a scalar from one integer size to another.
+///
+/// For example, a value representing 40% as an 8-bit unsigned integer would be
+/// `102 / 255`. Using `map8_to_16(uint8_t)` to convert that to a 16-bit
+/// unsigned integer would give you `26,214 / 65,535`, exactly 40% through the
+/// larger range.
+///
+/// @{
+// map8_to_16: widen 8 -> 16 bits by repeating the byte (0xAB -> 0xABAB), so 0xFF -> 0xFFFF.
+LIB8STATIC_ALWAYS_INLINE uint16_t map8_to_16(uint8_t x) {
+    return static_cast<uint16_t>((uint16_t(x) << 8) | x);
+}
+// map16_to_32: widen 16 -> 32 bits by repeating the half-word (0xABCD -> 0xABCDABCD).
+LIB8STATIC_ALWAYS_INLINE uint32_t map16_to_32(uint16_t x) {
+    return (uint32_t(x) << 16) | x;
+}
+
+// map16_to_8: narrow a 16-bit value to 8 bits, rounding to the nearest step.
+//   Inputs of 0xff00 and above are pinned to 0xff.
+LIB8STATIC_ALWAYS_INLINE uint8_t map16_to_8(uint16_t x) {
+    // Per the original author, this tracks a double precision floating
+    // point evaluation of the same mapping very closely.
+    if (x >= 0xff00) {
+        return 0xff; // saturate the top of the range
+    }
+    if (x != 0) {
+        return uint8_t((x + 128) >> 8); // add half an output step, keep the high byte
+    }
+    return 0;
+}
+// map32_to_16: narrow a 32-bit value to 16 bits, rounding to the nearest step.
+LIB8STATIC_ALWAYS_INLINE uint16_t map32_to_16(uint32_t x) {
+    // Per the original author, this tracks a double precision floating
+    // point evaluation of the same mapping very closely.
+    if (x >= 0xffff0000) {
+        return 0xffff; // saturate the top of the range
+    }
+    if (x != 0) {
+        return uint16_t((x + 32768) >> 16); // add half an output step, keep the high half
+    }
+    return 0;
+}
+// map8_to_32: widen 8 -> 32 bits by repeating the byte in all four positions (0xAB -> 0xABABABAB).
+LIB8STATIC_ALWAYS_INLINE uint32_t map8_to_32(uint8_t x) {
+    return uint32_t(0x1010101) * x;
+}
+
+/// @} intmap
+/// @} lib8tion
+
+FASTLED_NAMESPACE_END
--- a/libraries/FastLED/src/lib8tion/lib8static.h
+++ b/libraries/FastLED/src/lib8tion/lib8static.h
@@ -0,0 +1,14 @@
+/// @file lib8static.h
+/// Defines static inlining macros for lib8tion functions
+
+#pragma once
+
+/// @addtogroup lib8tion
+/// @{
+
+/// Declare a LIB8TION function as `static inline`, tagged with the "unused" attribute
+#define LIB8STATIC __attribute__ ((unused)) static inline
+/// Declare a LIB8TION function as `static inline`, tagged with the "always_inline" attribute
+#define LIB8STATIC_ALWAYS_INLINE __attribute__ ((always_inline)) static inline
+
+/// @} lib8tion
--- a/libraries/FastLED/src/lib8tion/math8.h
+++ b/libraries/FastLED/src/lib8tion/math8.h
@@ -0,0 +1,701 @@
+#pragma once
+
+#include "lib8tion/config.h"
+#include "scale8.h"
+#include "lib8tion/lib8static.h"
+#include "intmap.h"
+#include "fl/namespace.h"
+
+#include "fl/compiler_control.h"
+
+
+FL_DISABLE_WARNING_PUSH
+FL_DISABLE_WARNING_UNUSED_PARAMETER
+FL_DISABLE_WARNING_RETURN_TYPE
+FL_DISABLE_WARNING_IMPLICIT_INT_CONVERSION
+
+FASTLED_NAMESPACE_BEGIN
+
+/// @file math8.h
+/// Fast, efficient 8-bit math functions specifically
+/// designed for high-performance LED programming.
+
+/// @ingroup lib8tion
+/// @{
+
+/// @defgroup Math Basic Math Operations
+/// Fast, efficient 8-bit math functions specifically
+/// designed for high-performance LED programming.
+///
+/// Because of the AVR (Arduino) and ARM assembly language
+/// implementations provided, using these functions often
+/// results in smaller and faster code than the equivalent
+/// program using plain "C" arithmetic and logic.
+/// @{
+
+/// Add one byte to another, saturating at 0xFF (e.g. 200 + 100 gives 255, not 44)
+/// @param i first byte to add
+/// @param j second byte to add
+/// @returns the sum of i + j, capped at 0xFF
+LIB8STATIC_ALWAYS_INLINE uint8_t qadd8(uint8_t i, uint8_t j) {
+#if QADD8_C == 1
+    unsigned int t = i + j; // summed wider than 8 bits so the overflow stays visible
+    if (t > 255)
+        t = 255;
+    return static_cast<uint8_t>(t);
+#elif QADD8_AVRASM == 1
+    asm volatile(
+        /* First, add j to i, conditioning the C flag */
+        "add %0, %1    \n\t"
+
+        /* Now test the C flag.
+        If C is clear, we branch around a load of 0xFF into i.
+        If C is set, we go ahead and load 0xFF into i.
+        */
+        "brcc L_%=     \n\t"
+        "ldi %0, 0xFF  \n\t"
+        "L_%=: "
+        : "+d"(i) // r16-r31, restricted by ldi
+        : "r"(j));
+    return i;
+#elif QADD8_ARM_DSP_ASM == 1
+    asm volatile("uqadd8 %0, %0, %1" : "+r"(i) : "r"(j)); // i = uqadd8(i, j), in place
+    return i;
+#else
+#error "No implementation for qadd8 available."
+#endif
+}
+
+/// Add one signed byte to another, saturating at 0x7F and -0x80 (e.g. 100 + 100 gives 127)
+/// @param i first byte to add
+/// @param j second byte to add
+/// @returns the sum of i + j, capped at 0x7F and -0x80
+LIB8STATIC_ALWAYS_INLINE int8_t qadd7(int8_t i, int8_t j) {
+#if QADD7_C == 1
+    int16_t t = i + j; // 16 bits hold any sum of two int8_t values
+    if (t > 127)
+        t = 127;
+    else if (t < -128)
+        t = -128;
+    return static_cast<int8_t>(t);
+#elif QADD7_AVRASM == 1
+    asm volatile(
+        /* First, add j to i, conditioning the V and C flags */
+        "add %0, %1    \n\t"
+
+        /* Now test the V flag.
+        If V is clear, we branch to end.
+        If V is set, we go ahead and load 0x7F into i.
+        */
+        "brvc L_%=     \n\t"
+        "ldi %0, 0x7F  \n\t"
+
+        /* When both numbers are negative, C is set.
+        Adding it turns 0x7F into 0x80, the negative limit. */
+        "adc %0, __zero_reg__\n\t"
+        "L_%=: "
+        : "+d"(i) // r16-r31, restricted by ldi
+        : "r"(j));
+    return i;
+#elif QADD7_ARM_DSP_ASM == 1
+    asm volatile("qadd8 %0, %0, %1" : "+r"(i) : "r"(j)); // i = qadd8(i, j), in place
+    return i;
+#else
+#error "No implementation for qadd7 available."
+#endif
+}
+
+/// Subtract one byte from another, saturating at 0x00 (e.g. 10 - 20 gives 0, not 246)
+/// @param i byte to subtract from
+/// @param j byte to subtract
+/// @returns i - j with a floor of 0
+LIB8STATIC_ALWAYS_INLINE uint8_t qsub8(uint8_t i, uint8_t j) {
+#if QSUB8_C == 1
+    int t = i - j; // signed, so a negative difference can be detected
+    if (t < 0)
+        t = 0;
+    return static_cast<uint8_t>(t);
+#elif QSUB8_AVRASM == 1
+
+    asm volatile(
+        /* First, subtract j from i, conditioning the C flag */
+        "sub %0, %1    \n\t"
+
+        /* Now test the C flag.
+        If C is clear, we branch around a load of 0x00 into i.
+        If C is set, we go ahead and load 0x00 into i.
+        */
+        "brcc L_%=     \n\t"
+        "ldi %0, 0x00  \n\t"
+        "L_%=: "
+        : "+d"(i) // r16-r31, restricted by ldi
+        : "r"(j));
+    return i;
+#else
+#error "No implementation for qsub8 available."
+#endif
+}
+
+/// Add one byte to another, with 8-bit result (anything above bit 7 is dropped)
+/// @note This does not saturate and may overflow!
+/// @param i first byte to add
+/// @param j second byte to add
+/// @returns the sum of i + j, 8-bit
+LIB8STATIC_ALWAYS_INLINE uint8_t add8(uint8_t i, uint8_t j) {
+#if ADD8_C == 1
+    int t = i + j;
+    return static_cast<uint8_t>(t); // narrowing keeps only the low 8 bits
+#elif ADD8_AVRASM == 1
+    // Add j to i, period.
+    asm volatile("add %0, %1" : "+r"(i) : "r"(j));
+    return i;
+#else
+#error "No implementation for add8 available."
+#endif
+}
+
+/// Add one byte to two bytes, with 16-bit result
+/// @note This does not saturate and may overflow!
+/// @param i first value to add, 8-bit
+/// @param j second value to add, 16-bit
+/// @returns the sum of i + j, 16-bit
+LIB8STATIC_ALWAYS_INLINE uint16_t add8to16(uint8_t i, uint16_t j) {
+#if ADD8_C == 1
+    uint16_t t = i + j; // narrowing back to 16 bits discards any carry out
+    return t;
+#elif ADD8_AVRASM == 1
+    // Add i(one byte) to j(two bytes): low byte first, then the carry into the high byte
+    asm volatile("add %A[j], %[i]              \n\t"
+                 "adc %B[j], __zero_reg__      \n\t"
+                 : [j] "+r"(j)
+                 : [i] "r"(i));
+    return j; // the sum was accumulated in j; i is still only the 8-bit addend
+#else
+#error "No implementation for add8to16 available."
+#endif
+}
+
+/// Subtract one byte from another, 8-bit result (a negative difference wraps modulo 256)
+/// @note This does not saturate and may overflow!
+/// @param i byte to subtract from
+/// @param j byte to subtract
+/// @returns i - j
+LIB8STATIC_ALWAYS_INLINE uint8_t sub8(uint8_t i, uint8_t j) {
+#if SUB8_C == 1
+    int t = i - j;
+    return static_cast<uint8_t>(t); // narrowing keeps only the low 8 bits
+#elif SUB8_AVRASM == 1
+    // Subtract j from i, period.
+    asm volatile("sub %0, %1" : "+r"(i) : "r"(j));
+    return i;
+#else
+#error "No implementation for sub8 available."
+#endif
+}
+
+/// Calculate an integer average of two unsigned
+/// 8-bit integer values (uint8_t), rounded down.
+/// Fractional results are rounded down, e.g. avg8(20,41) = 30
+/// @param i first value to average
+/// @param j second value to average
+/// @returns mean average of i and j, rounded down
+LIB8STATIC_ALWAYS_INLINE uint8_t avg8(uint8_t i, uint8_t j) {
+#if AVG8_C == 1
+    return (i + j) >> 1; // the sum is formed in int, so its 9th bit survives until the shift
+#elif AVG8_AVRASM == 1
+    asm volatile(
+        /* First, add j to i, 9th bit overflows into C flag */
+        "add %0, %1    \n\t"
+        /* Divide by two, moving C flag into high 8th bit */
+        "ror %0        \n\t"
+        : "+r"(i)
+        : "r"(j));
+    return i;
+#else
+#error "No implementation for avg8 available."
+#endif
+}
+
+/// Calculate an integer average of two unsigned
+/// 16-bit integer values (uint16_t), rounded down.
+/// Fractional results are rounded down, e.g. avg16(20,41) = 30
+/// @param i first value to average
+/// @param j second value to average
+/// @returns mean average of i and j, rounded down
+LIB8STATIC_ALWAYS_INLINE uint16_t avg16(uint16_t i, uint16_t j) {
+#if AVG16_C == 1
+    // Accumulate in 32 bits so the 17th bit of the sum survives until the shift.
+    uint32_t tmp = i;
+    tmp += j;
+    return static_cast<uint16_t>(tmp >> 1);
+#elif AVG16_AVRASM == 1
+    asm volatile(
+        /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
+        "add %A[i], %A[j]    \n\t"
+        /* Now, add C + jHi to iHi, 17th bit overflows into C flag */
+        "adc %B[i], %B[j]    \n\t"
+        /* Divide iHi by two, moving C flag into high 16th bit;
+           the old 9th bit (low bit of iHi) is now
+           in C */
+        "ror %B[i]        \n\t"
+        /* Divide iLo by two, moving C flag into high 8th bit */
+        "ror %A[i]        \n\t"
+        : [i] "+r"(i)
+        : [j] "r"(j));
+    return i;
+#else
+#error "No implementation for avg16 available."
+#endif
+}
+
+/// Calculate an integer average of two unsigned
+/// 8-bit integer values (uint8_t), rounded up.
+/// Fractional results are rounded up, e.g. avg8r(20,41) = 31
+/// @param i first value to average
+/// @param j second value to average
+/// @returns mean average of i and j, rounded up
+LIB8STATIC_ALWAYS_INLINE uint8_t avg8r(uint8_t i, uint8_t j) {
+#if AVG8R_C == 1
+    return (i + j + 1) >> 1; // the +1 before halving is what rounds an odd sum up
+#elif AVG8R_AVRASM == 1
+    asm volatile(
+        /* First, add j to i, 9th bit overflows into C flag */
+        "add %0, %1          \n\t"
+        /* Divide by two, moving C flag into high 8th bit;
+           the old 1st bit is now in C */
+        "ror %0              \n\t"
+        /* Add C flag (the bit shifted out), rounding the result up */
+        "adc %0, __zero_reg__\n\t"
+        : "+r"(i)
+        : "r"(j));
+    return i;
+#else
+#error "No implementation for avg8r available."
+#endif
+}
+
+/// Calculate an integer average of two unsigned
+/// 16-bit integer values (uint16_t), rounded up.
+/// Fractional results are rounded up, e.g. avg16r(20,41) = 31
+/// @param i first value to average
+/// @param j second value to average
+/// @returns mean average of i and j, rounded up
+LIB8STATIC_ALWAYS_INLINE uint16_t avg16r(uint16_t i, uint16_t j) {
+#if AVG16R_C == 1
+    // Accumulate in 32 bits so the 17th bit of (i + j + 1) survives until the shift.
+    uint32_t tmp = i;
+    tmp += j;
+    tmp += 1;
+    return static_cast<uint16_t>(tmp >> 1);
+#elif AVG16R_AVRASM == 1
+    asm volatile(
+        /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
+        "add %A[i], %A[j]    \n\t"
+        /* Now, add C + jHi to iHi, 17th bit overflows into C flag */
+        "adc %B[i], %B[j]    \n\t"
+        /* Divide iHi by two, moving C flag into high 16th bit;
+           the old 9th bit (low bit of iHi) is now
+           in C */
+        "ror %B[i]        \n\t"
+        /* Divide iLo by two, moving C flag into high 8th bit;
+           the old 1st bit is now
+           in C */
+        "ror %A[i]        \n\t"
+        /* Add C flag (the bit shifted out) to the 16-bit result, rounding up */
+        "adc %A[i], __zero_reg__\n\t"
+        "adc %B[i], __zero_reg__\n\t"
+        : [i] "+r"(i)
+        : [j] "r"(j));
+    return i;
+#else
+#error "No implementation for avg16r available."
+#endif
+}
+
+/// Calculate an integer average of two signed 7-bit
+/// integers (int8_t).
+/// If the first argument is even, result is rounded down.
+/// If the first argument is odd, result is rounded up.
+/// @param i first value to average
+/// @param j second value to average
+/// @returns mean average of i and j, rounded
+LIB8STATIC_ALWAYS_INLINE int8_t avg7(int8_t i, int8_t j) {
+#if AVG7_C == 1
+    return (i >> 1) + (j >> 1) + (i & 0x1); // halve each operand, then add back i's low bit
+#elif AVG7_AVRASM == 1
+    asm volatile("asr %1        \n\t"
+                 "asr %0        \n\t"
+                 "adc %0, %1    \n\t"
+                 /* j is halved in place by the first asr, so it must be a read-write operand */
+                 : "+r"(i), "+r"(j));
+    return i;
+#else
+#error "No implementation for avg7 available."
+#endif
+}
+
+/// Calculate an integer average of two signed 15-bit
+/// integers (int16_t).
+/// If the first argument is even, result is rounded down.
+/// If the first argument is odd, result is rounded up.
+/// @param i first value to average
+/// @param j second value to average
+/// @returns mean average of i and j, rounded
+LIB8STATIC_ALWAYS_INLINE int16_t avg15(int16_t i, int16_t j) {
+#if AVG15_C == 1
+    return (i >> 1) + (j >> 1) + (i & 0x1); // halve each operand, then add back i's low bit
+#elif AVG15_AVRASM == 1
+    asm volatile(
+        /* first divide j by 2, throwing away lowest bit */
+        "asr %B[j]          \n\t"
+        "ror %A[j]          \n\t"
+        /* now divide i by 2, with lowest bit going into C */
+        "asr %B[i]          \n\t"
+        "ror %A[i]          \n\t"
+        /* add j + C to i */
+        "adc %A[i], %A[j]   \n\t"
+        "adc %B[i], %B[j]   \n\t"
+        /* j is shifted in place above, so it is declared read-write as well */
+        : [i] "+r"(i), [j] "+r"(j));
+    return i;
+#else
+#error "No implementation for avg15 available."
+#endif
+}
+
+/// Calculate the remainder of one unsigned 8-bit
+/// value divided by another, aka A % M.
+/// Implemented by repeated subtraction, which is
+/// very compact, and very fast if A is "probably"
+/// less than M.  If A is a large multiple of M,
+/// the loop has to execute multiple times.  However,
+/// even in that case, the loop is only two
+/// instructions long on AVR, i.e., quick.
+/// @param a dividend byte
+/// @param m divisor byte; must be non-zero, because subtracting 0 never ends the loop
+/// @returns remainder of a / m (i.e. a % m)
+LIB8STATIC_ALWAYS_INLINE uint8_t mod8(uint8_t a, uint8_t m) {
+#if defined(__AVR__)
+    asm volatile("L_%=:  sub %[a],%[m]    \n\t"
+                 "       brcc L_%=        \n\t" // keep subtracting until a borrow occurs
+                 "       add %[a],%[m]    \n\t" // undo the one subtraction that borrowed
+                 : [a] "+r"(a)
+                 : [m] "r"(m));
+#else
+    while (a >= m)
+        a -= m;
+#endif
+    return a;
+}
+
+/// Add two numbers, and calculate the modulo
+/// of the sum and a third number, M.
+/// In other words, it returns (A+B) % M.
+/// It is designed as a compact mechanism for
+/// incrementing a "mode" switch and wrapping
+/// around back to "mode 0" when the switch
+/// goes past the end of the available range.
+/// e.g. if you have seven modes, this switches
+/// to the next one and wraps around if needed:
+///   @code{.cpp}
+///   mode = addmod8( mode, 1, 7);
+///   @endcode
+/// @param a dividend byte
+/// @param b value to add to the dividend (a + b is kept in 8 bits, so it wraps at 256 before the modulo)
+/// @param m divisor byte; must be non-zero, because subtracting 0 never ends the loop
+/// @returns remainder of (a + b) / m
+/// @see mod8() for notes on performance.
+LIB8STATIC uint8_t addmod8(uint8_t a, uint8_t b, uint8_t m) {
+#if defined(__AVR__)
+    asm volatile("       add %[a],%[b]    \n\t"
+                 "L_%=:  sub %[a],%[m]    \n\t"
+                 "       brcc L_%=        \n\t" // keep subtracting until a borrow occurs
+                 "       add %[a],%[m]    \n\t" // undo the one subtraction that borrowed
+                 : [a] "+r"(a)
+                 : [b] "r"(b), [m] "r"(m));
+#else
+    a += b;
+    while (a >= m)
+        a -= m;
+#endif
+    return a;
+}
+
+/// Subtract two numbers, and calculate the modulo
+/// of the difference and a third number, M.
+/// In other words, it returns (A-B) % M.
+/// It is designed as a compact mechanism for
+/// decrementing a "mode" switch and wrapping
+/// around back to "mode 0" when the switch
+/// goes past the start of the available range.
+/// e.g. if you have seven modes, this switches
+/// to the previous one and wraps around if needed:
+///   @code{.cpp}
+///   mode = submod8( mode, 1, 7);
+///   @endcode
+/// @param a dividend byte
+/// @param b value to subtract from the dividend (if b > a, the 8-bit difference wraps to 256 + a - b)
+/// @param m divisor byte; must be non-zero, because subtracting 0 never ends the loop
+/// @returns remainder of (a - b) / m. NOTE(review): due to the wrap, submod8(0, 1, 7) is 255 % 7 == 3, not 6
+/// @see mod8() for notes on performance.
+LIB8STATIC uint8_t submod8(uint8_t a, uint8_t b, uint8_t m) {
+#if defined(__AVR__)
+    asm volatile("       sub %[a],%[b]    \n\t"
+                 "L_%=:  sub %[a],%[m]    \n\t"
+                 "       brcc L_%=        \n\t" // keep subtracting until a borrow occurs
+                 "       add %[a],%[m]    \n\t" // undo the one subtraction that borrowed
+                 : [a] "+r"(a)
+                 : [b] "r"(b), [m] "r"(m));
+#else
+    a -= b;
+    while (a >= m)
+        a -= m;
+#endif
+    return a;
+}
+
+/// 8x8 bit multiplication, with 8-bit result (the low byte of the 16-bit product).
+/// @param i first byte to multiply
+/// @param j second byte to multiply
+/// @returns the product of i * j
+/// @note This does not saturate and may overflow!
+LIB8STATIC_ALWAYS_INLINE uint8_t mul8(uint8_t i, uint8_t j) {
+#if MUL8_C == 1
+    return ((int)i * (int)(j)) & 0xFF; // keep only the low byte of the product
+#elif MUL8_AVRASM == 1
+    asm volatile(
+        /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
+        "mul %0, %1          \n\t"
+        /* Extract the LOW 8-bits (r0) */
+        "mov %0, r0          \n\t"
+        /* Restore r1 to "0"; it's expected to always be that */
+        "clr __zero_reg__    \n\t"
+        : "+r"(i)
+        : "r"(j)
+        : "r0", "r1");
+    return i;
+#else
+#error "No implementation for mul8 available."
+#endif
+}
+
+/// 8x8 bit multiplication with 8-bit result, saturating at 0xFF (e.g. 16 * 16 gives 255).
+/// @param i first byte to multiply
+/// @param j second byte to multiply
+/// @returns the product of i * j, capping at 0xFF
+LIB8STATIC_ALWAYS_INLINE uint8_t qmul8(uint8_t i, uint8_t j) {
+#if QMUL8_C == 1
+    unsigned p = (unsigned)i * (unsigned)j;
+    if (p > 255)
+        p = 255;
+    return p; // p is at most 255 here, so the narrowing loses nothing
+#elif QMUL8_AVRASM == 1
+    asm volatile(
+        /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
+        "  mul %0, %1          \n\t"
+        /* Extract the LOW 8-bits (r0) */
+        "  mov %0, r0          \n\t"
+        /* If high byte of result is zero, all is well. */
+        "  tst r1              \n\t"
+        "  breq Lnospill_%=    \n\t"
+        /* If high byte of result > 0, saturate to 0xFF */
+        "  ldi %0, 0xFF         \n\t"
+        "Lnospill_%=:          \n\t"
+        /* Restore r1 to "0"; it's expected to always be that */
+        "  clr __zero_reg__    \n\t"
+        : "+d"(i) // r16-r31, restricted by ldi
+        : "r"(j)
+        : "r0", "r1");
+    return i;
+#else
+#error "No implementation for qmul8 available."
+#endif
+}
+
+/// Take the absolute value of a signed 8-bit value (int8_t). +128 does not fit in int8_t, so -128 has no representable result.
+LIB8STATIC_ALWAYS_INLINE int8_t abs8(int8_t i) {
+#if ABS8_C == 1
+    if (i < 0)
+        i = -i;
+    return i;
+#elif ABS8_AVRASM == 1
+    asm volatile(
+        /* First, check the high bit, and prepare to skip if it's clear */
+        "sbrc %0, 7 \n"
+
+        /* Negate the value (skipped when bit 7 was clear, i.e. i >= 0) */
+        "neg %0     \n"
+
+        : "+r"(i)
+        : "r"(i));
+    return i;
+#else
+#error "No implementation for abs8 available."
+#endif
+}
+
+/// Square root for 16-bit integers, rounded down (binary search between 1 and 255).
+/// About three times faster and five times smaller
+/// than Arduino's general `sqrt` on AVR.
+LIB8STATIC uint8_t sqrt16(uint16_t x) {
+    if (x <= 1) {
+        return x; // 0 and 1 are their own square roots
+    }
+
+    uint8_t low = 1; // lower bound
+    uint8_t hi, mid;
+
+    if (x > 7904) {
+        hi = 255; // the estimate below already reaches 255 at x == 7904
+    } else {
+        hi = (x >> 5) + 8; // initial estimate for upper bound
+    }
+
+    do {
+        mid = (low + hi) >> 1;
+        // Square in unsigned 16 bits: as plain int, mid * mid overflows where int is 16-bit (AVR).
+        if ((uint16_t)((uint16_t)mid * (uint16_t)mid) > x) {
+            hi = mid - 1;
+        } else {
+            if (mid == 255) {
+                return 255; // low = mid + 1 would wrap to 0
+            }
+            low = mid + 1;
+        }
+    } while (hi >= low);
+    return low - 1;
+}
+/// Square root of an 8-bit value on the 0-255 scale: sqrt16() of the input widened by map8_to_16(), so sqrt8(255) is 255.
+LIB8STATIC_ALWAYS_INLINE uint8_t sqrt8(uint8_t x) {
+    return sqrt16(map8_to_16(x));
+}
+
+/// Blend a variable proportion (0-255) of one byte to another.
+/// @param a the starting byte value
+/// @param b the byte value to blend toward
+/// @param amountOfB the proportion (0-255) of b to blend
+/// @returns a byte value between a and b, inclusive
+#if (FASTLED_BLEND_FIXED == 1)
+LIB8STATIC uint8_t blend8(uint8_t a, uint8_t b, uint8_t amountOfB) {
+
+    // The BLEND_FIXED formula is
+    //
+    //   result = (  A*(amountOfA) + B*(amountOfB)              )/ 256
+    //
+    // …where amountOfA = 255-amountOfB.
+    //
+    // This formula will never return 255, which is why the BLEND_FIXED +
+    // SCALE8_FIXED version is
+    //
+    //   result = (  A*(amountOfA) + A + B*(amountOfB) + B      ) / 256
+    //
+    // We can rearrange this formula for some great optimisations.
+    //
+    //   result = (  A*(amountOfA) + A + B*(amountOfB) + B      ) / 256
+    //          = (  A*(255-amountOfB) + A + B*(amountOfB) + B  ) / 256
+    //          = (  A*(256-amountOfB) + B*(amountOfB) + B      ) / 256
+    //          = (  A*256 + B + B*(amountOfB) - A*(amountOfB)  ) / 256
+    //            ^ this is the version used in SCALE8_FIXED AVR below
+    //          = (  A*256 + B + (B-A)*(amountOfB)              ) / 256
+    //            ^ this is the version used in SCALE8_FIXED C below
+
+    uint16_t partial;
+    uint8_t result;
+
+#if BLEND8_C == 1
+
+#if (FASTLED_SCALE8_FIXED == 1)
+    partial = (a << 8) | b; // A*256 + B
+
+    // on many platforms this compiles to a single multiply of (B-A) * amountOfB
+    partial += (b * amountOfB);
+    partial -= (a * amountOfB);
+
+#else
+    uint8_t amountOfA = 255 - amountOfB;
+
+    // on the other hand, this compiles to two multiplies, and gives the "wrong"
+    // answer :]
+    partial = (a * amountOfA);
+    partial += (b * amountOfB);
+#endif
+
+    result = partial >> 8;
+
+    return result;
+
+#elif BLEND8_AVRASM == 1
+
+#if (FASTLED_SCALE8_FIXED == 1)
+
+    // 1 or 2 cycles depending on how the compiler optimises
+    partial = (a << 8) | b;
+
+    // 7 cycles
+    asm volatile("  mul %[a], %[amountOfB]        \n\t"
+                 "  sub %A[partial], r0           \n\t"
+                 "  sbc %B[partial], r1           \n\t"
+                 "  mul %[b], %[amountOfB]        \n\t"
+                 "  add %A[partial], r0           \n\t"
+                 "  adc %B[partial], r1           \n\t"
+                 "  clr __zero_reg__              \n\t"
+                 : [partial] "+r"(partial)
+                 : [amountOfB] "r"(amountOfB), [a] "r"(a), [b] "r"(b)
+                 : "r0", "r1");
+
+#else
+
+    // non-SCALE8-fixed version
+
+    // 7 cycles
+    asm volatile(
+        /* partial = b * amountOfB */
+        "  mul %[b], %[amountOfB]        \n\t"
+        "  movw %A[partial], r0          \n\t"
+
+        /* amountOfB (aka amountOfA) = 255 - amountOfB */
+        "  com %[amountOfB]              \n\t"
+
+        /* partial += a * amountOfB (aka amountOfA) */
+        "  mul %[a], %[amountOfB]        \n\t"
+
+        "  add %A[partial], r0           \n\t"
+        "  adc %B[partial], r1           \n\t"
+
+        "  clr __zero_reg__              \n\t"
+        /* partial is written (movw) before a is read, so it needs the early-clobber '&' */
+        : [partial] "=&r"(partial), [amountOfB] "+r"(amountOfB)
+        : [a] "r"(a), [b] "r"(b)
+        : "r0", "r1");
+
+#endif
+
+    result = partial >> 8;
+
+    return result;
+
+#else
+#error "No implementation for blend8 available."
+#endif
+}
+
+#else
+LIB8STATIC uint8_t blend8(uint8_t a, uint8_t b, uint8_t amountOfB) {
+    // This version loses precision in the integer math
+    // and can actually return results outside of the range
+    // from a to b.  Its use is not recommended.
+    uint8_t result;
+    uint8_t amountOfA = 255 - amountOfB;
+    result = scale8_LEAVING_R1_DIRTY(a, amountOfA) +
+             scale8_LEAVING_R1_DIRTY(b, amountOfB);
+    cleanup_R1();
+    return result;
+}
+#endif
+
+/// @} Math
+/// @} lib8tion
+
+FASTLED_NAMESPACE_END
+
+FL_DISABLE_WARNING_POP
--- a/libraries/FastLED/src/lib8tion/memmove.h
+++ b/libraries/FastLED/src/lib8tion/memmove.h
@@ -0,0 +1,24 @@
+#pragma once
+
+///////////////////////////////////////////////////////////////////////
+///
+/// @defgroup FastMemory Fast Memory Functions for AVR
+/// Alternatives to memmove, memcpy, and memset that are
+/// faster on AVR than standard avr-libc 1.8. 
+/// @{
+
+#if defined(__AVR__) || defined(FASTLED_DOXYGEN)
+extern "C" {
+void * memmove8( void * dst, const void * src, uint16_t num );  ///< Faster alternative to memmove() on AVR
+void * memcpy8 ( void * dst, const void * src, uint16_t num )  __attribute__ ((noinline));  ///< Faster alternative to memcpy() on AVR
+void * memset8 ( void * ptr, uint8_t value, uint16_t num ) __attribute__ ((noinline)) ;  ///< Faster alternative to memset() on AVR
+}
+#else
+#include "fl/memfill.h"
+// on non-AVR platforms, these names are plain aliases: memmove8 -> memmove, memcpy8 -> fl::memcopy, memset8 -> fl::memfill.
+#define memmove8 memmove
+#define memcpy8 fl::memcopy
+#define memset8 fl::memfill
+#endif
+
+/// @} FastMemory
--- a/libraries/FastLED/src/lib8tion/qfx.h
+++ b/libraries/FastLED/src/lib8tion/qfx.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include "fl/stdint.h"
+#include "fl/namespace.h"
+
+FASTLED_NAMESPACE_BEGIN
+
+
+/// @addtogroup FractionalTypes
+/// @{
+
+/// Template class for representing fractional ints as two bit-fields: I integer bits and F fractional bits.
+/// @tparam T underlying type for data storage
+/// @tparam F number of fractional bits
+/// @tparam I number of integer bits
+template<class T, int F, int I> class qfx {
+    T i:I;  ///< Integer value of number, stored in an I-bit bit-field
+    T f:F;  ///< Fractional value of number in units of 1/2^F, stored in an F-bit bit-field
+public:
+    /// Constructor, storing a float as a fractional int: i takes the integer part, f the remainder scaled by 2^F
+    qfx(float fx) { i = fx; f = (fx-i) * (1<<F); }
+    /// Constructor, storing a fractional int directly (_i is the integer part, _f the fractional part in 1/2^F units)
+    qfx(uint8_t _i, uint8_t _f) {i=_i; f=_f; }
+
+    /// Multiply the fractional int by a value: v*i + ((v*f) >> F), so the fractional product is truncated
+    uint32_t operator*(uint32_t v) { return (v*i) + ((v*f)>>F); }
+    /// @copydoc operator*(uint32_t)
+    uint16_t operator*(uint16_t v) { return (v*i) + ((v*f)>>F); }
+    /// @copydoc operator*(uint32_t)
+    int32_t operator*(int32_t v) { return (v*i) + ((v*f)>>F); }
+    /// @copydoc operator*(uint32_t)
+    int16_t operator*(int16_t v) { return (v*i) + ((v*f)>>F); }
+#if defined(FASTLED_ARM) | defined(FASTLED_RISCV) | defined(FASTLED_APOLLO3)
+    /// @copydoc operator*(uint32_t)
+    int operator*(int v) { return (v*i) + ((v*f)>>F); }
+#endif
+};
+
+template<class T, int F, int I> static uint32_t operator*(uint32_t v, qfx<T,F,I> & q) { return q * v; } // commuted form (v * q): forwards to q * v
+template<class T, int F, int I> static uint16_t operator*(uint16_t v, qfx<T,F,I> & q) { return q * v; } // commuted form (v * q): forwards to q * v
+template<class T, int F, int I> static int32_t operator*(int32_t v, qfx<T,F,I> & q) { return q * v; } // commuted form (v * q): forwards to q * v
+template<class T, int F, int I> static int16_t operator*(int16_t v, qfx<T,F,I> & q) { return q * v; } // commuted form (v * q): forwards to q * v
+#if defined(FASTLED_ARM) | defined(FASTLED_RISCV) | defined(FASTLED_APOLLO3)
+template<class T, int F, int I> static int operator*(int v, qfx<T,F,I> & q) { return q * v; } // commuted form (v * q): forwards to q * v
+#endif
+
+/// A 4.4 integer (4 bits integer, 4 bits fraction)
+typedef qfx<uint8_t, 4,4> q44;
+/// A 6.2 integer (documented as 6 bits integer, 2 bits fraction). NOTE(review): qfx takes <T, F, I>, so this instantiates F=6, I=2 - confirm which is intended
+typedef qfx<uint8_t, 6,2> q62;
+/// A 8.8 integer (8 bits integer, 8 bits fraction)
+typedef qfx<uint16_t, 8,8> q88;
+/// A 12.4 integer (documented as 12 bits integer, 4 bits fraction). NOTE(review): qfx takes <T, F, I>, so this instantiates F=12, I=4 - confirm which is intended
+typedef qfx<uint16_t, 12,4> q124;
+
+/// @} FractionalTypes
+
+FASTLED_NAMESPACE_END
--- a/libraries/FastLED/src/lib8tion/random8.h
+++ b/libraries/FastLED/src/lib8tion/random8.h
@@ -0,0 +1,110 @@
+#pragma once
+
+#ifndef __INC_LIB8TION_RANDOM_H
+#define __INC_LIB8TION_RANDOM_H
+
+#include "fl/stdint.h"
+
+#include "lib8tion/lib8static.h"
+
+/// @file random8.h
+/// Fast, efficient random number generators specifically
+/// designed for high-performance LED programming.
+
+/// @ingroup lib8tion
+/// @{
+
+/// @defgroup Random Fast Random Number Generators
+/// Fast 8-bit and 16-bit unsigned random number generators.
+/// Significantly faster than Arduino random(), but
+/// also somewhat less random.  You can add entropy.
+///
+/// Pseudo-random number generation follows the form:
+///   @code
+///   X(n+1) = (2053 * X(n)) + 13849
+///   @endcode
+/// @{
+
+/// Multiplier value for pseudo-random number generation
+#define FASTLED_RAND16_2053 ((uint16_t)(2053))
+/// Increment value for pseudo-random number generation
+#define FASTLED_RAND16_13849 ((uint16_t)(13849))
+
+#if defined(LIB8_ATTINY)
+/// Multiplies a value by the pseudo-random multiplier using shifts and adds
+/// (2053 = 2048 + 4 + 1) instead of a multiply.
+/// @note The argument is evaluated more than once; pass an expression
+///       without side effects.
+#define APPLY_FASTLED_RAND16_2053(x) (((x) << 11) + ((x) << 2) + (x))
+#else
+/// Multiplies a value by the pseudo-random multiplier
+#define APPLY_FASTLED_RAND16_2053(x) ((x) * FASTLED_RAND16_2053)
+#endif
+
+/// Seed for the random number generator functions
+extern uint16_t rand16seed; // = RAND16_SEED;
+
+/// Advance the generator by one step and produce an 8-bit random number.
+/// @returns random 8-bit number, in the range 0-255
+LIB8STATIC uint8_t random8() {
+    // One linear-congruential step on the shared 16-bit state
+    rand16seed = APPLY_FASTLED_RAND16_2053(rand16seed) + FASTLED_RAND16_13849;
+    // Fold both halves of the state together: the sum of the low and high
+    // bytes mixes better than either byte on its own
+    const uint8_t low_byte = (uint8_t)(rand16seed & 0xFF);
+    const uint8_t high_byte = (uint8_t)(rand16seed >> 8);
+    return (uint8_t)(low_byte + high_byte);
+}
+
+/// Advance the generator by one step and return the new 16-bit state.
+/// @returns random 16-bit number, in the range 0-65535
+LIB8STATIC uint16_t random16() {
+    const uint16_t next_state =
+        APPLY_FASTLED_RAND16_2053(rand16seed) + FASTLED_RAND16_13849;
+    rand16seed = next_state;
+    return next_state;
+}
+
+/// Generate an 8-bit random number between 0 and lim
+///
+/// The result is (random8() * lim) / 256, so it is always less than lim
+/// (and is 0 when lim is 0).
+/// @param lim the upper bound for the result, exclusive
+/// @returns random 8-bit number, in the range 0 to lim-1
+LIB8STATIC uint8_t random8(uint8_t lim) {
+    uint8_t r = random8();
+    // Multiply as unsigned 16-bit: two uint8_t operands would otherwise be
+    // promoted to signed int, and with a 16-bit int a product of up to
+    // 255 * 255 overflows it.
+    r = ((uint16_t)r * (uint16_t)lim) >> 8;
+    return r;
+}
+
+/// Generate an 8-bit random number inside [min, lim).
+/// @param min the lower bound for the random number, inclusive
+/// @param lim the upper bound for the random number, exclusive
+LIB8STATIC uint8_t random8(uint8_t min, uint8_t lim) {
+    // Width of the requested range (8-bit arithmetic, wraps if lim < min)
+    const uint8_t span = lim - min;
+    return (uint8_t)(random8(span) + min);
+}
+
+/// Generate a 16-bit random number between 0 and lim
+/// @param lim the upper bound for the result, exclusive
+LIB8STATIC uint16_t random16(uint16_t lim) {
+    // 16x16 -> 32-bit product; its upper half is (random16() * lim) / 65536
+    const uint32_t product = (uint32_t)lim * (uint32_t)random16();
+    return (uint16_t)(product >> 16);
+}
+
+/// Generate a 16-bit random number inside [min, lim).
+/// @param min the lower bound for the random number, inclusive
+/// @param lim the upper bound for the random number, exclusive
+LIB8STATIC uint16_t random16(uint16_t min, uint16_t lim) {
+    // Width of the requested range (16-bit arithmetic, wraps if lim < min)
+    const uint16_t span = lim - min;
+    return (uint16_t)(random16(span) + min);
+}
+
+/// Replace the generator state with the given 16-bit seed
+/// @param seed new value stored into rand16seed
+LIB8STATIC void random16_set_seed(uint16_t seed) {
+    rand16seed = seed;
+}
+
+/// Read back the current generator state
+/// @returns the current value of rand16seed
+LIB8STATIC uint16_t random16_get_seed() {
+    return rand16seed;
+}
+
+/// Mix extra entropy into the generator by adding it to the current state
+/// (16-bit addition, wraps on overflow)
+/// @param entropy value added to rand16seed
+LIB8STATIC void random16_add_entropy(uint16_t entropy) {
+    rand16seed = (uint16_t)(rand16seed + entropy);
+}
+
+/// @} Random
+/// @} lib8tion
+
+#endif
--- a/libraries/FastLED/src/lib8tion/scale8.h
+++ b/libraries/FastLED/src/lib8tion/scale8.h
@@ -0,0 +1,760 @@
+#include "fl/compiler_control.h"
+
+#pragma once
+
+#include "lib8tion/config.h"
+#include "crgb.h"
+#include "fl/namespace.h"
+#include "fastled_config.h"
+#include "lib8static.h"
+
+FL_DISABLE_WARNING_PUSH
+FL_DISABLE_WARNING_UNUSED_PARAMETER
+FL_DISABLE_WARNING_RETURN_TYPE
+FL_DISABLE_WARNING_IMPLICIT_INT_CONVERSION
+
+
+FASTLED_NAMESPACE_BEGIN
+
+/// @file scale8.h
+/// Fast, efficient 8-bit scaling functions specifically
+/// designed for high-performance LED programming.
+
+/// @addtogroup lib8tion
+/// @{
+
+/// @defgroup Scaling Scaling Functions
+/// Fast, efficient 8-bit scaling functions specifically
+/// designed for high-performance LED programming.
+///
+/// Because of the AVR(Arduino) and ARM assembly language
+/// implementations provided, using these functions often
+/// results in smaller and faster code than the equivalent
+/// program using plain "C" arithmetic and logic.
+/// @{
+
+/// Scale one byte by a second one, which is treated as
+/// the numerator of a fraction whose denominator is 256.
+///
+/// In other words, it computes i * (scale / 256)
+///
+/// When FASTLED_SCALE8_FIXED is 1 the C implementation multiplies by
+/// (scale + 1) instead of scale, so a scale of 255 returns i unchanged.
+/// @param i input value to scale
+/// @param scale scale factor, in n/256 units
+/// @returns scaled value
+/// @note Takes 4 clocks on AVR with MUL, 2 clocks on ARM
+LIB8STATIC_ALWAYS_INLINE uint8_t scale8(uint8_t i, fract8 scale) {
+#if SCALE8_C == 1
+#if (FASTLED_SCALE8_FIXED == 1)
+    return (((uint16_t)i) * (1 + (uint16_t)(scale))) >> 8;
+#else
+    return ((uint16_t)i * (uint16_t)(scale)) >> 8;
+#endif
+#elif SCALE8_AVRASM == 1
+#if defined(LIB8_ATTINY)
+    // Software shift-and-add multiply (no mul instruction is used on this path)
+#if (FASTLED_SCALE8_FIXED == 1)
+    // Preloaded with i: if scale + 1 wraps to zero the asm jumps straight to
+    // DONE and i is returned unchanged
+    uint8_t work = i;
+#else
+    uint8_t work = 0;
+#endif
+    // Single set bit used as the loop counter: the loop ends once lsr shifts
+    // it out into the carry flag, i.e. after 8 iterations
+    uint8_t cnt = 0x80;
+    // NOTE(review): %[scale] is declared as an input operand below, yet the
+    // asm modifies it (inc / lsr) -- confirm this is safe with the compiler's
+    // register allocation.
+    asm volatile(
+#if (FASTLED_SCALE8_FIXED == 1)
+        "  inc %[scale]                 \n\t"
+        "  breq DONE_%=                 \n\t"
+        "  clr %[work]                  \n\t"
+#endif
+        "LOOP_%=:                       \n\t"
+        /*"  sbrc %[scale], 0             \n\t"
+        "  add %[work], %[i]            \n\t"
+        "  ror %[work]                  \n\t"
+        "  lsr %[scale]                 \n\t"
+        "  clc                          \n\t"*/
+        "  sbrc %[scale], 0             \n\t"
+        "  add %[work], %[i]            \n\t"
+        "  ror %[work]                  \n\t"
+        "  lsr %[scale]                 \n\t"
+        "  lsr %[cnt]                   \n\t"
+        "brcc LOOP_%=                   \n\t"
+        "DONE_%=:                       \n\t"
+        : [work] "+r"(work), [cnt] "+r"(cnt)
+        : [scale] "r"(scale), [i] "r"(i)
+        :);
+    return work;
+#else
+    asm volatile(
+#if (FASTLED_SCALE8_FIXED == 1)
+        // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
+        "mul %0, %1          \n\t"
+        // Add i to r0, possibly setting the carry flag
+        "add r0, %0         \n\t"
+        // load the immediate 0 into i (note, this does _not_ touch any flags)
+        "ldi %0, 0x00       \n\t"
+        // walk and chew gum at the same time
+        // (i = 0 + r1 + carry: the high byte of (i * scale) + i)
+        "adc %0, r1          \n\t"
+#else
+        /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
+        "mul %0, %1          \n\t"
+        /* Move the high 8-bits of the product (r1) back to i */
+        "mov %0, r1          \n\t"
+    /* Restore r1 to "0"; it's expected to always be that */
+#endif
+        "clr __zero_reg__    \n\t"
+
+        : "+d"(i)    /* writes to i; r16-r31, restricted by ldi */
+        : "r"(scale) /* uses scale */
+        : "r0", "r1" /* clobbers r0, r1 */
+    );
+    /* Return the result */
+    return i;
+#endif
+#else
+#error "No implementation for scale8 available."
+#endif
+}
+
+/// Variant of scale8() declared constexpr.
+///
+/// Always multiplies by (scale + 1), i.e. computes (i * (scale + 1)) / 256,
+/// regardless of the FASTLED_SCALE8_FIXED setting.
+/// @param i input value to scale
+/// @param scale scale factor, in n/256 units
+/// @returns scaled value
+constexpr uint8_t scale8_constexpr(uint8_t i, fract8 scale) {
+    return ((uint16_t)i * ((uint16_t)scale + 1)) >> 8;
+}
+
+/// The "video" version of scale8() guarantees that the output will
+/// only be zero if one or both of the inputs are zero.
+/// If both inputs are non-zero, the output is guaranteed to be non-zero.
+/// This makes for better "video"/LED dimming, at the cost of
+/// several additional cycles.
+/// @param i input value to scale
+/// @param scale scale factor, in n/256 units
+/// @returns scaled value
+/// @see scale8()
+LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video(uint8_t i, fract8 scale) {
+#if SCALE8_C == 1 || defined(LIB8_ATTINY)
+    // Multiply as unsigned 16-bit (as scale8() does): a signed int product
+    // of up to 255 * 255 overflows when int is 16 bits wide.
+    // The +1 is applied only when both inputs are non-zero.
+    uint8_t j = (((uint16_t)i * (uint16_t)scale) >> 8) + ((i && scale) ? 1 : 0);
+    return j;
+#elif SCALE8_AVRASM == 1
+    uint8_t j = 0;
+    asm volatile("  tst %[i]\n\t"
+                 "  breq L_%=\n\t"
+                 "  mul %[i], %[scale]\n\t"
+                 "  mov %[j], r1\n\t"
+                 "  clr __zero_reg__\n\t"
+                 "  cpse %[scale], r1\n\t"
+                 "  subi %[j], 0xFF\n\t"
+                 "L_%=: \n\t"
+                 : [j] "+d"(j) // r16-r31, restricted by subi
+                 : [i] "r"(i), [scale] "r"(scale)
+                 : "r0", "r1");
+    return j;
+#else
+#error "No implementation for scale8_video available."
+#endif
+}
+
+/// @defgroup ScalingDirty Scaling Functions that Leave R1 Dirty
+/// These functions are more efficient for scaling multiple
+/// bytes at once, but require calling cleanup_R1() afterwards.
+/// @{
+
+/// This version of scale8() does not clean up the R1 register on AVR.
+/// If you are doing several "scale8()'s" in a row, use this, and
+/// then explicitly call cleanup_R1().
+/// @warning You **MUST** call cleanup_R1() after using this function!
+/// @param i input value to scale
+/// @param scale scale factor, in n/256 units
+/// @returns scaled value
+/// @see scale8()
+LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY(uint8_t i,
+                                                         fract8 scale) {
+#if SCALE8_C == 1
+#if (FASTLED_SCALE8_FIXED == 1)
+    return (((uint16_t)i) * ((uint16_t)(scale) + 1)) >> 8;
+#else
+    // Unsigned 16-bit multiply, matching scale8(): a signed int product of
+    // up to 255 * 255 overflows when int is 16 bits wide.
+    return ((uint16_t)i * (uint16_t)(scale)) >> 8;
+#endif
+#elif SCALE8_AVRASM == 1
+    asm volatile(
+#if (FASTLED_SCALE8_FIXED == 1)
+        // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
+        "mul %0, %1          \n\t"
+        // Add i to r0, possibly setting the carry flag
+        "add r0, %0         \n\t"
+        // load the immediate 0 into i (note, this does _not_ touch any flags)
+        "ldi %0, 0x00       \n\t"
+        // walk and chew gum at the same time
+        "adc %0, r1          \n\t"
+#else
+        /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
+        "mul %0, %1    \n\t"
+        /* Move the high 8-bits of the product (r1) back to i */
+        "mov %0, r1    \n\t"
+#endif
+        /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF  */
+        /* "clr __zero_reg__    \n\t" */
+        : "+d"(i)    /* writes to i; r16-r31, restricted by ldi */
+        : "r"(scale) /* uses scale */
+        : "r0", "r1" /* clobbers r0, r1 */
+    );
+    // Return the result
+    return i;
+#else
+#error "No implementation for scale8_LEAVING_R1_DIRTY available."
+#endif
+}
+
+/// In place modifying version of scale8() that does not clean up the R1
+/// register on AVR. If you are doing several "scale8()'s" in a row, use this,
+/// and then explicitly call cleanup_R1().
+/// @warning You **MUST** call cleanup_R1() after using this function!
+/// @par
+/// @warning This function always modifies its arguments in place!
+/// @param i input value to scale
+/// @param scale scale factor, in n/256 units
+/// @see scale8()
+LIB8STATIC_ALWAYS_INLINE void nscale8_LEAVING_R1_DIRTY(uint8_t &i,
+                                                       fract8 scale) {
+#if SCALE8_C == 1
+#if (FASTLED_SCALE8_FIXED == 1)
+    i = (((uint16_t)i) * ((uint16_t)(scale) + 1)) >> 8;
+#else
+    // Unsigned 16-bit multiply, matching scale8(): a signed int product of
+    // up to 255 * 255 overflows when int is 16 bits wide.
+    i = ((uint16_t)i * (uint16_t)(scale)) >> 8;
+#endif
+#elif SCALE8_AVRASM == 1
+    asm volatile(
+#if (FASTLED_SCALE8_FIXED == 1)
+        // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
+        "mul %0, %1          \n\t"
+        // Add i to r0, possibly setting the carry flag
+        "add r0, %0         \n\t"
+        // load the immediate 0 into i (note, this does _not_ touch any flags)
+        "ldi %0, 0x00       \n\t"
+        // walk and chew gum at the same time
+        "adc %0, r1          \n\t"
+#else
+        /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
+        "mul %0, %1    \n\t"
+        /* Move the high 8-bits of the product (r1) back to i */
+        "mov %0, r1    \n\t"
+#endif
+        /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
+        /* "clr __zero_reg__    \n\t" */
+
+        : "+d"(i)    /* writes to i; r16-r31, restricted by ldi */
+        : "r"(scale) /* uses scale */
+        : "r0", "r1" /* clobbers r0, r1 */
+    );
+#else
+#error "No implementation for nscale8_LEAVING_R1_DIRTY available."
+#endif
+}
+
+/// This version of scale8_video() does not clean up the R1 register on AVR.
+/// If you are doing several "scale8_video()'s" in a row, use this, and
+/// then explicitly call cleanup_R1().
+/// @warning You **MUST** call cleanup_R1() after using this function!
+/// @param i input value to scale
+/// @param scale scale factor, in n/256 units
+/// @returns scaled value
+/// @see scale8_video()
+LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY(uint8_t i,
+                                                               fract8 scale) {
+#if SCALE8_C == 1 || defined(LIB8_ATTINY)
+    // Multiply as unsigned 16-bit: a signed int product of up to 255 * 255
+    // overflows when int is 16 bits wide.
+    // The +1 is applied only when both inputs are non-zero.
+    uint8_t j = (((uint16_t)i * (uint16_t)scale) >> 8) + ((i && scale) ? 1 : 0);
+    return j;
+#elif SCALE8_AVRASM == 1
+    uint8_t j = 0;
+    asm volatile("  tst %[i]\n\t"
+                 "  breq L_%=\n\t"
+                 "  mul %[i], %[scale]\n\t"
+                 "  mov %[j], r1\n\t"
+                 "  breq L_%=\n\t"
+                 "  subi %[j], 0xFF\n\t"
+                 "L_%=: \n\t"
+                 : [j] "+d"(j) // r16-r31, restricted by subi
+                 : [i] "r"(i), [scale] "r"(scale)
+                 : "r0", "r1");
+    return j;
+#else
+#error "No implementation for scale8_video_LEAVING_R1_DIRTY available."
+#endif
+}
+
+/// In place modifying version of scale8_video() that does not clean up the R1
+/// register on AVR. If you are doing several "scale8_video()'s" in a row, use
+/// this, and then explicitly call cleanup_R1().
+/// @warning You **MUST** call cleanup_R1() after using this function!
+/// @par
+/// @warning This function always modifies its arguments in place!
+/// @param i input value to scale
+/// @param scale scale factor, in n/256 units
+/// @see scale8_video()
+LIB8STATIC_ALWAYS_INLINE void nscale8_video_LEAVING_R1_DIRTY(uint8_t &i,
+                                                             fract8 scale) {
+#if SCALE8_C == 1 || defined(LIB8_ATTINY)
+    i = (((int)i * (int)scale) >> 8) + ((i && scale) ? 1 : 0);
+#elif SCALE8_AVRASM == 1
+    asm volatile("  tst %[i]\n\t"
+                 "  breq L_%=\n\t"
+                 "  mul %[i], %[scale]\n\t"
+                 "  mov %[i], r1\n\t"
+                 "  breq L_%=\n\t"
+                 "  subi %[i], 0xFF\n\t"
+                 "L_%=: \n\t"
+                 : [i] "+d"(i) // r16-r31, restricted by subi
+                 : [scale] "r"(scale)
+                 : "r0", "r1");
+#else
+#error "No implementation for scale8_video_LEAVING_R1_DIRTY available."
+#endif
+}
+
+/// Clean up the r1 register after a series of *LEAVING_R1_DIRTY calls
+///
+/// Emits a single `clr __zero_reg__` when CLEANUP_R1_AVRASM is 1; in every
+/// other configuration the function body is empty.
+/// @ingroup ScalingDirty
+LIB8STATIC_ALWAYS_INLINE void cleanup_R1() {
+#if CLEANUP_R1_AVRASM == 1
+    // Restore r1 to "0"; it's expected to always be that
+    asm volatile("clr __zero_reg__  \n\t" : : : "r1");
+#endif
+}
+
+/// constexpr helper that scales three channel values by scale / 256 and
+/// returns them as a CRGB.
+///
+/// Always uses the plain (value * scale) >> 8 form, regardless of the
+/// FASTLED_SCALE8_FIXED setting.
+/// @param r first value to scale
+/// @param g second value to scale
+/// @param b third value to scale
+/// @param scale scale factor, in n/256 units
+/// @returns CRGB built from the three scaled values
+constexpr CRGB nscale8x3_constexpr(uint8_t r, uint8_t g, uint8_t b, fract8 scale) {
+    // Unsigned 16-bit multiplies: a signed int product of up to 255 * 255
+    // overflows when int is 16 bits wide.
+    return CRGB(((uint16_t)r * (uint16_t)(scale)) >> 8,
+                ((uint16_t)g * (uint16_t)(scale)) >> 8,
+                ((uint16_t)b * (uint16_t)(scale)) >> 8);
+}
+
+/// @} ScalingDirty
+
+/// Scale three one-byte values by a fourth one, which is treated as
+/// the numerator of a fraction whose denominator is 256.
+///
+/// In other words, it computes r,g,b * (scale / 256)
+///
+/// @warning This function always modifies its arguments in place!
+/// @param r first value to scale
+/// @param g second value to scale
+/// @param b third value to scale
+/// @param scale scale factor, in n/256 units
+LIB8STATIC void nscale8x3(uint8_t &r, uint8_t &g, uint8_t &b, fract8 scale) {
+#if SCALE8_C == 1
+#if (FASTLED_SCALE8_FIXED == 1)
+    uint16_t scale_fixed = scale + 1;
+    r = (((uint16_t)r) * scale_fixed) >> 8;
+    g = (((uint16_t)g) * scale_fixed) >> 8;
+    b = (((uint16_t)b) * scale_fixed) >> 8;
+#else
+    // Unsigned 16-bit multiplies: a signed int product of up to 255 * 255
+    // overflows when int is 16 bits wide.
+    r = ((uint16_t)r * (uint16_t)(scale)) >> 8;
+    g = ((uint16_t)g * (uint16_t)(scale)) >> 8;
+    b = ((uint16_t)b * (uint16_t)(scale)) >> 8;
+#endif
+#elif SCALE8_AVRASM == 1
+    // Three multiplies back to back, then one shared r1 cleanup
+    r = scale8_LEAVING_R1_DIRTY(r, scale);
+    g = scale8_LEAVING_R1_DIRTY(g, scale);
+    b = scale8_LEAVING_R1_DIRTY(b, scale);
+    cleanup_R1();
+#else
+#error "No implementation for nscale8x3 available."
+#endif
+}
+
+/// Scale three one-byte values by a fourth one, which is treated as
+/// the numerator of a fraction whose demominator is 256.
+///
+/// In other words, it computes r,g,b * (scale / 256), ensuring
+/// that non-zero values passed in remain non-zero, no matter how low the scale
+/// argument.
+///
+/// @warning This function always modifies its arguments in place!
+/// @param r first value to scale
+/// @param g second value to scale
+/// @param b third value to scale
+/// @param scale scale factor, in n/256 units
+LIB8STATIC void nscale8x3_video(uint8_t &r, uint8_t &g, uint8_t &b,
+                                fract8 scale) {
+#if SCALE8_C == 1
+    uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+    r = (r == 0) ? 0 : (((int)r * (int)(scale)) >> 8) + nonzeroscale;
+    g = (g == 0) ? 0 : (((int)g * (int)(scale)) >> 8) + nonzeroscale;
+    b = (b == 0) ? 0 : (((int)b * (int)(scale)) >> 8) + nonzeroscale;
+#elif SCALE8_AVRASM == 1
+    nscale8_video_LEAVING_R1_DIRTY(r, scale);
+    nscale8_video_LEAVING_R1_DIRTY(g, scale);
+    nscale8_video_LEAVING_R1_DIRTY(b, scale);
+    cleanup_R1();
+#else
+#error "No implementation for nscale8x3 available."
+#endif
+}
+
+/// Scale two one-byte values by a third one, which is treated as
+/// the numerator of a fraction whose denominator is 256.
+///
+/// In other words, it computes i,j * (scale / 256).
+///
+/// @warning This function always modifies its arguments in place!
+/// @param i first value to scale
+/// @param j second value to scale
+/// @param scale scale factor, in n/256 units
+LIB8STATIC void nscale8x2(uint8_t &i, uint8_t &j, fract8 scale) {
+#if SCALE8_C == 1
+    // Pick the 16-bit multiplier once, then apply it to both values
+#if FASTLED_SCALE8_FIXED == 1
+    const uint16_t multiplier = scale + 1;
+#else
+    const uint16_t multiplier = scale;
+#endif
+    i = ((uint16_t)i * multiplier) >> 8;
+    j = ((uint16_t)j * multiplier) >> 8;
+#elif SCALE8_AVRASM == 1
+    // Two multiplies back to back, then one shared r1 cleanup
+    i = scale8_LEAVING_R1_DIRTY(i, scale);
+    j = scale8_LEAVING_R1_DIRTY(j, scale);
+    cleanup_R1();
+#else
+#error "No implementation for nscale8x2 available."
+#endif
+}
+
+/// Scale two one-byte values by a third one, which is treated as
+/// the numerator of a fraction whose demominator is 256.
+///
+/// In other words, it computes i,j * (scale / 256), ensuring
+/// that non-zero values passed in remain non zero, no matter how low the scale
+/// argument.
+///
+/// @warning This function always modifies its arguments in place!
+/// @param i first value to scale
+/// @param j second value to scale
+/// @param scale scale factor, in n/256 units
+LIB8STATIC void nscale8x2_video(uint8_t &i, uint8_t &j, fract8 scale) {
+#if SCALE8_C == 1
+    uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
+    i = (i == 0) ? 0 : (((int)i * (int)(scale)) >> 8) + nonzeroscale;
+    j = (j == 0) ? 0 : (((int)j * (int)(scale)) >> 8) + nonzeroscale;
+#elif SCALE8_AVRASM == 1
+    nscale8_video_LEAVING_R1_DIRTY(i, scale);
+    nscale8_video_LEAVING_R1_DIRTY(j, scale);
+    cleanup_R1();
+#else
+#error "No implementation for nscale8x2 available."
+#endif
+}
+
+/// Scale a 16-bit unsigned value by an 8-bit value, which is treated
+/// as the numerator of a fraction whose denominator is 256.
+///
+/// In other words, it computes i * (scale / 256)
+///
+/// A scale of 0 always returns 0. When FASTLED_SCALE8_FIXED is 1 the
+/// multiplier used is (scale + 1).
+/// @param i input value to scale
+/// @param scale scale factor, in n/256 units
+/// @returns scaled value
+LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8(uint16_t i, fract8 scale) {
+    if (scale == 0) {
+        return 0; // Fixes non zero output when scale == 0 and
+                  // FASTLED_SCALE8_FIXED==1
+    }
+#if SCALE16BY8_C == 1
+    uint16_t result;
+#if FASTLED_SCALE8_FIXED == 1
+    result = (((uint32_t)(i) * (1 + ((uint32_t)scale))) >> 8);
+#else
+    // Widen to 32 bits before multiplying: the product needs up to 24 bits,
+    // and with a 16-bit int `i * scale` would be truncated to 16 bits
+    // before the divide.
+    result = ((uint32_t)(i) * (uint32_t)(scale)) / 256;
+#endif
+    return result;
+#elif SCALE16BY8_AVRASM == 1
+#if FASTLED_SCALE8_FIXED == 1
+    uint16_t result = 0;
+    asm volatile(
+        // result.A = HighByte( (i.A x scale) + i.A )
+        "  mul %A[i], %[scale]                 \n\t"
+        "  add r0, %A[i]                       \n\t"
+        //   "  adc r1, [zero]                      \n\t"
+        //   "  mov %A[result], r1                  \n\t"
+        "  adc %A[result], r1                  \n\t"
+
+        // result.A-B += i.B x scale
+        "  mul %B[i], %[scale]                 \n\t"
+        "  add %A[result], r0                  \n\t"
+        "  adc %B[result], r1                  \n\t"
+
+        // cleanup r1
+        "  clr __zero_reg__                    \n\t"
+
+        // result.A-B += i.B
+        "  add %A[result], %B[i]               \n\t"
+        "  adc %B[result], __zero_reg__        \n\t"
+
+        : [result] "+r"(result)
+        : [i] "r"(i), [scale] "r"(scale)
+        : "r0", "r1");
+    return result;
+#else
+    uint16_t result = 0;
+    asm volatile(
+        // result.A = HighByte(i.A x j )
+        "  mul %A[i], %[scale]                 \n\t"
+        "  mov %A[result], r1                  \n\t"
+        //"  clr %B[result]                      \n\t"
+
+        // result.A-B += i.B x j
+        "  mul %B[i], %[scale]                 \n\t"
+        "  add %A[result], r0                  \n\t"
+        "  adc %B[result], r1                  \n\t"
+
+        // cleanup r1
+        "  clr __zero_reg__                    \n\t"
+
+        : [result] "+r"(result)
+        : [i] "r"(i), [scale] "r"(scale)
+        : "r0", "r1");
+    return result;
+#endif
+#else
+#error "No implementation for scale16by8 available."
+#endif
+}
+
+
+/// Scale a 16-bit unsigned value by a 16-bit value, which is treated
+/// as the numerator of a fraction whose denominator is 65536.
+/// In other words, it computes i * (scale / 65536)
+///
+/// When FASTLED_SCALE8_FIXED is 1 the C implementation multiplies by
+/// (scale + 1), and the AVR implementation adds i to the 32-bit product
+/// before taking its upper 16 bits.
+/// @param i input value to scale
+/// @param scale scale factor, in n/65536 units
+/// @returns scaled value
+LIB8STATIC uint16_t scale16(uint16_t i, fract16 scale) {
+#if SCALE16_C == 1
+    uint16_t result;
+#if FASTLED_SCALE8_FIXED == 1
+    result = ((uint32_t)(i) * (1 + (uint32_t)(scale))) / 65536;
+#else
+    result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536;
+#endif
+    return result;
+#elif SCALE16_AVRASM == 1
+#if FASTLED_SCALE8_FIXED == 1
+    // implemented sort of like
+    //   result = ((i * scale) + i ) / 65536
+    //
+    // why not like this, you may ask?
+    //   result = (i * (scale+1)) / 65536
+    // the answer is that if scale is 65535, then scale+1
+    // will be zero, which is not what we want.
+    uint32_t result;
+    // The 32-bit product is assembled from four 8x8 partial products
+    // across the asm statements below.
+    asm volatile(
+        // result.A-B  = i.A x scale.A
+        "  mul %A[i], %A[scale]                 \n\t"
+        //  save results...
+        // basic idea:
+        //"  mov %A[result], r0                 \n\t"
+        //"  mov %B[result], r1                 \n\t"
+        // which can be written as...
+        "  movw %A[result], r0                   \n\t"
+        // Because we're going to add i.A-B to
+        // result.A-D, we DO need to keep both
+        // the r0 and r1 portions of the product
+        // UNlike in the 'unfixed scale8' version.
+        // So the movw here is needed.
+        : [result] "=r"(result)
+        : [i] "r"(i), [scale] "r"(scale)
+        : "r0", "r1");
+
+    asm volatile(
+        // result.C-D  = i.B x scale.B
+        "  mul %B[i], %B[scale]                 \n\t"
+        //"  mov %C[result], r0                 \n\t"
+        //"  mov %D[result], r1                 \n\t"
+        "  movw %C[result], r0                   \n\t"
+        : [result] "+r"(result)
+        : [i] "r"(i), [scale] "r"(scale)
+        : "r0", "r1");
+
+    // Zero held in an ordinary register so carries can be propagated with
+    // adc while r1 still holds part of a product
+    const uint8_t zero = 0;
+    asm volatile(
+        // result.B-D += i.B x scale.A
+        "  mul %B[i], %A[scale]                 \n\t"
+
+        "  add %B[result], r0                   \n\t"
+        "  adc %C[result], r1                   \n\t"
+        "  adc %D[result], %[zero]              \n\t"
+
+        // result.B-D += i.A x scale.B
+        "  mul %A[i], %B[scale]                 \n\t"
+
+        "  add %B[result], r0                   \n\t"
+        "  adc %C[result], r1                   \n\t"
+        "  adc %D[result], %[zero]              \n\t"
+
+        // cleanup r1
+        "  clr r1                               \n\t"
+
+        : [result] "+r"(result)
+        : [i] "r"(i), [scale] "r"(scale), [zero] "r"(zero)
+        : "r0", "r1");
+
+    asm volatile(
+        // result.A-D += i.A-B
+        "  add %A[result], %A[i]                \n\t"
+        "  adc %B[result], %B[i]                \n\t"
+        "  adc %C[result], %[zero]              \n\t"
+        "  adc %D[result], %[zero]              \n\t"
+        : [result] "+r"(result)
+        : [i] "r"(i), [zero] "r"(zero));
+
+    // Keep only the upper 16 bits of the 32-bit sum
+    result = result >> 16;
+    return result;
+#else
+    uint32_t result;
+    // The 32-bit product is assembled from four 8x8 partial products
+    // across the asm statements below.
+    asm volatile(
+        // result.A-B  = i.A x scale.A
+        "  mul %A[i], %A[scale]                 \n\t"
+        //  save results...
+        // basic idea:
+        //"  mov %A[result], r0                 \n\t"
+        //"  mov %B[result], r1                 \n\t"
+        // which can be written as...
+        "  movw %A[result], r0                   \n\t"
+        // We actually don't need to do anything with r0,
+        // as result.A is never used again here, so we
+        // could just move the high byte, but movw is
+        // one clock cycle, just like mov, so might as
+        // well, in case we want to use this code for
+        // a generic 16x16 multiply somewhere.
+
+        : [result] "=r"(result)
+        : [i] "r"(i), [scale] "r"(scale)
+        : "r0", "r1");
+
+    asm volatile(
+        // result.C-D  = i.B x scale.B
+        "  mul %B[i], %B[scale]                 \n\t"
+        //"  mov %C[result], r0                 \n\t"
+        //"  mov %D[result], r1                 \n\t"
+        "  movw %C[result], r0                   \n\t"
+        : [result] "+r"(result)
+        : [i] "r"(i), [scale] "r"(scale)
+        : "r0", "r1");
+
+    // Zero held in an ordinary register so carries can be propagated with
+    // adc while r1 still holds part of a product
+    const uint8_t zero = 0;
+    asm volatile(
+        // result.B-D += i.B x scale.A
+        "  mul %B[i], %A[scale]                 \n\t"
+
+        "  add %B[result], r0                   \n\t"
+        "  adc %C[result], r1                   \n\t"
+        "  adc %D[result], %[zero]              \n\t"
+
+        // result.B-D += i.A x scale.B
+        "  mul %A[i], %B[scale]                 \n\t"
+
+        "  add %B[result], r0                   \n\t"
+        "  adc %C[result], r1                   \n\t"
+        "  adc %D[result], %[zero]              \n\t"
+
+        // cleanup r1
+        "  clr r1                               \n\t"
+
+        : [result] "+r"(result)
+        : [i] "r"(i), [scale] "r"(scale), [zero] "r"(zero)
+        : "r0", "r1");
+
+    // Keep only the upper 16 bits of the 32-bit product
+    result = result >> 16;
+    return result;
+#endif
+#else
+#error "No implementation for scale16 available."
+#endif
+}
+/// @} Scaling
+
+/// @defgroup Dimming Dimming and Brightening Functions
+/// Functions to dim or brighten data.
+///
+/// The eye does not respond in a linear way to light.
+/// High speed PWM'd LEDs at 50% duty cycle appear far
+/// brighter than the "half as bright" you might expect.
+///
+/// If you want your midpoint brightness LEDs (128) to
+/// appear half as bright as "full" brightness (255), you
+/// have to apply a "dimming function".
+///
+/// @note These are approximations of gamma correction with
+///       a gamma value of 2.0.
+/// @see @ref GammaFuncs
+/// @{
+
+/// Dim a value by scaling it by itself, i.e. scale8(x, x).
+/// @param x value to dim
+/// @returns the dimmed value
+/// @see scale8()
+LIB8STATIC uint8_t dim8_raw(uint8_t x) {
+    const uint8_t dimmed = scale8(x, x);
+    return dimmed;
+}
+
+/// Dim a value by scaling it by itself with the "video" scaler,
+/// i.e. scale8_video(x, x).
+/// @param x value to dim
+/// @returns the dimmed value
+/// @see scale8_video()
+LIB8STATIC uint8_t dim8_video(uint8_t x) {
+    const uint8_t dimmed = scale8_video(x, x);
+    return dimmed;
+}
+
+/// Dimming function that is linear in its lower half: values below 128 are
+/// halved (rounding up), values of 128 and above are scaled by themselves.
+/// @param x value to dim
+/// @returns the dimmed value
+LIB8STATIC uint8_t dim8_lin(uint8_t x) {
+    if ((x & 0x80) == 0) {
+        // x < 128: (x + 1) / 2
+        return (uint8_t)((x + 1) / 2);
+    }
+    // x >= 128: same as dim8_raw()
+    return scale8(x, x);
+}
+
+/// Brighten a value: the dim8_raw() curve applied to the inverted input
+/// (255 - x), with the result inverted again.
+/// @param x value to brighten
+/// @returns the brightened value
+LIB8STATIC uint8_t brighten8_raw(uint8_t x) {
+    const uint8_t inverted = 255 - x;
+    const uint8_t dimmed = scale8(inverted, inverted);
+    return (uint8_t)(255 - dimmed);
+}
+
+/// Brighten a value: the dim8_video() curve applied to the inverted input
+/// (255 - x), with the result inverted again.
+/// @param x value to brighten
+/// @returns the brightened value
+LIB8STATIC uint8_t brighten8_video(uint8_t x) {
+    const uint8_t inverted = 255 - x;
+    const uint8_t dimmed = scale8_video(inverted, inverted);
+    return (uint8_t)(255 - dimmed);
+}
+
+/// Brighten a value: the dim8_lin() curve applied to the inverted input
+/// (255 - x), with the result inverted again.
+/// @param x value to brighten
+/// @returns the brightened value
+LIB8STATIC uint8_t brighten8_lin(uint8_t x) {
+    const uint8_t inverted = 255 - x;
+    // Upper half: scale by itself; lower half: halve, rounding up
+    const uint8_t dimmed = (inverted & 0x80)
+                               ? scale8(inverted, inverted)
+                               : (uint8_t)((inverted + 1) / 2);
+    return (uint8_t)(255 - dimmed);
+}
+
+/// @} Dimming
+/// @} lib8tion
+
+FASTLED_NAMESPACE_END
+
+// NOTE(review): the warning state is pushed with FL_DISABLE_WARNING_PUSH near
+// the top of this header but popped here with a GCC-specific pragma.
+// Presumably fl/compiler_control.h provides a matching pop macro that should
+// be used instead -- TODO confirm.
+#pragma GCC diagnostic pop
--- a/libraries/FastLED/src/lib8tion/trig8.h
+++ b/libraries/FastLED/src/lib8tion/trig8.h
@@ -0,0 +1,287 @@
+#pragma once
+
+#ifndef __INC_LIB8TION_TRIG_H
+#define __INC_LIB8TION_TRIG_H
+
+#include "fl/stdint.h"
+#include "lib8tion/lib8static.h"
+
+#include "fl/compiler_control.h"
+
+FL_DISABLE_WARNING_PUSH
+FL_DISABLE_WARNING_UNUSED_PARAMETER
+FL_DISABLE_WARNING_RETURN_TYPE
+FL_DISABLE_WARNING_IMPLICIT_INT_CONVERSION
+FL_DISABLE_WARNING_FLOAT_CONVERSION
+FL_DISABLE_WARNING_SIGN_CONVERSION
+
+/// @file trig8.h
+/// Fast, efficient 8-bit trigonometry functions specifically
+/// designed for high-performance LED programming.
+
+/// @ingroup lib8tion
+/// @{
+
+/// @defgroup Trig Fast Trigonometry Functions
+/// Fast 8-bit and 16-bit approximations of sin(x) and cos(x).
+///
+/// Don't use these approximations for calculating the
+/// trajectory of a rocket to Mars, but they're great
+/// for art projects and LED displays.
+///
+/// On Arduino/AVR, the 16-bit approximation is more than
+/// 10X faster than floating point sin(x) and cos(x), while
+/// the 8-bit approximation is more than 20X faster.
+/// @{
+
+#if defined(USE_SIN_32)
+
+#define sin16 fl::sin16lut
+#define cos16 fl::cos16lut
+
+#include "fl/sin32.h"
+
+#elif defined(__AVR__)
+
+/// Platform-independent alias of the fast sin implementation
+#define sin16 sin16_avr
+
+/// Fast 16-bit approximation of sin(x). This approximation never varies more
+/// than 0.69% from the floating point value you'd get by doing
+///    @code{.cpp}
+///    float s = sin(x) * 32767.0;
+///    @endcode
+///
+/// @param theta input angle from 0-65535
+/// @returns sin of theta, value between -32767 to 32767.
+LIB8STATIC int16_t sin16_avr(uint16_t theta) {
+    static const uint8_t data[] = { // one 4-byte record per section: base low byte, base high byte, slope, pad
+        0,           0,           49, 0, 6393 % 256,  6393 / 256,  48, 0,
+        12539 % 256, 12539 / 256, 44, 0, 18204 % 256, 18204 / 256, 38, 0,
+        23170 % 256, 23170 / 256, 31, 0, 27245 % 256, 27245 / 256, 23, 0,
+        30273 % 256, 30273 / 256, 14, 0, 32137 % 256, 32137 / 256, 4 /*,0*/}; // last pad byte omitted; index 31 is never read
+
+    uint16_t offset = (theta & 0x3FFF); // position within the current quadrant, 0..16383
+
+    // AVR doesn't have a multi-bit shift instruction,
+    // so if we say "offset >>= 3", gcc makes a tiny loop.
+    // Inserting empty volatile asm statements between each
+    // bit shift forces gcc to unroll the loop.
+    offset >>= 1; // 0..8191
+    asm volatile("");
+    offset >>= 1; // 0..4095
+    asm volatile("");
+    offset >>= 1; // 0..2047
+
+    if (theta & 0x4000) // second and fourth quadrants: walk the table backwards
+        offset = 2047 - offset;
+
+    uint8_t sectionX4;
+    sectionX4 = offset / 256; // section number, 0..7
+    sectionX4 *= 4; // byte index of that section's record in data[]
+
+    uint8_t m;
+
+    union {
+        uint16_t b;
+        struct {
+            uint8_t blo;
+            uint8_t bhi;
+        };
+    } u;
+
+    // in effect u.b = blo + (256 * bhi); this relies on blo overlaying the low byte of b (little-endian)
+    u.blo = data[sectionX4];
+    u.bhi = data[sectionX4 + 1];
+    m = data[sectionX4 + 2];
+
+    uint8_t secoffset8 = (uint8_t)(offset) / 2; // low byte of offset, halved: position within the section, 0..127
+
+    uint16_t mx = m * secoffset8; // slope times position within the section
+
+    int16_t y = mx + u.b; // linear interpolation: section base plus slope contribution
+    if (theta & 0x8000) // second half of the circle: negate
+        y = -y;
+
+    return y;
+}
+
+#else
+
+/// Platform-independent alias of the fast sin implementation
+#define sin16 sin16_C
+
+/// Fast 16-bit approximation of sin(x). This approximation never varies more
+/// than 0.69% from the floating point value you'd get by doing
+///    @code{.cpp}
+///    float s = sin(x) * 32767.0;
+///    @endcode
+///
+/// @param theta input angle from 0-65535
+/// @returns sin of theta, value between -32767 to 32767.
+LIB8STATIC int16_t sin16_C(uint16_t theta) {
+    static const uint16_t base[] = {0,     6393,  12539, 18204, // value at the start of each of the 8 sections
+                                    23170, 27245, 30273, 32137};
+    static const uint8_t slope[] = {49, 48, 44, 38, 31, 23, 14, 4}; // per-section slope applied to secoffset8
+
+    uint16_t offset = (theta & 0x3FFF) >> 3; // position within the quadrant, 0..2047
+    if (theta & 0x4000) // second and fourth quadrants: walk the tables backwards
+        offset = 2047 - offset;
+
+    uint8_t section = offset / 256; // 0..7
+    uint16_t b = base[section];
+    uint8_t m = slope[section];
+
+    uint8_t secoffset8 = (uint8_t)(offset) / 2; // low byte of offset, halved: position within the section, 0..127
+
+    uint16_t mx = m * secoffset8; // slope times position within the section
+    int16_t y = mx + b; // linear interpolation: section base plus slope contribution
+
+    if (theta & 0x8000) // second half of the circle: negate
+        y = -y;
+
+    return y;
+}
+
+#endif
+
+/// Fast 16-bit approximation of cos(x). This approximation never varies more
+/// than 0.69% from the floating point value you'd get by doing
+///    @code{.cpp}
+///    float s = cos(x) * 32767.0;
+///    @endcode
+///
+/// @param theta input angle from 0-65535
+/// @returns cos of theta, value between -32767 to 32767.
+#ifndef USE_SIN_32
+LIB8STATIC int16_t cos16(uint16_t theta) { return sin16(theta + 16384); } // sin shifted by 16384, a quarter of the 65536-step circle
+#endif
+
+///////////////////////////////////////////////////////////////////////
+// sin8() and cos8()
+// Fast 8-bit approximations of sin(x) & cos(x).
+
+/// Lookup table for sin8(): four interleaved pairs {base, slope}, one per 16-step section; slope * secoffset is shifted right by 4
+const uint8_t b_m16_interleave[] = {0, 49, 49, 41, 90, 27, 117, 10};
+
+#if defined(__AVR__) && !defined(LIB8_ATTINY)
+/// Platform-independent alias of the fast sin implementation
+#define sin8 sin8_avr
+
+/// Fast 8-bit approximation of sin(x). This approximation never varies more
+/// than 2% from the floating point value you'd get by doing
+///   @code{.cpp}
+///   float s = (sin(x) * 128.0) + 128;
+///   @endcode
+///
+/// @param theta input angle from 0-255
+/// @returns sin of theta, value between 0 and 255
+LIB8STATIC uint8_t sin8_avr(uint8_t theta) {
+    uint8_t offset = theta;
+
+    asm volatile("sbrc %[theta],6         \n\t" // skip the next instruction if bit 6 of theta is clear
+                 "com  %[offset]           \n\t" // one's complement: offset = 255 - offset (mirror the quadrant)
+                 : [theta] "+r"(theta), [offset] "+r"(offset));
+
+    offset &= 0x3F; // 0..63
+
+    uint8_t secoffset = offset & 0x0F; // 0..15
+    if (theta & 0x40) // mirrored quadrants use 1..16 instead of 0..15
+        ++secoffset;
+
+    uint8_t m16;
+    uint8_t b;
+
+    uint8_t section = offset >> 4; // 0..3
+    uint8_t s2 = section * 2; // index of this section's {base, slope} pair
+
+    const uint8_t *p = b_m16_interleave;
+    p += s2;
+    b = *p; // base value of the section
+    ++p;
+    m16 = *p; // slope of the section, applied as (m16 * secoffset) >> 4 below
+
+    uint8_t mx;
+    uint8_t xr1;
+    asm volatile("mul %[m16],%[secoffset]   \n\t" // r1:r0 = m16 * secoffset (16-bit product)
+                 "mov %[mx],r0              \n\t" // mx = low byte of the product
+                 "mov %[xr1],r1             \n\t" // xr1 = high byte of the product
+                 "eor  r1, r1               \n\t" // clear r1 again (avr-gcc keeps r1 as its zero register)
+                 "swap %[mx]                \n\t" // swap nibbles of the low byte...
+                 "andi %[mx],0x0F           \n\t" // ...and keep the low nibble: mx = low byte >> 4
+                 "swap %[xr1]               \n\t" // swap nibbles of the high byte...
+                 "andi %[xr1], 0xF0         \n\t" // ...and keep the high nibble: xr1 = high byte << 4
+                 "or   %[mx], %[xr1]        \n\t" // mx = low 8 bits of (product >> 4)
+                 : [mx] "=d"(mx), [xr1] "=d"(xr1)
+                 : [m16] "d"(m16), [secoffset] "d"(secoffset));
+
+    int8_t y = mx + b; // linear interpolation: section base plus slope contribution
+    if (theta & 0x80) // second half of the circle: negate
+        y = -y;
+
+    y += 128; // re-centre the signed result around 128 for the unsigned return value
+
+    return y;
+}
+
+#else
+
+/// Platform-independent alias of the fast sin implementation
+#define sin8 sin8_C
+
+/// Fast 8-bit approximation of sin(x). This approximation never varies more
+/// than 2% from the floating point value you'd get by doing
+///   @code{.cpp}
+///   float s = (sin(x) * 128.0) + 128;
+///   @endcode
+///
+/// @param theta input angle from 0-255
+/// @returns sin of theta, value between 0 and 255
+LIB8STATIC uint8_t sin8_C(uint8_t theta) {
+    uint8_t offset = theta;
+    if (theta & 0x40) { // second and fourth quadrants: mirror the position
+        offset = (uint8_t)255 - offset;
+    }
+    offset &= 0x3F; // 0..63
+
+    uint8_t secoffset = offset & 0x0F; // 0..15
+    if (theta & 0x40) // mirrored quadrants use 1..16 instead of 0..15
+        ++secoffset;
+
+    uint8_t section = offset >> 4; // 0..3
+    uint8_t s2 = section * 2; // index of this section's {base, slope} pair
+    const uint8_t *p = b_m16_interleave;
+    p += s2;
+    uint8_t b = *p; // base value of the section
+    ++p;
+    uint8_t m16 = *p; // slope of the section, applied with a right shift of 4 below
+
+    uint8_t mx = (m16 * secoffset) >> 4; // slope contribution within the section
+
+    int8_t y = mx + b; // linear interpolation: section base plus slope contribution
+    if (theta & 0x80) // second half of the circle: negate
+        y = -y;
+
+    y += 128; // re-centre the signed result around 128 for the unsigned return value
+
+    return y;
+}
+
+#endif
+
+/// Fast 8-bit approximation of cos(x). This approximation never varies more
+/// than 2% from the floating point value you'd get by doing
+///   @code{.cpp}
+///   float s = (cos(x) * 128.0) + 128;
+///   @endcode
+///
+/// @param theta input angle from 0-255
+/// @returns cos of theta, value between 0 and 255
+LIB8STATIC uint8_t cos8(uint8_t theta) { return sin8(theta + 64); } // sin shifted by 64, a quarter of the 256-step circle
+
+/// @} Trig
+/// @} lib8tion
+
+FL_DISABLE_WARNING_POP // kept inside the include guard so it always pairs with the push made inside the guard
+
+#endif // __INC_LIB8TION_TRIG_H
--- a/libraries/FastLED/src/lib8tion/types.h
+++ b/libraries/FastLED/src/lib8tion/types.h
@@ -0,0 +1,47 @@
+/// @file types.h
+/// Defines fractional types used for lib8tion functions
+
+#pragma once
+
+#include "fl/stdint.h"
+#include "fl/int.h"
+#include "fl/namespace.h"
+
+FASTLED_NAMESPACE_BEGIN
+
+/// @addtogroup lib8tion
+/// @{
+
+/// @defgroup FractionalTypes Fixed-Point Fractional Types. 
+/// Types for storing fractional data. 
+/// Note: Fractional types have been moved to fl/int.h
+/// @{
+
+
+/// Union exposing the internals of an IEEE754 "binary32" float through several overlapping views
+/// @see https://en.wikipedia.org/wiki/IEEE_754
+typedef union {
+    uint32_t i;  ///< raw value, as an integer
+    float    f;  ///< raw value, as a float
+    struct {
+        uint32_t mantissa: 23;  ///< 23-bit mantissa
+        uint32_t exponent:  8;  ///< 8-bit exponent
+        uint32_t signbit:   1;  ///< sign bit
+    };
+    struct {
+        uint32_t mant7 :  7;  ///< first 7 bits; presumably the low 7 mantissa bits (bit-field order is implementation-defined)
+        uint32_t mant16: 16;  ///< next 16 bits; presumably the upper 16 mantissa bits (7 + 16 = 23)
+        uint32_t exp_  :  8;  ///< 8 bits, same position as `exponent`
+        uint32_t sb_   :  1;  ///< 1 bit, same position as `signbit`
+    };
+    struct {
+        uint32_t mant_lo8 : 8;  ///< first 8 bits; presumably the low 8 mantissa bits
+        uint32_t mant_hi16_exp_lo1 : 16;  ///< next 16 bits; presumably the upper 15 mantissa bits plus the lowest exponent bit
+        uint32_t sb_exphi7 : 8;  ///< last 8 bits; presumably the upper 7 exponent bits plus the sign bit
+    };
+} IEEE754binary32_t;
+
+/// @} FractionalTypes
+/// @} lib8tion
+
+FASTLED_NAMESPACE_END