initial commit

This commit is contained in:
2026-02-12 00:45:31 -08:00
commit 5f168f370b
3024 changed files with 804889 additions and 0 deletions

View File

@@ -0,0 +1,161 @@
#pragma once
// Per-architecture selection of the lib8tion implementations.
// For each primitive FOO, FOO_C == 1 selects the portable C code path, while
// FOO_AVRASM == 1 or FOO_ARM_DSP_ASM == 1 selects an inline-assembly path.
// A selector that is left undefined evaluates to 0 inside #if / #elif tests.
#if defined(__arm__)
#if defined(FASTLED_TEENSY3)
// Can use Cortex M4 DSP instructions
#define QADD8_C 0
#define QADD7_C 0
#define QADD8_ARM_DSP_ASM 1
#define QADD7_ARM_DSP_ASM 1
#else
// Generic ARM
#define QADD8_C 1
#define QADD7_C 1
#endif // end of defined(FASTLED_TEENSY3)
// Everything other than qadd8/qadd7 uses the C implementation on ARM.
#define QSUB8_C 1
#define SCALE8_C 1
#define SCALE16BY8_C 1
#define SCALE16_C 1
#define ABS8_C 1
#define MUL8_C 1
#define QMUL8_C 1
#define ADD8_C 1
#define SUB8_C 1
#define EASE8_C 1
#define AVG8_C 1
#define AVG8R_C 1
#define AVG7_C 1
#define AVG16_C 1
#define AVG16R_C 1
#define AVG15_C 1
#define BLEND8_C 1
// end of #if defined(__arm__)
#elif defined(ARDUINO_ARCH_APOLLO3)
// Default to using the standard C functions for now
#define QADD8_C 1
#define QADD7_C 1
#define QSUB8_C 1
#define SCALE8_C 1
#define SCALE16BY8_C 1
#define SCALE16_C 1
#define ABS8_C 1
#define MUL8_C 1
#define QMUL8_C 1
#define ADD8_C 1
#define SUB8_C 1
#define EASE8_C 1
#define AVG8_C 1
#define AVG8R_C 1
#define AVG7_C 1
#define AVG16_C 1
#define AVG16R_C 1
#define AVG15_C 1
#define BLEND8_C 1
// end of #elif defined(ARDUINO_ARCH_APOLLO3)
#elif defined(__AVR__)
// AVR ATmega and friends Arduino
// These primitives need no hardware multiplier, so every AVR part
// (ATtiny included) uses the assembly versions.
#define QADD8_C 0
#define QADD7_C 0
#define QSUB8_C 0
#define ABS8_C 0
#define ADD8_C 0
#define SUB8_C 0
#define AVG8_C 0
#define AVG8R_C 0
#define AVG7_C 0
#define AVG16_C 0
#define AVG16R_C 0
#define AVG15_C 0
#define QADD8_AVRASM 1
#define QADD7_AVRASM 1
#define QSUB8_AVRASM 1
#define ABS8_AVRASM 1
#define ADD8_AVRASM 1
#define SUB8_AVRASM 1
#define AVG8_AVRASM 1
#define AVG8R_AVRASM 1
#define AVG7_AVRASM 1
#define AVG16_AVRASM 1
#define AVG16R_AVRASM 1
#define AVG15_AVRASM 1
// Note: these require hardware MUL instruction
// -- sorry, ATtiny!
#if !defined(LIB8_ATTINY)
#define SCALE8_C 0
#define SCALE16BY8_C 0
#define SCALE16_C 0
#define MUL8_C 0
#define QMUL8_C 0
#define EASE8_C 0
#define BLEND8_C 0
#define SCALE8_AVRASM 1
#define SCALE16BY8_AVRASM 1
#define SCALE16_AVRASM 1
#define MUL8_AVRASM 1
#define QMUL8_AVRASM 1
#define EASE8_AVRASM 1
#define CLEANUP_R1_AVRASM 1
#define BLEND8_AVRASM 1
#else
// On ATtiny, we just use C implementations
#define SCALE8_C 1
#define SCALE16BY8_C 1
#define SCALE16_C 1
#define MUL8_C 1
#define QMUL8_C 1
#define EASE8_C 1
#define BLEND8_C 1
#define SCALE8_AVRASM 0
#define SCALE16BY8_AVRASM 0
#define SCALE16_AVRASM 0
#define MUL8_AVRASM 0
#define QMUL8_AVRASM 0
#define EASE8_AVRASM 0
#define BLEND8_AVRASM 0
#endif // end of !defined(LIB8_ATTINY)
// end of #elif defined(__AVR__)
#else
// Doxygen: ignore these macros
/// @cond
// unspecified architecture, so
// no ASM, everything in C
#define QADD8_C 1
#define QADD7_C 1
#define QSUB8_C 1
#define SCALE8_C 1
#define SCALE16BY8_C 1
#define SCALE16_C 1
#define ABS8_C 1
#define MUL8_C 1
#define QMUL8_C 1
#define ADD8_C 1
#define SUB8_C 1
#define EASE8_C 1
#define AVG8_C 1
#define AVG8R_C 1
#define AVG7_C 1
#define AVG16_C 1
#define AVG16R_C 1
#define AVG15_C 1
#define BLEND8_C 1
/// @endcond
#endif

View File

@@ -0,0 +1,66 @@
/// @file intmap.h
/// Defines integer mapping functions
#pragma once
#include "fl/namespace.h"
#include "lib8static.h"
#include "fl/stdint.h"
FASTLED_NAMESPACE_BEGIN
/// @addtogroup lib8tion
/// @{
/// @defgroup intmap Integer Mapping Functions
/// Maps a scalar from one integer size to another.
///
/// For example, a value representing 40% as an 8-bit unsigned integer would be
/// `102 / 255`. Using `map8_to_16(uint8_t)` to convert that to a 16-bit
/// unsigned integer would give you `26,214 / 65,535`, exactly 40% through the
/// larger range.
///
/// @{
/// Widen an 8-bit scalar to 16 bits by multiplying with 0x101,
/// so that 0x00 maps to 0x0000 and 0xFF maps to 0xFFFF.
/// @param x 8-bit input value
/// @returns x scaled to the 16-bit range
LIB8STATIC_ALWAYS_INLINE uint16_t map8_to_16(uint8_t x) {
    const uint16_t widened = x;
    return widened * 0x101;
}
/// Widen a 16-bit scalar to 32 bits by multiplying with 0x10001,
/// so that 0x0000 maps to 0 and 0xFFFF maps to 0xFFFFFFFF.
/// @param x 16-bit input value
/// @returns x scaled to the 32-bit range
LIB8STATIC_ALWAYS_INLINE uint32_t map16_to_32(uint16_t x) {
    const uint32_t widened = x;
    return widened * 0x10001;
}
/// Narrow a 16-bit scalar to 8 bits, rounding to nearest.
/// Inputs of 0xff00 and above are clamped to 0xff so that adding the
/// rounding offset can never carry out of the high byte.
/// @param x 16-bit input value
/// @returns x scaled to the 8-bit range
LIB8STATIC_ALWAYS_INLINE uint8_t map16_to_8(uint16_t x) {
    // Tested to be nearly identical to double precision floating point
    // doing this operation.
    if (x == 0) {
        return 0;
    } else if (x >= 0xff00) {
        return 0xff;
    }
    // Add half of the discarded range (128) before dropping the low byte.
    const uint16_t rounded = x + 128;
    return uint8_t(rounded >> 8);
}
/// Narrow a 32-bit scalar to 16 bits, rounding to nearest.
/// Inputs of 0xffff0000 and above are clamped to 0xffff so that adding the
/// rounding offset can never overflow.
/// @param x 32-bit input value
/// @returns x scaled to the 16-bit range
LIB8STATIC_ALWAYS_INLINE uint16_t map32_to_16(uint32_t x) {
    // Tested to be nearly identical to double precision floating point
    // doing this operation.
    if (x == 0) {
        return 0;
    } else if (x >= 0xffff0000) {
        return 0xffff;
    }
    // Add half of the discarded range (32768) before dropping the low word.
    const uint32_t rounded = x + 32768;
    return uint16_t(rounded >> 16);
}
/// Widen an 8-bit scalar to 32 bits by multiplying with 0x1010101,
/// so that 0x00 maps to 0 and 0xFF maps to 0xFFFFFFFF.
/// @param x 8-bit input value
/// @returns x scaled to the 32-bit range
LIB8STATIC_ALWAYS_INLINE uint32_t map8_to_32(uint8_t x) {
    const uint32_t widened = x;
    return widened * 0x1010101;
}
/// @} intmap
/// @} lib8tion
FASTLED_NAMESPACE_END

View File

@@ -0,0 +1,14 @@
/// @file lib8static.h
/// Defines static inlining macros for lib8tion functions
#pragma once
/// @addtogroup lib8tion
/// @{
/// Define a LIB8TION member function as static inline with an "unused" attribute,
/// so that including the header without calling the function does not warn.
#define LIB8STATIC __attribute__ ((unused)) static inline
/// Define a LIB8TION member function as static inline with the
/// "always_inline" attribute, asking the compiler to inline every call.
#define LIB8STATIC_ALWAYS_INLINE __attribute__ ((always_inline)) static inline
/// @} lib8tion

View File

@@ -0,0 +1,701 @@
#pragma once
#include "lib8tion/config.h"
#include "scale8.h"
#include "lib8tion/lib8static.h"
#include "intmap.h"
#include "fl/namespace.h"
#include "fl/compiler_control.h"
FL_DISABLE_WARNING_PUSH
FL_DISABLE_WARNING_UNUSED_PARAMETER
FL_DISABLE_WARNING_RETURN_TYPE
FL_DISABLE_WARNING_IMPLICIT_INT_CONVERSION
FASTLED_NAMESPACE_BEGIN
/// @file math8.h
/// Fast, efficient 8-bit math functions specifically
/// designed for high-performance LED programming.
/// @ingroup lib8tion
/// @{
/// @defgroup Math Basic Math Operations
/// Fast, efficient 8-bit math functions specifically
/// designed for high-performance LED programming.
///
/// Because of the AVR (Arduino) and ARM assembly language
/// implementations provided, using these functions often
/// results in smaller and faster code than the equivalent
/// program using plain "C" arithmetic and logic.
/// @{
/// Add one byte to another, saturating at 0xFF
/// @param i first byte to add
/// @param j second byte to add
/// @returns the sum of i + j, capped at 0xFF
/// @note One implementation is compiled in, chosen by the QADD8_C,
/// QADD8_AVRASM and QADD8_ARM_DSP_ASM configuration macros; if none of
/// them equals 1 the build stops with an #error.
LIB8STATIC_ALWAYS_INLINE uint8_t qadd8(uint8_t i, uint8_t j) {
#if QADD8_C == 1
// Portable C: form the sum in a wider type so overflow past 255 is visible.
unsigned int t = i + j;
if (t > 255)
t = 255;
return static_cast<uint8_t>(t);
#elif QADD8_AVRASM == 1
asm volatile(
/* First, add j to i, conditioning the C flag */
"add %0, %1 \n\t"
/* Now test the C flag.
If C is clear, we branch around a load of 0xFF into i.
If C is set, we go ahead and load 0xFF into i.
*/
"brcc L_%= \n\t"
"ldi %0, 0xFF \n\t"
"L_%=: "
: "+d"(i) // r16-r31, restricted by ldi
: "r"(j));
return i;
#elif QADD8_ARM_DSP_ASM == 1
// ARM DSP: one uqadd8 instruction performs the unsigned saturating add in place.
asm volatile("uqadd8 %0, %0, %1" : "+r"(i) : "r"(j));
return i;
#else
#error "No implementation for qadd8 available."
#endif
}
/// Add one byte to another, saturating at 0x7F and -0x80
/// @param i first byte to add
/// @param j second byte to add
/// @returns the sum of i + j, capped at 0x7F and -0x80
/// @note One implementation is compiled in, chosen by the QADD7_C,
/// QADD7_AVRASM and QADD7_ARM_DSP_ASM configuration macros; if none of
/// them equals 1 the build stops with an #error.
LIB8STATIC_ALWAYS_INLINE int8_t qadd7(int8_t i, int8_t j) {
#if QADD7_C == 1
// Portable C: form the sum in 16 bits, then clamp to the int8_t range.
int16_t t = i + j;
if (t > 127)
t = 127;
else if (t < -128)
t = -128;
return static_cast<int8_t>(t);
#elif QADD7_AVRASM == 1
asm volatile(
/* First, add j to i, conditioning the V and C flags */
"add %0, %1 \n\t"
/* Now test the V flag.
If V is clear, we branch to end.
If V is set, we go ahead and load 0x7F into i.
*/
"brvc L_%= \n\t"
"ldi %0, 0x7F \n\t"
/* When both numbers are negative, C is set.
Adding it to make result negative (0x7F + 1 = 0x80). */
"adc %0, __zero_reg__\n\t"
"L_%=: "
: "+d"(i) // r16-r31, restricted by ldi
: "r"(j));
return i;
#elif QADD7_ARM_DSP_ASM == 1
// ARM DSP: one qadd8 instruction performs the signed saturating add in place.
asm volatile("qadd8 %0, %0, %1" : "+r"(i) : "r"(j));
return i;
#else
#error "No implementation for qadd7 available."
#endif
}
/// Subtract one byte from another, saturating at 0x00
/// @param i byte to subtract from
/// @param j byte to subtract
/// @returns i - j with a floor of 0
/// @note One implementation is compiled in, chosen by the QSUB8_C and
/// QSUB8_AVRASM configuration macros; if neither equals 1 the build
/// stops with an #error.
LIB8STATIC_ALWAYS_INLINE uint8_t qsub8(uint8_t i, uint8_t j) {
#if QSUB8_C == 1
// Portable C: compute in a signed int so a negative difference is visible.
int t = i - j;
if (t < 0)
t = 0;
return static_cast<uint8_t>(t);
#elif QSUB8_AVRASM == 1
asm volatile(
/* First, subtract j from i, conditioning the C flag */
"sub %0, %1 \n\t"
/* Now test the C flag.
If C is clear, we branch around a load of 0x00 into i.
If C is set, we go ahead and load 0x00 into i.
*/
"brcc L_%= \n\t"
"ldi %0, 0x00 \n\t"
"L_%=: "
: "+d"(i) // r16-r31, restricted by ldi
: "r"(j));
return i;
#else
#error "No implementation for qsub8 available."
#endif
}
/// Add one byte to another, with 8-bit result
/// @note This does not saturate and may overflow!
/// @param i first byte to add
/// @param j second byte to add
/// @returns the sum of i + j, 8-bit
LIB8STATIC_ALWAYS_INLINE uint8_t add8(uint8_t i, uint8_t j) {
#if ADD8_C == 1
// Portable C: the cast keeps only the low 8 bits of the sum.
int t = i + j;
return static_cast<uint8_t>(t);
#elif ADD8_AVRASM == 1
// Add j to i, period.
asm volatile("add %0, %1" : "+r"(i) : "r"(j));
return i;
#else
#error "No implementation for add8 available."
#endif
}
/// Add one byte to two bytes, with 16-bit result
/// @note This does not saturate and may overflow!
/// @param i first value to add, 8-bit
/// @param j second value to add, 16-bit
/// @returns the sum of i + j, 16-bit
LIB8STATIC_ALWAYS_INLINE uint16_t add8to16(uint8_t i, uint16_t j) {
#if ADD8_C == 1
    uint16_t t = i + j;
    return t;
#elif ADD8_AVRASM == 1
    // Add i (one byte) to j (two bytes): the low byte takes i, the high byte
    // takes only the carry. The 16-bit sum accumulates in j.
    asm volatile("add %A[j], %[i] \n\t"
                 "adc %B[j], __zero_reg__ \n\t"
                 : [j] "+r"(j)
                 : [i] "r"(i));
    // The sum is held in j; i is the untouched 8-bit addend.
    return j;
#else
#error "No implementation for add8to16 available."
#endif
}
/// Subtract one byte from another, 8-bit result
/// @note This does not saturate and may overflow!
/// @param i byte to subtract from
/// @param j byte to subtract
/// @returns i - j
LIB8STATIC_ALWAYS_INLINE uint8_t sub8(uint8_t i, uint8_t j) {
#if SUB8_C == 1
// Portable C: the cast keeps only the low 8 bits of the difference.
int t = i - j;
return static_cast<uint8_t>(t);
#elif SUB8_AVRASM == 1
// Subtract j from i, period.
asm volatile("sub %0, %1" : "+r"(i) : "r"(j));
return i;
#else
#error "No implementation for sub8 available."
#endif
}
/// Calculate an integer average of two unsigned
/// 8-bit integer values (uint8_t), rounded down.
/// Fractional results are rounded down, e.g. avg8(20,41) = 30
/// @param i first value to average
/// @param j second value to average
/// @returns mean average of i and j, rounded down
LIB8STATIC_ALWAYS_INLINE uint8_t avg8(uint8_t i, uint8_t j) {
#if AVG8_C == 1
// Portable C: the operands are promoted before the add, so the 9-bit sum is kept.
return (i + j) >> 1;
#elif AVG8_AVRASM == 1
asm volatile(
/* First, add j to i, 9th bit overflows into C flag */
"add %0, %1 \n\t"
/* Divide by two, moving C flag into high 8th bit */
"ror %0 \n\t"
: "+r"(i)
: "r"(j));
return i;
#else
#error "No implementation for avg8 available."
#endif
}
/// Calculate an integer average of two unsigned
/// 16-bit integer values (uint16_t), rounded down.
/// Fractional results are rounded down, e.g. avg16(20,41) = 30
/// @param i first value to average
/// @param j second value to average
/// @returns mean average of i and j, rounded down
LIB8STATIC_ALWAYS_INLINE uint16_t avg16(uint16_t i, uint16_t j) {
#if AVG16_C == 1
// Portable C: accumulate in 32 bits so the 17-bit sum is kept before halving.
// return (uint32_t)((uint32_t)(i) + (uint32_t)(j)) >> 1;
uint32_t tmp = i;
tmp += j;
return static_cast<uint16_t>(tmp >> 1);
#elif AVG16_AVRASM == 1
asm volatile(
/* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
"add %A[i], %A[j] \n\t"
/* Now, add C + jHi to iHi, 17th bit overflows into C flag */
"adc %B[i], %B[j] \n\t"
/* Divide iHi by two, moving C flag into high 16th bit,
old 9th bit now in C */
"ror %B[i] \n\t"
/* Divide iLo by two, moving C flag into high 8th bit */
"ror %A[i] \n\t"
: [i] "+r"(i)
: [j] "r"(j));
return i;
#else
#error "No implementation for avg16 available."
#endif
}
/// Calculate an integer average of two unsigned
/// 8-bit integer values (uint8_t), rounded up.
/// Fractional results are rounded up, e.g. avg8r(20,41) = 31
/// @param i first value to average
/// @param j second value to average
/// @returns mean average of i and j, rounded up
LIB8STATIC_ALWAYS_INLINE uint8_t avg8r(uint8_t i, uint8_t j) {
#if AVG8R_C == 1
// Portable C: adding 1 before halving rounds an odd sum up.
return (i + j + 1) >> 1;
#elif AVG8R_AVRASM == 1
asm volatile(
/* First, add j to i, 9th bit overflows into C flag */
"add %0, %1 \n\t"
/* Divide by two, moving C flag into high 8th bit,
old 1st bit now in C */
"ror %0 \n\t"
/* Add C flag (the bit shifted out), rounding the result up */
"adc %0, __zero_reg__\n\t"
: "+r"(i)
: "r"(j));
return i;
#else
#error "No implementation for avg8r available."
#endif
}
/// Calculate an integer average of two unsigned
/// 16-bit integer values (uint16_t), rounded up.
/// Fractional results are rounded up, e.g. avg16r(20,41) = 31
/// @param i first value to average
/// @param j second value to average
/// @returns mean average of i and j, rounded up
LIB8STATIC_ALWAYS_INLINE uint16_t avg16r(uint16_t i, uint16_t j) {
#if AVG16R_C == 1
// Portable C: accumulate in 32 bits; adding 1 before halving rounds up.
// return (uint32_t)((uint32_t)(i) + (uint32_t)(j) + 1) >> 1;
uint32_t tmp = i;
tmp += j;
tmp += 1;
return static_cast<uint16_t>(tmp >> 1);
#elif AVG16R_AVRASM == 1
asm volatile(
/* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
"add %A[i], %A[j] \n\t"
/* Now, add C + jHi to iHi, 17th bit overflows into C flag */
"adc %B[i], %B[j] \n\t"
/* Divide iHi by two, moving C flag into high 16th bit,
old 9th bit now in C */
"ror %B[i] \n\t"
/* Divide iLo by two, moving C flag into high 8th bit,
old 1st bit now in C */
"ror %A[i] \n\t"
/* Add C flag (the bit shifted out), propagating any carry to the high byte */
"adc %A[i], __zero_reg__\n\t"
"adc %B[i], __zero_reg__\n\t"
: [i] "+r"(i)
: [j] "r"(j));
return i;
#else
#error "No implementation for avg16r available."
#endif
}
/// Calculate an integer average of two signed 7-bit
/// integers (int8_t).
/// If the first argument is even, result is rounded down.
/// If the first argument is odd, result is rounded up.
/// @param i first value to average
/// @param j second value to average
/// @returns mean average of i and j, rounded
LIB8STATIC_ALWAYS_INLINE int8_t avg7(int8_t i, int8_t j) {
#if AVG7_C == 1
    // Halve each operand separately, then add back the low bit of i.
    return (i >> 1) + (j >> 1) + (i & 0x1);
#elif AVG7_AVRASM == 1
    // j is shifted in place by the asm, so it must be declared as a
    // read-write operand. Declaring it input-only would let the compiler
    // assume its register still holds the caller's value afterwards.
    asm volatile(
        /* halve j, throwing away its lowest bit */
        "asr %1 \n\t"
        /* halve i, with its lowest bit going into C */
        "asr %0 \n\t"
        /* add j + C to i */
        "adc %0, %1 \n\t"
        : "+r"(i), "+r"(j));
    return i;
#else
#error "No implementation for avg7 available."
#endif
}
/// Calculate an integer average of two signed 15-bit
/// integers (int16_t).
/// If the first argument is even, result is rounded down.
/// If the first argument is odd, result is rounded up.
/// @param i first value to average
/// @param j second value to average
/// @returns mean average of i and j, rounded
LIB8STATIC_ALWAYS_INLINE int16_t avg15(int16_t i, int16_t j) {
#if AVG15_C == 1
    // Halve each operand separately, then add back the low bit of i.
    return (i >> 1) + (j >> 1) + (i & 0x1);
#elif AVG15_AVRASM == 1
    // j is shifted in place by the asm, so it must be declared as a
    // read-write operand. Declaring it input-only would let the compiler
    // assume its registers still hold the caller's value afterwards.
    asm volatile(
        /* first divide j by 2, throwing away lowest bit */
        "asr %B[j] \n\t"
        "ror %A[j] \n\t"
        /* now divide i by 2, with lowest bit going into C */
        "asr %B[i] \n\t"
        "ror %A[i] \n\t"
        /* add j + C to i */
        "adc %A[i], %A[j] \n\t"
        "adc %B[i], %B[j] \n\t"
        : [i] "+r"(i), [j] "+r"(j));
    return i;
#else
#error "No implementation for avg15 available."
#endif
}
/// Calculate the remainder of one unsigned 8-bit
/// value divided by another, aka A % M.
/// Implemented by repeated subtraction, which is
/// very compact, and very fast if A is "probably"
/// less than M. If A is a large multiple of M,
/// the loop has to execute multiple times. However,
/// even in that case, the loop is only two
/// instructions long on AVR, i.e., quick.
/// @param a dividend byte
/// @param m divisor byte
/// @returns remainder of a / m (i.e. a % m)
/// @note m must be non-zero: with m == 0 the subtraction loop never
/// makes progress and does not terminate.
LIB8STATIC_ALWAYS_INLINE uint8_t mod8(uint8_t a, uint8_t m) {
#if defined(__AVR__)
// Subtract m until the subtraction borrows, then add m back once
// to undo the final, one-too-many subtraction.
asm volatile("L_%=: sub %[a],%[m] \n\t"
" brcc L_%= \n\t"
" add %[a],%[m] \n\t"
: [a] "+r"(a)
: [m] "r"(m));
#else
while (a >= m)
a -= m;
#endif
return a;
}
/// Add two numbers, and calculate the modulo
/// of the sum and a third number, M.
/// In other words, it returns (A+B) % M.
/// It is designed as a compact mechanism for
/// incrementing a "mode" switch and wrapping
/// around back to "mode 0" when the switch
/// goes past the end of the available range.
/// e.g. if you have seven modes, this switches
/// to the next one and wraps around if needed:
/// @code{.cpp}
/// mode = addmod8( mode, 1, 7);
/// @endcode
/// @param a dividend byte
/// @param b value to add to the dividend
/// @param m divisor byte
/// @returns remainder of (a + b) / m
/// @note The sum a + b is formed in 8 bits, so if it exceeds 255 it wraps
/// modulo 256 before the remainder is taken.
/// @note m must be non-zero, otherwise the reduction loop does not terminate.
/// @see mod8() for notes on performance.
LIB8STATIC uint8_t addmod8(uint8_t a, uint8_t b, uint8_t m) {
#if defined(__AVR__)
// 8-bit add, then the same subtract-until-borrow reduction as mod8().
asm volatile(" add %[a],%[b] \n\t"
"L_%=: sub %[a],%[m] \n\t"
" brcc L_%= \n\t"
" add %[a],%[m] \n\t"
: [a] "+r"(a)
: [b] "r"(b), [m] "r"(m));
#else
a += b;
while (a >= m)
a -= m;
#endif
return a;
}
/// Subtract two numbers, and calculate the modulo
/// of the difference and a third number, M.
/// In other words, it returns (A-B) mod M, always in the range 0..M-1.
/// It is designed as a compact mechanism for
/// decrementing a "mode" switch and wrapping
/// around to the last mode when the switch
/// goes past the start of the available range.
/// e.g. if you have seven modes, this switches
/// to the previous one and wraps around if needed
/// (mode 0 becomes mode 6):
/// @code{.cpp}
/// mode = submod8( mode, 1, 7);
/// @endcode
/// @param a dividend byte
/// @param b value to subtract from the dividend
/// @param m divisor byte
/// @returns remainder of (a - b) / m, wrapped into 0..m-1
/// @note m must be non-zero, otherwise the reduction loop does not terminate.
/// @see mod8() for notes on performance.
LIB8STATIC uint8_t submod8(uint8_t a, uint8_t b, uint8_t m) {
    // An 8-bit a - b wraps modulo 256 when b > a, which is the wrong modulus
    // unless m divides 256. Instead, reduce |a - b| and mirror it afterwards.
    const bool negative = b > a;
    if (negative) {
        const uint8_t swapped = a;
        a = b;
        b = swapped;
    }
#if defined(__AVR__)
    // 8-bit subtract, then the same subtract-until-borrow reduction as mod8().
    asm volatile(" sub %[a],%[b] \n\t"
                 "L_%=: sub %[a],%[m] \n\t"
                 " brcc L_%= \n\t"
                 " add %[a],%[m] \n\t"
                 : [a] "+r"(a)
                 : [b] "r"(b), [m] "r"(m));
#else
    a -= b;
    while (a >= m)
        a -= m;
#endif
    // a now holds |a - b| mod m. For a negative difference the result is
    // m minus that, except that 0 stays 0.
    if (negative && a != 0) {
        a = m - a;
    }
    return a;
}
/// 8x8 bit multiplication, with 8-bit result.
/// @param i first byte to multiply
/// @param j second byte to multiply
/// @returns the low 8 bits of the product of i * j
/// @note This does not saturate and may overflow!
LIB8STATIC_ALWAYS_INLINE uint8_t mul8(uint8_t i, uint8_t j) {
#if MUL8_C == 1
    // Multiply as unsigned: the product can reach 65025, which does not fit
    // in a signed 16-bit int (the int width on AVR/ATtiny).
    return ((unsigned)i * (unsigned)j) & 0xFF;
#elif MUL8_AVRASM == 1
    asm volatile(
        /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
        "mul %0, %1 \n\t"
        /* Extract the LOW 8-bits (r0) */
        "mov %0, r0 \n\t"
        /* Restore r1 to "0"; it's expected to always be that */
        "clr __zero_reg__ \n\t"
        : "+r"(i)
        : "r"(j)
        : "r0", "r1");
    return i;
#else
#error "No implementation for mul8 available."
#endif
}
/// 8x8 bit multiplication with 8-bit result, saturating at 0xFF.
/// @param i first byte to multiply
/// @param j second byte to multiply
/// @returns the product of i * j, capping at 0xFF
LIB8STATIC_ALWAYS_INLINE uint8_t qmul8(uint8_t i, uint8_t j) {
#if QMUL8_C == 1
// Portable C: form the full product as unsigned, then clamp to one byte.
unsigned p = (unsigned)i * (unsigned)j;
if (p > 255)
p = 255;
return p;
#elif QMUL8_AVRASM == 1
asm volatile(
/* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
" mul %0, %1 \n\t"
/* Extract the LOW 8-bits (r0) */
" mov %0, r0 \n\t"
/* If high byte of result is zero, all is well. */
" tst r1 \n\t"
" breq Lnospill_%= \n\t"
/* If high byte of result > 0, saturate to 0xFF */
" ldi %0, 0xFF \n\t"
"Lnospill_%=: \n\t"
/* Restore r1 to "0"; it's expected to always be that */
" clr __zero_reg__ \n\t"
: "+d"(i) // r16-r31, restricted by ldi
: "r"(j)
: "r0", "r1");
return i;
#else
#error "No implementation for qmul8 available."
#endif
}
/// Take the absolute value of a signed 8-bit integer (int8_t).
/// @param i value to take the absolute value of
/// @returns i if it is non-negative, otherwise the negation of i
/// @note +128 is not representable in int8_t, so an input of -128
/// cannot yield +128.
LIB8STATIC_ALWAYS_INLINE int8_t abs8(int8_t i) {
#if ABS8_C == 1
if (i < 0)
i = -i;
return i;
#elif ABS8_AVRASM == 1
asm volatile(
/* First, check the high bit, and prepare to skip if it's clear */
"sbrc %0, 7 \n"
/* Negate the value */
"neg %0 \n"
: "+r"(i)
: "r"(i));
return i;
#else
#error "No implementation for abs8 available."
#endif
}
/// Square root for 16-bit integers.
/// About three times faster and five times smaller
/// than Arduino's general `sqrt` on AVR.
/// Implemented as a binary search over the 8-bit candidate roots.
/// @param x value to take the square root of
/// @returns the integer square root of x, rounded down
LIB8STATIC uint8_t sqrt16(uint16_t x) {
    // 0 and 1 are their own square roots.
    if (x <= 1) {
        return x;
    }

    uint8_t low = 1; // lower bound
    uint8_t hi, mid;

    if (x > 7904) {
        hi = 255;
    } else {
        hi = (x >> 5) + 8; // initial estimate for upper bound
    }

    do {
        mid = (low + hi) >> 1;
        // Square in uint16_t: mid * mid can reach 65025, which does not fit
        // in a signed 16-bit int (the int width on AVR).
        if ((uint16_t)((uint16_t)mid * mid) > x) {
            hi = mid - 1;
        } else {
            if (mid == 255) {
                // low = mid + 1 would wrap to 0; 255 is the largest root.
                return 255;
            }
            low = mid + 1;
        }
    } while (hi >= low);

    // low is the first candidate whose square exceeds x.
    return low - 1;
}
/// Square root for 8-bit integers.
/// The input is first widened with map8_to_16() and then passed to sqrt16().
/// @param x value to take the square root of
/// @returns sqrt16() of the widened input
LIB8STATIC_ALWAYS_INLINE uint8_t sqrt8(uint8_t x) {
    const uint16_t widened = map8_to_16(x);
    return sqrt16(widened);
}
/// Blend a variable proportion (0-255) of one byte to another.
/// @param a the starting byte value
/// @param b the byte value to blend toward
/// @param amountOfB the proportion (0-255) of b to blend
/// @returns a byte value between a and b, inclusive
#if (FASTLED_BLEND_FIXED == 1)
LIB8STATIC uint8_t blend8(uint8_t a, uint8_t b, uint8_t amountOfB) {

    // The BLEND_FIXED formula is
    //
    //   result = (  A*(amountOfA) + B*(amountOfB)              )/ 256
    //
    // ...where amountOfA = 255-amountOfB.
    //
    // This formula will never return 255, which is why the BLEND_FIXED +
    // SCALE8_FIXED version is
    //
    //   result = (  A*(amountOfA) + A + B*(amountOfB) + B      ) / 256
    //
    // We can rearrange this formula for some great optimisations.
    //
    //   result = (  A*(amountOfA) + A + B*(amountOfB) + B      ) / 256
    //          = (  A*(255-amountOfB) + A + B*(amountOfB) + B  ) / 256
    //          = (  A*(256-amountOfB) + B*(amountOfB) + B      ) / 256
    //          = (  A*256 + B + B*(amountOfB) - A*(amountOfB)  ) / 256
    //              (this is the version used in SCALE8_FIXED AVR below)
    //          = (  A*256 + B + (B-A)*(amountOfB)              ) / 256
    //              (this is the version used in SCALE8_FIXED C below)

    uint16_t partial;
    uint8_t result;

#if BLEND8_C == 1

    // All products are formed in uint16_t: they can reach 65025, which does
    // not fit in a signed 16-bit int (the int width on AVR/ATtiny).
#if (FASTLED_SCALE8_FIXED == 1)
    partial = ((uint16_t)a << 8) | b; // A*256 + B

    // on many platforms this compiles to a single multiply of (B-A) * amountOfB
    partial += ((uint16_t)b * amountOfB);
    partial -= ((uint16_t)a * amountOfB);

#else
    uint8_t amountOfA = 255 - amountOfB;

    // on the other hand, this compiles to two multiplies, and gives the "wrong"
    // answer :]
    partial = ((uint16_t)a * amountOfA);
    partial += ((uint16_t)b * amountOfB);
#endif

    // Dividing by 256 is taking the high byte.
    result = partial >> 8;

    return result;

#elif BLEND8_AVRASM == 1

#if (FASTLED_SCALE8_FIXED == 1)

    // 1 or 2 cycles depending on how the compiler optimises
    partial = ((uint16_t)a << 8) | b;

    // 7 cycles
    asm volatile(" mul %[a], %[amountOfB] \n\t"
                 " sub %A[partial], r0 \n\t"
                 " sbc %B[partial], r1 \n\t"
                 " mul %[b], %[amountOfB] \n\t"
                 " add %A[partial], r0 \n\t"
                 " adc %B[partial], r1 \n\t"
                 " clr __zero_reg__ \n\t"
                 : [partial] "+r"(partial)
                 : [amountOfB] "r"(amountOfB), [a] "r"(a), [b] "r"(b)
                 : "r0", "r1");

#else

    // non-SCALE8-fixed version

    // 7 cycles
    // partial is written by movw before a is read by the second mul, so it
    // is marked early-clobber ("=&r") to keep the compiler from placing it
    // in the same register as an input.
    asm volatile(
        /* partial = b * amountOfB */
        " mul %[b], %[amountOfB] \n\t"
        " movw %A[partial], r0 \n\t"

        /* amountOfB (aka amountOfA) = 255 - amountOfB */
        " com %[amountOfB] \n\t"

        /* partial += a * amountOfB (aka amountOfA) */
        " mul %[a], %[amountOfB] \n\t"

        " add %A[partial], r0 \n\t"
        " adc %B[partial], r1 \n\t"

        " clr __zero_reg__ \n\t"

        : [partial] "=&r"(partial), [amountOfB] "+r"(amountOfB)
        : [a] "r"(a), [b] "r"(b)
        : "r0", "r1");

#endif

    // Dividing by 256 is taking the high byte.
    result = partial >> 8;

    return result;

#else
#error "No implementation for blend8 available."
#endif
}

#else
LIB8STATIC uint8_t blend8(uint8_t a, uint8_t b, uint8_t amountOfB) {
    // This version loses precision in the integer math
    // and can actually return results outside of the range
    // from a to b. Its use is not recommended.
    uint8_t result;
    uint8_t amountOfA = 255 - amountOfB;
    result = scale8_LEAVING_R1_DIRTY(a, amountOfA) +
             scale8_LEAVING_R1_DIRTY(b, amountOfB);
    cleanup_R1();
    return result;
}
#endif
/// @} Math
/// @} lib8tion
FASTLED_NAMESPACE_END
FL_DISABLE_WARNING_POP

View File

@@ -0,0 +1,24 @@
#pragma once
///////////////////////////////////////////////////////////////////////
///
/// @defgroup FastMemory Fast Memory Functions for AVR
/// Alternatives to memmove, memcpy, and memset that are
/// faster on AVR than standard avr-libc 1.8.
/// @{
#if defined(__AVR__) || defined(FASTLED_DOXYGEN)
// On AVR (and when generating documentation) the three routines are
// declared here with C linkage; their definitions are not in this header.
extern "C" {
void * memmove8( void * dst, const void * src, uint16_t num ); ///< Faster alternative to memmove() on AVR
void * memcpy8 ( void * dst, const void * src, uint16_t num ) __attribute__ ((noinline)); ///< Faster alternative to memcpy() on AVR
void * memset8 ( void * ptr, uint8_t value, uint16_t num ) __attribute__ ((noinline)) ; ///< Faster alternative to memset() on AVR
}
#else
#include "fl/memfill.h"
// On non-AVR platforms these names are plain aliases: memmove8 maps to
// memmove, while memcpy8 and memset8 map to fl::memcopy and fl::memfill.
#define memmove8 memmove
#define memcpy8 fl::memcopy
#define memset8 fl::memfill
#endif
/// @} FastMemory

View File

@@ -0,0 +1,58 @@
#pragma once
#include "fl/stdint.h"
#include "fl/namespace.h"
FASTLED_NAMESPACE_BEGIN
/// @addtogroup FractionalTypes
/// @{
/// Template class for representing fractional ints.
/// The value is stored as two bit-fields: an integer part of I bits and a
/// fractional part of F bits, the latter counting units of 1/(1<<F).
/// @tparam T underlying type for data storage
/// @tparam F number of fractional bits
/// @tparam I number of integer bits
template<class T, int F, int I> class qfx {
    T i:I; ///< Integer value of number
    T f:F; ///< Fractional value of number
public:
    /// Constructor, storing a float as a fractional int
    /// @param fx value to store; its whole part goes to the integer field and
    /// the remainder, scaled by 1<<F, to the fractional field
    qfx(float fx) { i = fx; f = (fx-i) * (1<<F); }
    /// Constructor, storing a fractional int directly
    /// @param _i integer part
    /// @param _f fractional part, in units of 1/(1<<F)
    qfx(uint8_t _i, uint8_t _f) {i=_i; f=_f; }

    /// Multiply the fractional int by a value
    /// @param v value to scale
    /// @returns v times the integer part plus v times the fractional part
    /// shifted right by F bits
    uint32_t operator*(uint32_t v) const { return (v*i) + ((v*f)>>F); }
    /// @copydoc operator*(uint32_t)
    uint16_t operator*(uint16_t v) const { return (v*i) + ((v*f)>>F); }
    /// @copydoc operator*(uint32_t)
    int32_t operator*(int32_t v) const { return (v*i) + ((v*f)>>F); }
    /// @copydoc operator*(uint32_t)
    int16_t operator*(int16_t v) const { return (v*i) + ((v*f)>>F); }
#if defined(FASTLED_ARM) || defined(FASTLED_RISCV) || defined(FASTLED_APOLLO3)
    /// @copydoc operator*(uint32_t)
    int operator*(int v) const { return (v*i) + ((v*f)>>F); }
#endif
};

// Commutative forms: an integer on the left-hand side forwards to the member
// operator. The qfx operand is taken by const reference so const objects and
// temporaries can be used as well.
template<class T, int F, int I> static uint32_t operator*(uint32_t v, const qfx<T,F,I> & q) { return q * v; }
template<class T, int F, int I> static uint16_t operator*(uint16_t v, const qfx<T,F,I> & q) { return q * v; }
template<class T, int F, int I> static int32_t operator*(int32_t v, const qfx<T,F,I> & q) { return q * v; }
template<class T, int F, int I> static int16_t operator*(int16_t v, const qfx<T,F,I> & q) { return q * v; }
#if defined(FASTLED_ARM) || defined(FASTLED_RISCV) || defined(FASTLED_APOLLO3)
template<class T, int F, int I> static int operator*(int v, const qfx<T,F,I> & q) { return q * v; }
#endif

// NOTE: the template parameters are <T, F, I>, with the FRACTIONAL bit count
// first, whereas the type names follow the "integer.fraction" convention.

/// A 4.4 integer (4 bits integer, 4 bits fraction)
typedef qfx<uint8_t, 4,4> q44;
/// A 6.2 integer (6 bits integer, 2 bits fraction)
typedef qfx<uint8_t, 2,6> q62;
/// A 8.8 integer (8 bits integer, 8 bits fraction)
typedef qfx<uint16_t, 8,8> q88;
/// A 12.4 integer (12 bits integer, 4 bits fraction)
typedef qfx<uint16_t, 4,12> q124;
/// @} FractionalTypes
FASTLED_NAMESPACE_END

View File

@@ -0,0 +1,110 @@
#pragma once
#ifndef __INC_LIB8TION_RANDOM_H
#define __INC_LIB8TION_RANDOM_H
#include "fl/stdint.h"
#include "lib8tion/lib8static.h"
/// @file random8.h
/// Fast, efficient random number generators specifically
/// designed for high-performance LED programming.
/// @ingroup lib8tion
/// @{
/// @defgroup Random Fast Random Number Generators
/// Fast 8-bit and 16-bit unsigned random number generators.
/// Significantly faster than Arduino random(), but
/// also somewhat less random. You can add entropy.
///
/// Pseudo-random number generation follows the form:
/// @code
/// X(n+1) = (2053 * X(n)) + 13849
/// @endcode
/// @{
/// Multiplier value for pseudo-random number generation
#define FASTLED_RAND16_2053 ((uint16_t)(2053))
/// Increment value for pseudo-random number generation
#define FASTLED_RAND16_13849 ((uint16_t)(13849))
#if defined(LIB8_ATTINY)
/// Multiplies a value by the pseudo-random multiplier.
/// Uses shifts and adds instead of a multiply: 2053 = 2048 + 4 + 1.
/// @note The argument is evaluated three times; pass an expression
/// without side effects.
#define APPLY_FASTLED_RAND16_2053(x) (((x) << 11) + ((x) << 2) + (x))
#else
/// Multiplies a value by the pseudo-random multiplier
#define APPLY_FASTLED_RAND16_2053(x) ((x) * FASTLED_RAND16_2053)
#endif
/// Seed for the random number generator functions.
/// Every random8()/random16() call advances this value. It is only
/// declared here; the definition is not in this header.
extern uint16_t rand16seed; // = RAND16_SEED;
/// Generate an 8-bit random number
/// Advances the shared seed by one step and folds its two bytes together.
/// @returns random 8-bit number, in the range 0-255
LIB8STATIC uint8_t random8() {
    rand16seed = APPLY_FASTLED_RAND16_2053(rand16seed) + FASTLED_RAND16_13849;
    // Sum the low and high bytes of the new seed, for better
    // mixing and non-sequential correlation.
    const uint8_t low_byte = (uint8_t)(rand16seed & 0xFF);
    const uint8_t high_byte = (uint8_t)(rand16seed >> 8);
    return (uint8_t)(low_byte + high_byte);
}
/// Generate a 16-bit random number
/// Advances the shared seed by one step and returns the new seed itself.
/// @returns random 16-bit number, in the range 0-65535
LIB8STATIC uint16_t random16() {
    const uint16_t next =
        APPLY_FASTLED_RAND16_2053(rand16seed) + FASTLED_RAND16_13849;
    rand16seed = next;
    return next;
}
/// Generate an 8-bit random number between 0 and lim
/// The raw random byte is scaled by lim/256, so the result is below lim
/// (and is always 0 when lim is 0).
/// @param lim the upper bound for the result, exclusive
/// @returns random 8-bit number, in the range 0 to lim-1
LIB8STATIC uint8_t random8(uint8_t lim) {
    uint8_t r = random8();
    // Multiply in uint16_t: r * lim can reach 65025, which does not fit
    // in a signed 16-bit int (the int width on AVR).
    r = ((uint16_t)r * lim) >> 8;
    return r;
}
/// Generate an 8-bit random number in the given range
/// @param min the lower bound for the random number, inclusive
/// @param lim the upper bound for the random number, exclusive
/// @returns min plus a random offset drawn from random8(lim - min)
LIB8STATIC uint8_t random8(uint8_t min, uint8_t lim) {
    const uint8_t span = lim - min;
    const uint8_t offset = random8(span);
    return (uint8_t)(offset + min);
}
/// Generate a 16-bit random number between 0 and lim
/// The raw random word is scaled by lim/65536 using a 32-bit product.
/// @param lim the upper bound for the result, exclusive
/// @returns random 16-bit number, in the range 0 to lim-1
LIB8STATIC uint16_t random16(uint16_t lim) {
    const uint32_t raw = random16();
    const uint32_t product = (uint32_t)lim * raw;
    // The high 16 bits of the product are the scaled result.
    return (uint16_t)(product >> 16);
}
/// Generate a 16-bit random number in the given range
/// @param min the lower bound for the random number, inclusive
/// @param lim the upper bound for the random number, exclusive
/// @returns min plus a random offset drawn from random16(lim - min)
LIB8STATIC uint16_t random16(uint16_t min, uint16_t lim) {
    const uint16_t span = lim - min;
    const uint16_t offset = random16(span);
    return (uint16_t)(offset + min);
}
/// Set the 16-bit seed used for the random number generator
/// @param seed new value stored into the shared seed
LIB8STATIC void random16_set_seed(uint16_t seed) {
    rand16seed = seed;
}
/// Get the current seed value for the random number generator
/// @returns the shared seed, without advancing it
LIB8STATIC uint16_t random16_get_seed() {
    return rand16seed;
}
/// Add entropy into the random number generator
/// @param entropy value added (modulo 65536) to the shared seed
LIB8STATIC void random16_add_entropy(uint16_t entropy) {
    rand16seed = (uint16_t)(rand16seed + entropy);
}
/// @} Random
/// @} lib8tion
#endif

View File

@@ -0,0 +1,760 @@
#include "fl/compiler_control.h"
#pragma once
#include "lib8tion/config.h"
#include "crgb.h"
#include "fl/namespace.h"
#include "fastled_config.h"
#include "lib8static.h"
FL_DISABLE_WARNING_PUSH
FL_DISABLE_WARNING_UNUSED_PARAMETER
FL_DISABLE_WARNING_RETURN_TYPE
FL_DISABLE_WARNING_IMPLICIT_INT_CONVERSION
FASTLED_NAMESPACE_BEGIN
/// @file scale8.h
/// Fast, efficient 8-bit scaling functions specifically
/// designed for high-performance LED programming.
/// @addtogroup lib8tion
/// @{
/// @defgroup Scaling Scaling Functions
/// Fast, efficient 8-bit scaling functions specifically
/// designed for high-performance LED programming.
///
/// Because of the AVR(Arduino) and ARM assembly language
/// implementations provided, using these functions often
/// results in smaller and faster code than the equivalent
/// program using plain "C" arithmetic and logic.
/// @{
/// Scale one byte by a second one, which is treated as
/// the numerator of a fraction whose denominator is 256.
///
/// In other words, it computes i * (scale / 256)
/// @param i input value to scale
/// @param scale scale factor, in n/256 units
/// @returns scaled value
/// @note With FASTLED_SCALE8_FIXED == 1 the C path computes
/// (i * (1 + scale)) >> 8, so a scale of 255 returns i unchanged.
/// @note Takes 4 clocks on AVR with MUL, 2 clocks on ARM
LIB8STATIC_ALWAYS_INLINE uint8_t scale8(uint8_t i, fract8 scale) {
#if SCALE8_C == 1
#if (FASTLED_SCALE8_FIXED == 1)
return (((uint16_t)i) * (1 + (uint16_t)(scale))) >> 8;
#else
return ((uint16_t)i * (uint16_t)(scale)) >> 8;
#endif
#elif SCALE8_AVRASM == 1
#if defined(LIB8_ATTINY)
// Shift-and-add multiply for AVR parts without a hardware MUL instruction.
#if (FASTLED_SCALE8_FIXED == 1)
uint8_t work = i;
#else
uint8_t work = 0;
#endif
// cnt is a one-hot loop counter: the loop ends when its set bit is shifted out.
uint8_t cnt = 0x80;
asm volatile(
#if (FASTLED_SCALE8_FIXED == 1)
" inc %[scale] \n\t"
" breq DONE_%= \n\t"
" clr %[work] \n\t"
#endif
"LOOP_%=: \n\t"
/*" sbrc %[scale], 0 \n\t"
" add %[work], %[i] \n\t"
" ror %[work] \n\t"
" lsr %[scale] \n\t"
" clc \n\t"*/
" sbrc %[scale], 0 \n\t"
" add %[work], %[i] \n\t"
" ror %[work] \n\t"
" lsr %[scale] \n\t"
" lsr %[cnt] \n\t"
"brcc LOOP_%= \n\t"
"DONE_%=: \n\t"
: [work] "+r"(work), [cnt] "+r"(cnt)
: [scale] "r"(scale), [i] "r"(i)
:);
return work;
#else
asm volatile(
#if (FASTLED_SCALE8_FIXED == 1)
// Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
"mul %0, %1 \n\t"
// Add i to r0, possibly setting the carry flag
"add r0, %0 \n\t"
// load the immediate 0 into i (note, this does _not_ touch any flags)
"ldi %0, 0x00 \n\t"
// walk and chew gum at the same time
"adc %0, r1 \n\t"
#else
/* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
"mul %0, %1 \n\t"
/* Move the high 8-bits of the product (r1) back to i */
"mov %0, r1 \n\t"
/* Restore r1 to "0"; it's expected to always be that */
#endif
"clr __zero_reg__ \n\t"
: "+d"(i) /* writes to i; r16-r31, restricted by ldi */
: "r"(scale) /* uses scale */
: "r0", "r1" /* clobbers r0, r1 */
);
/* Return the result */
return i;
#endif
#else
#error "No implementation for scale8 available."
#endif
}
/// Compile-time usable scaling helper: returns (i * (scale + 1)) >> 8.
/// The (scale + 1) multiplier is used here regardless of the
/// FASTLED_SCALE8_FIXED setting.
/// @param i input value to scale
/// @param scale scale factor, in n/256 units
/// @returns scaled value
constexpr uint8_t scale8_constexpr(uint8_t i, fract8 scale) {
    return (uint16_t(i) * (uint16_t(scale) + 1)) >> 8;
}
/// The "video" version of scale8() guarantees that the output will
/// only be zero if one or both of the inputs are zero.
/// If both inputs are non-zero, the output is guaranteed to be non-zero.
/// This makes for better "video"/LED dimming, at the cost of
/// several additional cycles.
/// @param i input value to scale
/// @param scale scale factor, in n/256 units
/// @returns scaled value
/// @see scale8()
LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video(uint8_t i, fract8 scale) {
#if SCALE8_C == 1 || defined(LIB8_ATTINY)
    // Multiply as uint16_t rather than int: 255 * 255 does not fit in a
    // signed 16-bit int (signed overflow is undefined), but always fits in
    // 16 unsigned bits. The result is unchanged where int is wider.
    uint8_t j = (((uint16_t)i * (uint16_t)scale) >> 8) + ((i && scale) ? 1 : 0);
    return j;
#elif SCALE8_AVRASM == 1
    uint8_t j = 0;
    asm volatile(" tst %[i]\n\t"           // i == 0: leave j at 0
                 " breq L_%=\n\t"
                 " mul %[i], %[scale]\n\t" // r1:r0 = i * scale
                 " mov %[j], r1\n\t"       // j = high byte of the product
                 " clr __zero_reg__\n\t"   // restore r1 to zero
                 " cpse %[scale], r1\n\t"  // skip the increment if scale == 0
                 " subi %[j], 0xFF\n\t"    // j += 1
                 "L_%=: \n\t"
                 : [j] "+d"(j) // r16-r31, restricted by subi
                 : [i] "r"(i), [scale] "r"(scale)
                 : "r0", "r1");
    return j;
#else
#error "No implementation for scale8_video available."
#endif
}
/// @defgroup ScalingDirty Scaling Functions that Leave R1 Dirty
/// These functions are more efficient for scaling multiple
/// bytes at once, but require calling cleanup_R1() afterwards.
/// @{
/// This version of scale8() does not clean up the R1 register on AVR.
/// If you are doing several "scale8()'s" in a row, use this, and
/// then explicitly call cleanup_R1().
/// @warning You **MUST** call cleanup_R1() after using this function!
/// @param i input value to scale
/// @param scale scale factor, in n/256 units
/// @returns scaled value
/// @see scale8()
LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY(uint8_t i,
                                                         fract8 scale) {
#if SCALE8_C == 1
#if (FASTLED_SCALE8_FIXED == 1)
    return (((uint16_t)i) * ((uint16_t)(scale) + 1)) >> 8;
#else
    // uint16_t, not int: 255 * 255 overflows a signed 16-bit int, but always
    // fits in 16 unsigned bits. Matches the casts used by scale8().
    return ((uint16_t)i * (uint16_t)(scale)) >> 8;
#endif
#elif SCALE8_AVRASM == 1
    asm volatile(
#if (FASTLED_SCALE8_FIXED == 1)
        // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
        "mul %0, %1 \n\t"
        // Add i to r0, possibly setting the carry flag
        "add r0, %0 \n\t"
        // load the immediate 0 into i (note, this does _not_ touch any flags)
        "ldi %0, 0x00 \n\t"
        // i = 0 + r1 + carry, i.e. the high byte of (i * scale + i)
        "adc %0, r1 \n\t"
#else
        /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
        "mul %0, %1 \n\t"
        /* Move the high 8-bits of the product (r1) back to i */
        "mov %0, r1 \n\t"
#endif
        /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
        /* "clr __zero_reg__ \n\t" */
        : "+d"(i)    /* writes to i; r16-r31, restricted by ldi */
        : "r"(scale) /* uses scale */
        : "r0", "r1" /* clobbers r0, r1 */
    );
    // Return the result
    return i;
#else
#error "No implementation for scale8_LEAVING_R1_DIRTY available."
#endif
}
/// In place modifying version of scale8() that does not clean up the R1
/// register on AVR. If you are doing several "scale8()'s" in a row, use this,
/// and then explicitly call cleanup_R1().
/// @warning You **MUST** call cleanup_R1() after using this function!
/// @par
/// @warning This function always modifies its arguments in place!
/// @param i input value to scale; overwritten with the scaled value
/// @param scale scale factor, in n/256 units
/// @see scale8()
LIB8STATIC_ALWAYS_INLINE void nscale8_LEAVING_R1_DIRTY(uint8_t &i,
                                                       fract8 scale) {
#if SCALE8_C == 1
#if (FASTLED_SCALE8_FIXED == 1)
    i = (((uint16_t)i) * ((uint16_t)(scale) + 1)) >> 8;
#else
    // uint16_t, not int: 255 * 255 overflows a signed 16-bit int, but always
    // fits in 16 unsigned bits. Matches the casts used by scale8().
    i = ((uint16_t)i * (uint16_t)(scale)) >> 8;
#endif
#elif SCALE8_AVRASM == 1
    asm volatile(
#if (FASTLED_SCALE8_FIXED == 1)
        // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
        "mul %0, %1 \n\t"
        // Add i to r0, possibly setting the carry flag
        "add r0, %0 \n\t"
        // load the immediate 0 into i (note, this does _not_ touch any flags)
        "ldi %0, 0x00 \n\t"
        // i = 0 + r1 + carry, i.e. the high byte of (i * scale + i)
        "adc %0, r1 \n\t"
#else
        /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
        "mul %0, %1 \n\t"
        /* Move the high 8-bits of the product (r1) back to i */
        "mov %0, r1 \n\t"
#endif
        /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
        /* "clr __zero_reg__ \n\t" */
        : "+d"(i)    /* writes to i; r16-r31, restricted by ldi */
        : "r"(scale) /* uses scale */
        : "r0", "r1" /* clobbers r0, r1 */
    );
#else
#error "No implementation for nscale8_LEAVING_R1_DIRTY available."
#endif
}
/// This version of scale8_video() does not clean up the R1 register on AVR.
/// If you are doing several "scale8_video()'s" in a row, use this, and
/// then explicitly call cleanup_R1().
/// @warning You **MUST** call cleanup_R1() after using this function!
/// @param i input value to scale
/// @param scale scale factor, in n/256 units
/// @returns scaled value
/// @see scale8_video()
LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY(uint8_t i,
                                                               fract8 scale) {
#if SCALE8_C == 1 || defined(LIB8_ATTINY)
    // Multiply as uint16_t rather than int: 255 * 255 does not fit in a
    // signed 16-bit int (signed overflow is undefined), but always fits in
    // 16 unsigned bits. The result is unchanged where int is wider.
    uint8_t j = (((uint16_t)i * (uint16_t)scale) >> 8) + ((i && scale) ? 1 : 0);
    return j;
#elif SCALE8_AVRASM == 1
    uint8_t j = 0;
    asm volatile(" tst %[i]\n\t"           // i == 0: leave j at 0
                 " breq L_%=\n\t"
                 " mul %[i], %[scale]\n\t" // r1:r0 = i * scale, Z set if zero
                 " mov %[j], r1\n\t"       // j = high byte (flags untouched)
                 " breq L_%=\n\t"          // product zero => scale == 0
                 " subi %[j], 0xFF\n\t"    // j += 1
                 "L_%=: \n\t"
                 : [j] "+d"(j) // r16-r31, restricted by subi
                 : [i] "r"(i), [scale] "r"(scale)
                 : "r0", "r1");
    return j;
#else
#error "No implementation for scale8_video_LEAVING_R1_DIRTY available."
#endif
}
/// In place modifying version of scale8_video() that does not clean up the R1
/// register on AVR. If you are doing several "scale8_video()'s" in a row, use
/// this, and then explicitly call cleanup_R1().
/// @warning You **MUST** call cleanup_R1() after using this function!
/// @par
/// @warning This function always modifies its arguments in place!
/// @param i input value to scale
/// @param scale scale factor, in n/256 units
/// @see scale8_video()
LIB8STATIC_ALWAYS_INLINE void nscale8_video_LEAVING_R1_DIRTY(uint8_t &i,
fract8 scale) {
#if SCALE8_C == 1 || defined(LIB8_ATTINY)
i = (((int)i * (int)scale) >> 8) + ((i && scale) ? 1 : 0);
#elif SCALE8_AVRASM == 1
asm volatile(" tst %[i]\n\t"
" breq L_%=\n\t"
" mul %[i], %[scale]\n\t"
" mov %[i], r1\n\t"
" breq L_%=\n\t"
" subi %[i], 0xFF\n\t"
"L_%=: \n\t"
: [i] "+d"(i) // r16-r31, restricted by subi
: [scale] "r"(scale)
: "r0", "r1");
#else
#error "No implementation for scale8_video_LEAVING_R1_DIRTY available."
#endif
}
/// Clean up the r1 register after a series of *LEAVING_R1_DIRTY calls.
/// Emits a single instruction that zeroes __zero_reg__ when
/// CLEANUP_R1_AVRASM is 1; otherwise the function body is empty.
/// @ingroup ScalingDirty
LIB8STATIC_ALWAYS_INLINE void cleanup_R1() {
#if CLEANUP_R1_AVRASM == 1
// Restore r1 to "0"; it's expected to always be that
asm volatile("clr __zero_reg__ \n\t" : : : "r1");
#endif
}
/// Compile-time usable scaling of three channels: each of r, g and b is
/// replaced by (channel * scale) >> 8 and the results are packed in a CRGB.
/// NOTE(review): unlike scale8_constexpr() this does not use the (scale + 1)
/// multiplier; confirm whether that difference is intended.
/// @param r first value to scale
/// @param g second value to scale
/// @param b third value to scale
/// @param scale scale factor, in n/256 units
/// @returns CRGB built from the three scaled values
constexpr CRGB nscale8x3_constexpr(uint8_t r, uint8_t g, uint8_t b, fract8 scale) {
    // uint16_t, not int: 255 * 255 overflows a signed 16-bit int, which is
    // undefined behaviour and therefore not a constant expression either.
    return CRGB((uint8_t)(((uint16_t)r * (uint16_t)(scale)) >> 8),
                (uint8_t)(((uint16_t)g * (uint16_t)(scale)) >> 8),
                (uint8_t)(((uint16_t)b * (uint16_t)(scale)) >> 8));
}
/// @} ScalingDirty
/// Scale three one-byte values by a fourth one, which is treated as
/// the numerator of a fraction whose denominator is 256.
///
/// In other words, it computes r,g,b * (scale / 256)
///
/// @warning This function always modifies its arguments in place!
/// @param r first value to scale
/// @param g second value to scale
/// @param b third value to scale
/// @param scale scale factor, in n/256 units
LIB8STATIC void nscale8x3(uint8_t &r, uint8_t &g, uint8_t &b, fract8 scale) {
#if SCALE8_C == 1
#if (FASTLED_SCALE8_FIXED == 1)
    uint16_t scale_fixed = scale + 1;
    r = (((uint16_t)r) * scale_fixed) >> 8;
    g = (((uint16_t)g) * scale_fixed) >> 8;
    b = (((uint16_t)b) * scale_fixed) >> 8;
#else
    // uint16_t, not int: 255 * 255 overflows a signed 16-bit int, but always
    // fits in 16 unsigned bits. Same casts as nscale8x2().
    r = ((uint16_t)r * (uint16_t)(scale)) >> 8;
    g = ((uint16_t)g * (uint16_t)(scale)) >> 8;
    b = ((uint16_t)b * (uint16_t)(scale)) >> 8;
#endif
#elif SCALE8_AVRASM == 1
    // Three multiplies back to back, then restore r1 once at the end.
    r = scale8_LEAVING_R1_DIRTY(r, scale);
    g = scale8_LEAVING_R1_DIRTY(g, scale);
    b = scale8_LEAVING_R1_DIRTY(b, scale);
    cleanup_R1();
#else
#error "No implementation for nscale8x3 available."
#endif
}
/// Scale three one-byte values by a fourth one, which is treated as
/// the numerator of a fraction whose demominator is 256.
///
/// In other words, it computes r,g,b * (scale / 256), ensuring
/// that non-zero values passed in remain non-zero, no matter how low the scale
/// argument.
///
/// @warning This function always modifies its arguments in place!
/// @param r first value to scale
/// @param g second value to scale
/// @param b third value to scale
/// @param scale scale factor, in n/256 units
LIB8STATIC void nscale8x3_video(uint8_t &r, uint8_t &g, uint8_t &b,
fract8 scale) {
#if SCALE8_C == 1
uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
r = (r == 0) ? 0 : (((int)r * (int)(scale)) >> 8) + nonzeroscale;
g = (g == 0) ? 0 : (((int)g * (int)(scale)) >> 8) + nonzeroscale;
b = (b == 0) ? 0 : (((int)b * (int)(scale)) >> 8) + nonzeroscale;
#elif SCALE8_AVRASM == 1
nscale8_video_LEAVING_R1_DIRTY(r, scale);
nscale8_video_LEAVING_R1_DIRTY(g, scale);
nscale8_video_LEAVING_R1_DIRTY(b, scale);
cleanup_R1();
#else
#error "No implementation for nscale8x3 available."
#endif
}
/// Scale a pair of bytes in place by a common factor, where the factor is
/// the numerator of a fraction whose denominator is 256.
///
/// Each value becomes value * (scale / 256); with FASTLED_SCALE8_FIXED == 1
/// the C implementation multiplies by (scale + 1) instead of scale.
///
/// @warning This function always modifies its arguments in place!
/// @param i first value to scale
/// @param j second value to scale
/// @param scale scale factor, in n/256 units
LIB8STATIC void nscale8x2(uint8_t &i, uint8_t &j, fract8 scale) {
#if SCALE8_C == 1
    // Pick the 16-bit multiplier once, then apply it to both values.
#if FASTLED_SCALE8_FIXED == 1
    const uint16_t multiplier = scale + 1;
#else
    const uint16_t multiplier = (uint16_t)(scale);
#endif
    i = ((uint16_t)i * multiplier) >> 8;
    j = ((uint16_t)j * multiplier) >> 8;
#elif SCALE8_AVRASM == 1
    // Two multiplies back to back, then restore r1 once at the end.
    i = scale8_LEAVING_R1_DIRTY(i, scale);
    j = scale8_LEAVING_R1_DIRTY(j, scale);
    cleanup_R1();
#else
#error "No implementation for nscale8x2 available."
#endif
}
/// Scale two one-byte values by a third one, which is treated as
/// the numerator of a fraction whose demominator is 256.
///
/// In other words, it computes i,j * (scale / 256), ensuring
/// that non-zero values passed in remain non zero, no matter how low the scale
/// argument.
///
/// @warning This function always modifies its arguments in place!
/// @param i first value to scale
/// @param j second value to scale
/// @param scale scale factor, in n/256 units
LIB8STATIC void nscale8x2_video(uint8_t &i, uint8_t &j, fract8 scale) {
#if SCALE8_C == 1
uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
i = (i == 0) ? 0 : (((int)i * (int)(scale)) >> 8) + nonzeroscale;
j = (j == 0) ? 0 : (((int)j * (int)(scale)) >> 8) + nonzeroscale;
#elif SCALE8_AVRASM == 1
nscale8_video_LEAVING_R1_DIRTY(i, scale);
nscale8_video_LEAVING_R1_DIRTY(j, scale);
cleanup_R1();
#else
#error "No implementation for nscale8x2 available."
#endif
}
/// Scale a 16-bit unsigned value by an 8-bit value, which is treated
/// as the numerator of a fraction whose denominator is 256.
///
/// In other words, it computes i * (scale / 256). With
/// FASTLED_SCALE8_FIXED == 1 the multiplier is (scale + 1), except that a
/// scale of 0 always returns 0.
/// @param i input value to scale
/// @param scale scale factor, in n/256 units
/// @returns scaled value
LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8(uint16_t i, fract8 scale) {
    if (scale == 0) {
        return 0; // Fixes non zero output when scale == 0 and
                  // FASTLED_SCALE8_FIXED==1
    }
#if SCALE16BY8_C == 1
    uint16_t result;
#if FASTLED_SCALE8_FIXED == 1
    result = (((uint32_t)(i) * (1 + ((uint32_t)scale))) >> 8);
#else
    // Widen to 32 bits before multiplying: the product needs up to 24 bits,
    // and where int is 16 bits wide "i * scale" would be evaluated in
    // unsigned int and silently wrap.
    result = (((uint32_t)(i) * (uint32_t)(scale)) >> 8);
#endif
    return result;
#elif SCALE16BY8_AVRASM == 1
#if FASTLED_SCALE8_FIXED == 1
    uint16_t result = 0;
    asm volatile(
        // result.A = HighByte( (i.A x scale) + i.A )
        " mul %A[i], %[scale] \n\t"
        " add r0, %A[i] \n\t"
        // result.A starts at zero, so this stores r1 plus the carry
        " adc %A[result], r1 \n\t"
        // result.A-B += i.B x scale
        " mul %B[i], %[scale] \n\t"
        " add %A[result], r0 \n\t"
        " adc %B[result], r1 \n\t"
        // cleanup r1
        " clr __zero_reg__ \n\t"
        // result.A-B += i.B
        " add %A[result], %B[i] \n\t"
        " adc %B[result], __zero_reg__ \n\t"
        : [result] "+r"(result)
        : [i] "r"(i), [scale] "r"(scale)
        : "r0", "r1");
    return result;
#else
    uint16_t result = 0;
    asm volatile(
        // result.A = HighByte(i.A x scale)
        " mul %A[i], %[scale] \n\t"
        " mov %A[result], r1 \n\t"
        // result.B is already zero from the initialiser above
        // result.A-B += i.B x scale
        " mul %B[i], %[scale] \n\t"
        " add %A[result], r0 \n\t"
        " adc %B[result], r1 \n\t"
        // cleanup r1
        " clr __zero_reg__ \n\t"
        : [result] "+r"(result)
        : [i] "r"(i), [scale] "r"(scale)
        : "r0", "r1");
    return result;
#endif
#else
#error "No implementation for scale16by8 available."
#endif
}
/// Scale a 16-bit unsigned value by a 16-bit value, which is treated
/// as the numerator of a fraction whose denominator is 65536.
/// In other words, it computes i * (scale / 65536). With
/// FASTLED_SCALE8_FIXED == 1 the result is (i * scale + i) / 65536.
/// @param i input value to scale
/// @param scale scale factor, in n/65536 units
/// @returns scaled value
LIB8STATIC uint16_t scale16(uint16_t i, fract16 scale) {
#if SCALE16_C == 1
uint16_t result;
#if FASTLED_SCALE8_FIXED == 1
// The 32-bit intermediate holds i * (scale + 1) without wrapping.
result = ((uint32_t)(i) * (1 + (uint32_t)(scale))) / 65536;
#else
result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536;
#endif
return result;
#elif SCALE16_AVRASM == 1
// Both AVR variants build the 32-bit product from four 8x8 multiplies:
// i.A x scale.A goes to result.A-B, i.B x scale.B goes to result.C-D, and
// the two cross products are added into result.B-D.
#if FASTLED_SCALE8_FIXED == 1
// implemented sort of like
// result = ((i * scale) + i ) / 65536
//
// why not like this, you may ask?
// result = (i * (scale+1)) / 65536
// the answer is that if scale is 65535, then scale+1
// will be zero, which is not what we want.
uint32_t result;
asm volatile(
// result.A-B = i.A x scale.A
" mul %A[i], %A[scale] \n\t"
// save results...
// basic idea:
//" mov %A[result], r0 \n\t"
//" mov %B[result], r1 \n\t"
// which can be written as...
" movw %A[result], r0 \n\t"
// Because we're going to add i.A-B to
// result.A-D, we DO need to keep both
// the r0 and r1 portions of the product
// UNlike in the 'unfixed scale8' version.
// So the movw here is needed.
: [result] "=r"(result)
: [i] "r"(i), [scale] "r"(scale)
: "r0", "r1");
asm volatile(
// result.C-D = i.B x scale.B
" mul %B[i], %B[scale] \n\t"
//" mov %C[result], r0 \n\t"
//" mov %D[result], r1 \n\t"
" movw %C[result], r0 \n\t"
: [result] "+r"(result)
: [i] "r"(i), [scale] "r"(scale)
: "r0", "r1");
// Zero held in a general register: used as the addend that propagates
// carries, since r1 contains product bytes while the multiplies run.
const uint8_t zero = 0;
asm volatile(
// result.B-D += i.B x scale.A
" mul %B[i], %A[scale] \n\t"
" add %B[result], r0 \n\t"
" adc %C[result], r1 \n\t"
" adc %D[result], %[zero] \n\t"
// result.B-D += i.A x scale.B
" mul %A[i], %B[scale] \n\t"
" add %B[result], r0 \n\t"
" adc %C[result], r1 \n\t"
" adc %D[result], %[zero] \n\t"
// cleanup r1
" clr r1 \n\t"
: [result] "+r"(result)
: [i] "r"(i), [scale] "r"(scale), [zero] "r"(zero)
: "r0", "r1");
asm volatile(
// result.A-D += i.A-B
" add %A[result], %A[i] \n\t"
" adc %B[result], %B[i] \n\t"
" adc %C[result], %[zero] \n\t"
" adc %D[result], %[zero] \n\t"
: [result] "+r"(result)
: [i] "r"(i), [zero] "r"(zero));
// Keep only the upper 16 bits, i.e. divide by 65536.
result = result >> 16;
return result;
#else
uint32_t result;
asm volatile(
// result.A-B = i.A x scale.A
" mul %A[i], %A[scale] \n\t"
// save results...
// basic idea:
//" mov %A[result], r0 \n\t"
//" mov %B[result], r1 \n\t"
// which can be written as...
" movw %A[result], r0 \n\t"
// We actually don't need to do anything with r0,
// as result.A is never used again here, so we
// could just move the high byte, but movw is
// one clock cycle, just like mov, so might as
// well, in case we want to use this code for
// a generic 16x16 multiply somewhere.
: [result] "=r"(result)
: [i] "r"(i), [scale] "r"(scale)
: "r0", "r1");
asm volatile(
// result.C-D = i.B x scale.B
" mul %B[i], %B[scale] \n\t"
//" mov %C[result], r0 \n\t"
//" mov %D[result], r1 \n\t"
" movw %C[result], r0 \n\t"
: [result] "+r"(result)
: [i] "r"(i), [scale] "r"(scale)
: "r0", "r1");
// Zero held in a general register: used as the addend that propagates
// carries, since r1 contains product bytes while the multiplies run.
const uint8_t zero = 0;
asm volatile(
// result.B-D += i.B x scale.A
" mul %B[i], %A[scale] \n\t"
" add %B[result], r0 \n\t"
" adc %C[result], r1 \n\t"
" adc %D[result], %[zero] \n\t"
// result.B-D += i.A x scale.B
" mul %A[i], %B[scale] \n\t"
" add %B[result], r0 \n\t"
" adc %C[result], r1 \n\t"
" adc %D[result], %[zero] \n\t"
// cleanup r1
" clr r1 \n\t"
: [result] "+r"(result)
: [i] "r"(i), [scale] "r"(scale), [zero] "r"(zero)
: "r0", "r1");
// Keep only the upper 16 bits, i.e. divide by 65536.
result = result >> 16;
return result;
#endif
#else
#error "No implementation for scale16 available."
#endif
}
/// @} Scaling
/// @defgroup Dimming Dimming and Brightening Functions
/// Functions to dim or brighten data.
///
/// The eye does not respond in a linear way to light.
/// High speed PWM'd LEDs at 50% duty cycle appear far
/// brighter than the "half as bright" you might expect.
///
/// If you want your midpoint brightness LEDs (128) to
/// appear half as bright as "full" brightness (255), you
/// have to apply a "dimming function".
///
/// @note These are approximations of gamma correction with
/// a gamma value of 2.0.
/// @see @ref GammaFuncs
/// @{
/// Dim a value by scaling it by itself, i.e. returns scale8(x, x).
/// @param x value to dim
/// @returns dimmed value
/// @see scale8()
LIB8STATIC uint8_t dim8_raw(uint8_t x) {
    const uint8_t dimmed = scale8(x, x);
    return dimmed;
}
/// Dim a value by scaling it by itself with scale8_video(), so a non-zero
/// input never dims all the way to zero.
/// @param x value to dim
/// @returns dimmed value
/// @see scale8_video()
LIB8STATIC uint8_t dim8_video(uint8_t x) {
    const uint8_t dimmed = scale8_video(x, x);
    return dimmed;
}
/// Dimming function that is linear in its lower half: values below 128 are
/// halved (rounding up), values of 128 and above are scaled by themselves.
/// @param x value to dim
/// @returns dimmed value
LIB8STATIC uint8_t dim8_lin(uint8_t x) {
    const bool upper_half = (x & 0x80) != 0;
    if (!upper_half) {
        // x <= 127 here, so x + 1 cannot exceed 128.
        return (uint8_t)((x + 1) / 2);
    }
    return scale8(x, x);
}
/// Brighten a value (inverse of dim8_raw()): the input is flipped around
/// 255, scaled by itself, and flipped back.
/// @param x value to brighten
/// @returns brightened value
LIB8STATIC uint8_t brighten8_raw(uint8_t x) {
    const uint8_t flipped = 255 - x;
    const uint8_t dimmed = scale8(flipped, flipped);
    return 255 - dimmed;
}
/// Brighten a value (inverse of dim8_video()): the input is flipped around
/// 255, scaled by itself with scale8_video(), and flipped back.
/// @param x value to brighten
/// @returns brightened value
LIB8STATIC uint8_t brighten8_video(uint8_t x) {
    const uint8_t flipped = 255 - x;
    const uint8_t dimmed = scale8_video(flipped, flipped);
    return 255 - dimmed;
}
/// Brighten a value (inverse of dim8_lin()): the input is flipped around
/// 255, dimmed the same way dim8_lin() does, and flipped back.
/// @param x value to brighten
/// @returns brightened value
LIB8STATIC uint8_t brighten8_lin(uint8_t x) {
    uint8_t flipped = 255 - x;
    // 128 and above: scale by itself; below 128: halve, rounding up.
    flipped = (flipped & 0x80) ? scale8(flipped, flipped)
                               : (uint8_t)((flipped + 1) / 2);
    return 255 - flipped;
}
/// @} Dimming
/// @} lib8tion
FASTLED_NAMESPACE_END
#pragma GCC diagnostic pop

View File

@@ -0,0 +1,287 @@
#pragma once
#ifndef __INC_LIB8TION_TRIG_H
#define __INC_LIB8TION_TRIG_H
#include "fl/stdint.h"
#include "lib8tion/lib8static.h"
#include "fl/compiler_control.h"
FL_DISABLE_WARNING_PUSH
FL_DISABLE_WARNING_UNUSED_PARAMETER
FL_DISABLE_WARNING_RETURN_TYPE
FL_DISABLE_WARNING_IMPLICIT_INT_CONVERSION
FL_DISABLE_WARNING_FLOAT_CONVERSION
FL_DISABLE_WARNING_SIGN_CONVERSION
/// @file trig8.h
/// Fast, efficient 8-bit trigonometry functions specifically
/// designed for high-performance LED programming.
/// @ingroup lib8tion
/// @{
/// @defgroup Trig Fast Trigonometry Functions
/// Fast 8-bit and 16-bit approximations of sin(x) and cos(x).
///
/// Don't use these approximations for calculating the
/// trajectory of a rocket to Mars, but they're great
/// for art projects and LED displays.
///
/// On Arduino/AVR, the 16-bit approximation is more than
/// 10X faster than floating point sin(x) and cos(x), while
/// the 8-bit approximation is more than 20X faster.
/// @{
#if defined(USE_SIN_32)
#define sin16 fl::sin16lut
#define cos16 fl::cos16lut
#include "fl/sin32.h"
#elif defined(__AVR__)
/// Platform-independent alias of the fast sin implementation
#define sin16 sin16_avr
/// Fast 16-bit approximation of sin(x). This approximation never varies more
/// than 0.69% from the floating point value you'd get by doing
/// @code{.cpp}
/// float s = sin(x) * 32767.0;
/// @endcode
///
/// Piecewise-linear: a quarter wave is split into eight sections, each
/// described by a 16-bit base value and an 8-bit slope.
///
/// @param theta input angle from 0-65535
/// @returns sin of theta, value between -32767 to 32767.
LIB8STATIC int16_t sin16_avr(uint16_t theta) {
// Four bytes per section: base low byte, base high byte, slope, padding.
// The final padding byte is left out because it is never read.
static const uint8_t data[] = {
0, 0, 49, 0, 6393 % 256, 6393 / 256, 48, 0,
12539 % 256, 12539 / 256, 44, 0, 18204 % 256, 18204 / 256, 38, 0,
23170 % 256, 23170 / 256, 31, 0, 27245 % 256, 27245 / 256, 23, 0,
30273 % 256, 30273 / 256, 14, 0, 32137 % 256, 32137 / 256, 4 /*,0*/};
// Position within the quarter wave (low 14 bits of the angle).
uint16_t offset = (theta & 0x3FFF);
// AVR doesn't have a multi-bit shift instruction,
// so if we say "offset >>= 3", gcc makes a tiny loop.
// Inserting empty volatile statements between each
// bit shift forces gcc to unroll the loop.
offset >>= 1; // 0..8191
asm volatile("");
offset >>= 1; // 0..4095
asm volatile("");
offset >>= 1; // 0..2047
// Bit 14 set: second or fourth quarter, so walk the table backwards.
if (theta & 0x4000)
offset = 2047 - offset;
// Byte index of this section's table entry (section number times 4).
uint8_t sectionX4;
sectionX4 = offset / 256;
sectionX4 *= 4;
uint8_t m;
// Assembles the 16-bit base from its two table bytes; blo is expected to
// overlay the low byte of b.
union {
uint16_t b;
struct {
uint8_t blo;
uint8_t bhi;
};
} u;
// in effect u.b = blo + (256 * bhi);
u.blo = data[sectionX4];
u.bhi = data[sectionX4 + 1];
m = data[sectionX4 + 2];
// Low byte of offset (position within the section), halved: 0..127.
uint8_t secoffset8 = (uint8_t)(offset) / 2;
uint16_t mx = m * secoffset8;
int16_t y = mx + u.b;
// Bit 15 set: second half of the cycle, where the sine is negative.
if (theta & 0x8000)
y = -y;
return y;
}
#else
/// Platform-independent alias of the fast sin implementation
#define sin16 sin16_C
/// Fast 16-bit approximation of sin(x). This approximation never varies more
/// than 0.69% from the floating point value you'd get by doing
/// @code{.cpp}
/// float s = sin(x) * 32767.0;
/// @endcode
///
/// Piecewise-linear: a quarter wave is split into eight sections, each
/// described by a base value and a slope.
///
/// @param theta input angle from 0-65535
/// @returns sin of theta, value between -32767 to 32767.
LIB8STATIC int16_t sin16_C(uint16_t theta) {
    static const uint16_t base_table[] = {0,     6393,  12539, 18204,
                                          23170, 27245, 30273, 32137};
    static const uint8_t slope_table[] = {49, 48, 44, 38, 31, 23, 14, 4};

    const bool mirrored = (theta & 0x4000) != 0; // 2nd or 4th quarter
    const bool negative = (theta & 0x8000) != 0; // second half of the cycle

    // Position within the quarter wave, 0..2047, reversed when mirrored.
    uint16_t pos = (theta & 0x3FFF) >> 3;
    if (mirrored) {
        pos = 2047 - pos;
    }

    const uint8_t section = pos >> 8;                  // 0..7
    const uint8_t within = (uint8_t)(pos & 0xFF) >> 1; // 0..127
    const uint16_t rise = slope_table[section] * within;

    int16_t y = rise + base_table[section];
    if (negative) {
        y = -y;
    }
    return y;
}
#endif
/// Fast 16-bit approximation of cos(x). This approximation never varies more
/// than 0.69% from the floating point value you'd get by doing
/// @code{.cpp}
/// float s = cos(x) * 32767.0;
/// @endcode
///
/// Implemented as sin16() of the angle advanced by a quarter turn (16384).
///
/// @param theta input angle from 0-65535
/// @returns cos of theta, value between -32767 to 32767.
#ifndef USE_SIN_32
LIB8STATIC int16_t cos16(uint16_t theta) {
    // The sum wraps around modulo 65536, matching the angle range.
    const uint16_t shifted = (uint16_t)(theta + 16384);
    return sin16(shifted);
}
#endif
///////////////////////////////////////////////////////////////////////
// sin8() and cos8()
// Fast 8-bit approximations of sin(x) & cos(x).
/// Pre-calculated lookup table used in sin8() and cos8() functions.
/// Holds four interleaved (b, m16) pairs, one per 16-step section of a
/// quarter wave: b is the base value of the section and m16 is its slope in
/// sixteenths (sin8 adds (m16 * offset_in_section) / 16 to b).
const uint8_t b_m16_interleave[] = {0, 49, 49, 41, 90, 27, 117, 10};
#if defined(__AVR__) && !defined(LIB8_ATTINY)
/// Platform-independent alias of the fast sin implementation
#define sin8 sin8_avr
/// Fast 8-bit approximation of sin(x). This approximation never varies more
/// than 2% from the floating point value you'd get by doing
/// @code{.cpp}
/// float s = (sin(x) * 128.0) + 128;
/// @endcode
///
/// Piecewise-linear: a quarter wave is split into four sections whose base
/// and slope come from b_m16_interleave.
///
/// @param theta input angle from 0-255
/// @returns sin of theta, value between 0 and 255
LIB8STATIC uint8_t sin8_avr(uint8_t theta) {
uint8_t offset = theta;
// If bit 6 of theta is set (2nd or 4th quarter), complement offset so the
// table is walked backwards; otherwise the "com" is skipped.
asm volatile("sbrc %[theta],6 \n\t"
"com %[offset] \n\t"
: [theta] "+r"(theta), [offset] "+r"(offset));
offset &= 0x3F; // 0..63
uint8_t secoffset = offset & 0x0F; // 0..15
if (theta & 0x40)
++secoffset;
uint8_t m16;
uint8_t b;
uint8_t section = offset >> 4; // 0..3
// Each section owns two table bytes: base first, then slope.
uint8_t s2 = section * 2;
const uint8_t *p = b_m16_interleave;
p += s2;
b = *p;
++p;
m16 = *p;
uint8_t mx;
uint8_t xr1;
// mx = (m16 * secoffset) >> 4, built from nibbles: the high nibble of the
// product's low byte and the low nibble of its high byte. r1 is zeroed
// again with "eor" right after the product has been copied out.
asm volatile("mul %[m16],%[secoffset] \n\t"
"mov %[mx],r0 \n\t"
"mov %[xr1],r1 \n\t"
"eor r1, r1 \n\t"
"swap %[mx] \n\t"
"andi %[mx],0x0F \n\t"
"swap %[xr1] \n\t"
"andi %[xr1], 0xF0 \n\t"
"or %[mx], %[xr1] \n\t"
: [mx] "=d"(mx), [xr1] "=d"(xr1)
: [m16] "d"(m16), [secoffset] "d"(secoffset));
int8_t y = mx + b;
// Bit 7 set: second half of the cycle, below the midpoint.
if (theta & 0x80)
y = -y;
// Re-centre the signed value around 128 for the unsigned return.
y += 128;
return y;
}
#else
/// Platform-independent alias of the fast sin implementation
#define sin8 sin8_C
/// Fast 8-bit approximation of sin(x). This approximation never varies more
/// than 2% from the floating point value you'd get by doing
/// @code{.cpp}
/// float s = (sin(x) * 128.0) + 128;
/// @endcode
///
/// Piecewise-linear: a quarter wave is split into four sections whose base
/// and slope come from b_m16_interleave.
///
/// @param theta input angle from 0-255
/// @returns sin of theta, value between 0 and 255
LIB8STATIC uint8_t sin8_C(uint8_t theta) {
    const bool mirrored = (theta & 0x40) != 0; // 2nd or 4th quarter

    // Position within the quarter wave, 0..63, reversed when mirrored.
    uint8_t quarter_pos = mirrored ? (uint8_t)(255 - theta) : theta;
    quarter_pos &= 0x3F;

    // Step inside the 16-wide section; mirrored quarters are shifted by one.
    uint8_t step = quarter_pos & 0x0F;
    if (mirrored) {
        ++step;
    }

    // Each section owns two table bytes: base first, then slope.
    const uint8_t pair = (quarter_pos >> 4) * 2;
    const uint8_t b = b_m16_interleave[pair];
    const uint8_t m16 = b_m16_interleave[pair + 1];

    const uint8_t rise = (m16 * step) >> 4;
    int8_t y = rise + b;
    if (theta & 0x80) {
        y = -y; // second half of the cycle lies below the midpoint
    }
    y += 128;
    return y;
}
#endif
/// Fast 8-bit approximation of cos(x). This approximation never varies more
/// than 2% from the floating point value you'd get by doing
/// @code{.cpp}
/// float s = (cos(x) * 128.0) + 128;
/// @endcode
///
/// Implemented as sin8() of the angle advanced by a quarter turn (64).
///
/// @param theta input angle from 0-255
/// @returns cos of theta, value between 0 and 255
LIB8STATIC uint8_t cos8(uint8_t theta) {
    // The sum wraps around modulo 256, matching the angle range.
    const uint8_t shifted = (uint8_t)(theta + 64);
    return sin8(shifted);
}
/// @} Trig
/// @} lib8tion
#endif
FL_DISABLE_WARNING_POP

View File

@@ -0,0 +1,47 @@
/// @file types.h
/// Defines fractional types used for lib8tion functions
#pragma once
#include "fl/stdint.h"
#include "fl/int.h"
#include "fl/namespace.h"
FASTLED_NAMESPACE_BEGIN
/// @addtogroup lib8tion
/// @{
/// @defgroup FractionalTypes Fixed-Point Fractional Types.
/// Types for storing fractional data.
/// Note: Fractional types have been moved to fl/int.h
/// @{
/// typedef for IEEE754 "binary32" float type internals.
/// Overlays one 32-bit value with several bit-field views that split the
/// same 32 bits at different boundaries.
/// @note The views assume bit-fields are allocated starting from the least
/// significant bit, which is compiler-dependent.
/// @see https://en.wikipedia.org/wiki/IEEE_754
typedef union {
uint32_t i; ///< raw value, as an integer
float f; ///< raw value, as a float
struct {
uint32_t mantissa: 23; ///< 23-bit mantissa
uint32_t exponent: 8; ///< 8-bit exponent
uint32_t signbit: 1; ///< sign bit
};
struct {
uint32_t mant7 : 7; ///< first 7 bits of the mantissa field (presumably its low bits; intended use not visible here)
uint32_t mant16: 16; ///< remaining 16 bits of the mantissa field
uint32_t exp_ : 8; ///< same 8 bits as `exponent`
uint32_t sb_ : 1; ///< same bit as `signbit`
};
struct {
uint32_t mant_lo8 : 8; ///< first 8 bits of the mantissa field
uint32_t mant_hi16_exp_lo1 : 16; ///< next 16 bits: remaining 15 mantissa bits plus one exponent bit (per the name, the lowest)
uint32_t sb_exphi7 : 8; ///< last 8 bits: remaining 7 exponent bits plus the sign bit
};
} IEEE754binary32_t;
/// @} FractionalTypes
/// @} lib8tion
FASTLED_NAMESPACE_END