702 lines
21 KiB
C
702 lines
21 KiB
C
#pragma once
|
|
|
|
#include "lib8tion/config.h"
|
|
#include "scale8.h"
|
|
#include "lib8tion/lib8static.h"
|
|
#include "intmap.h"
|
|
#include "fl/namespace.h"
|
|
|
|
#include "fl/compiler_control.h"
|
|
|
|
|
|
FL_DISABLE_WARNING_PUSH
|
|
FL_DISABLE_WARNING_UNUSED_PARAMETER
|
|
FL_DISABLE_WARNING_RETURN_TYPE
|
|
FL_DISABLE_WARNING_IMPLICIT_INT_CONVERSION
|
|
|
|
FASTLED_NAMESPACE_BEGIN
|
|
|
|
/// @file math8.h
|
|
/// Fast, efficient 8-bit math functions specifically
|
|
/// designed for high-performance LED programming.
|
|
|
|
/// @ingroup lib8tion
|
|
/// @{
|
|
|
|
/// @defgroup Math Basic Math Operations
|
|
/// Fast, efficient 8-bit math functions specifically
|
|
/// designed for high-performance LED programming.
|
|
///
|
|
/// Because of the AVR (Arduino) and ARM assembly language
|
|
/// implementations provided, using these functions often
|
|
/// results in smaller and faster code than the equivalent
|
|
/// program using plain "C" arithmetic and logic.
|
|
/// @{
|
|
|
|
/// Add two unsigned bytes, clamping at 0xFF instead of wrapping.
/// @param i first byte to add
/// @param j second byte to add
/// @returns i + j, or 0xFF when the true sum does not fit in 8 bits
LIB8STATIC_ALWAYS_INLINE uint8_t qadd8(uint8_t i, uint8_t j) {
#if QADD8_C == 1
    // Form the sum in a wider type so a carry out of bit 7 stays visible.
    const unsigned int sum = i + j;
    return static_cast<uint8_t>(sum > 255u ? 255u : sum);
#elif QADD8_AVRASM == 1
    asm volatile(
        /* i += j; the C flag records an overflow past 0xFF */
        "add %0, %1 \n\t"

        /* C clear: the sum fits, so jump over the saturating load.
           C set:   overwrite i with 0xFF. */
        "brcc L_%= \n\t"
        "ldi %0, 0xFF \n\t"
        "L_%=: "
        : "+d"(i) // r16-r31, restricted by ldi
        : "r"(j));
    return i;
#elif QADD8_ARM_DSP_ASM == 1
    asm volatile("uqadd8 %0, %0, %1" : "+r"(i) : "r"(j));
    return i;
#else
#error "No implementation for qadd8 available."
#endif
}
|
|
|
|
/// Add two signed bytes, clamping to the int8_t range instead of wrapping.
/// @param i first byte to add
/// @param j second byte to add
/// @returns i + j, limited to at most 0x7F (127) and at least -0x80 (-128)
LIB8STATIC_ALWAYS_INLINE int8_t qadd7(int8_t i, int8_t j) {
#if QADD7_C == 1
    // A 16-bit sum cannot overflow for two 8-bit operands.
    const int16_t sum = i + j;
    if (sum > 127) {
        return 127;
    }
    if (sum < -128) {
        return -128;
    }
    return static_cast<int8_t>(sum);
#elif QADD7_AVRASM == 1
    asm volatile(
        /* i += j; this sets both the V (signed overflow) and C flags */
        "add %0, %1 \n\t"

        /* V clear: no signed overflow, the sum is already the answer.
           V set:   start from the positive limit 0x7F. */
        "brvc L_%= \n\t"
        "ldi %0, 0x7F \n\t"

        /* C is set when both operands were negative; adding it turns
           0x7F into 0x80, i.e. the negative limit. */
        "adc %0, __zero_reg__\n\t"
        "L_%=: "
        : "+d"(i) // r16-r31, restricted by ldi
        : "r"(j));
    return i;
#elif QADD7_ARM_DSP_ASM == 1
    asm volatile("qadd8 %0, %0, %1" : "+r"(i) : "r"(j));
    return i;
#else
#error "No implementation for qadd7 available."
#endif
}
|
|
|
|
/// Subtract one unsigned byte from another, clamping at 0x00 instead of
/// wrapping.
/// @param i byte to subtract from
/// @param j byte to subtract
/// @returns i - j, or 0 when j is larger than i
LIB8STATIC_ALWAYS_INLINE uint8_t qsub8(uint8_t i, uint8_t j) {
#if QSUB8_C == 1
    // The difference is formed as a signed int so a borrow shows up as a
    // negative value.
    const int diff = i - j;
    return static_cast<uint8_t>(diff < 0 ? 0 : diff);
#elif QSUB8_AVRASM == 1
    asm volatile(
        /* i -= j; the C flag records a borrow */
        "sub %0, %1 \n\t"

        /* C clear: no borrow, keep the difference.
           C set:   the result went below zero, replace it with 0x00. */
        "brcc L_%= \n\t"
        "ldi %0, 0x00 \n\t"
        "L_%=: "
        : "+d"(i) // r16-r31, restricted by ldi
        : "r"(j));
    return i;
#else
#error "No implementation for qsub8 available."
#endif
}
|
|
|
|
/// Plain 8-bit addition with an 8-bit result.
/// @note The result is truncated to 8 bits; it does not saturate.
/// @param i first byte to add
/// @param j second byte to add
/// @returns the low 8 bits of i + j
LIB8STATIC_ALWAYS_INLINE uint8_t add8(uint8_t i, uint8_t j) {
#if ADD8_C == 1
    return static_cast<uint8_t>(i + j);
#elif ADD8_AVRASM == 1
    // A single add instruction; no flag handling afterwards.
    asm volatile("add %0, %1" : "+r"(i) : "r"(j));
    return i;
#else
#error "No implementation for add8 available."
#endif
}
|
|
|
|
/// Add an 8-bit value to a 16-bit value, with a 16-bit result.
/// @note The result is truncated to 16 bits; it does not saturate.
/// @param i first value to add, 8-bit
/// @param j second value to add, 16-bit
/// @returns the low 16 bits of i + j
LIB8STATIC_ALWAYS_INLINE uint16_t add8to16(uint8_t i, uint16_t j) {
#if ADD8_C == 1
    const uint16_t sum = i + j;
    return sum;
#elif ADD8_AVRASM == 1
    // Add i (one byte) into the low byte of j, then propagate the carry
    // into the high byte of j.
    asm volatile("add %A[j], %[i] \n\t"
                 "adc %B[j], __zero_reg__ \n\t"
                 : [j] "+r"(j)
                 : [i] "r"(i));
    // The sum was accumulated in j; returning i here would hand back the
    // untouched 8-bit input instead of the 16-bit result.
    return j;
#else
#error "No implementation for add8to16 available."
#endif
}
|
|
|
|
/// Plain 8-bit subtraction with an 8-bit result.
/// @note The result is truncated to 8 bits; it does not saturate.
/// @param i byte to subtract from
/// @param j byte to subtract
/// @returns the low 8 bits of i - j
LIB8STATIC_ALWAYS_INLINE uint8_t sub8(uint8_t i, uint8_t j) {
#if SUB8_C == 1
    return static_cast<uint8_t>(i - j);
#elif SUB8_AVRASM == 1
    // A single sub instruction; no flag handling afterwards.
    asm volatile("sub %0, %1" : "+r"(i) : "r"(j));
    return i;
#else
#error "No implementation for sub8 available."
#endif
}
|
|
|
|
/// Integer mean of two unsigned 8-bit values, rounded down.
/// For example avg8(20, 41) = 30.
/// @param i first value to average
/// @param j second value to average
/// @returns (i + j) / 2 with the fractional part discarded
LIB8STATIC_ALWAYS_INLINE uint8_t avg8(uint8_t i, uint8_t j) {
#if AVG8_C == 1
    // The operands are promoted before the addition, so the 9-bit sum is
    // not truncated before it is halved.
    const int sum = i + j;
    return sum / 2;
#elif AVG8_AVRASM == 1
    asm volatile(
        /* i += j; the 9th bit of the sum lands in the C flag */
        "add %0, %1 \n\t"
        /* rotate right through carry: halves the sum and shifts the C
           flag into bit 7 */
        "ror %0 \n\t"
        : "+r"(i)
        : "r"(j));
    return i;
#else
#error "No implementation for avg8 available."
#endif
}
|
|
|
|
/// Integer mean of two unsigned 16-bit values, rounded down.
/// For example avg16(20, 41) = 30.
/// @param i first value to average
/// @param j second value to average
/// @returns (i + j) / 2 with the fractional part discarded
LIB8STATIC_ALWAYS_INLINE uint16_t avg16(uint16_t i, uint16_t j) {
#if AVG16_C == 1
    // Add in 32 bits so the 17-bit sum survives until it is halved.
    const uint32_t sum = static_cast<uint32_t>(i) + j;
    return static_cast<uint16_t>(sum >> 1);
#elif AVG16_AVRASM == 1
    asm volatile(
        /* low bytes: iLo += jLo, the 9th bit goes to the C flag */
        "add %A[i], %A[j] \n\t"
        /* high bytes: iHi += jHi + C, the 17th bit goes to the C flag */
        "adc %B[i], %B[j] \n\t"
        /* halve iHi: C moves into bit 15, the old bit 8 moves into C */
        "ror %B[i] \n\t"
        /* halve iLo: C moves into bit 7 */
        "ror %A[i] \n\t"
        : [i] "+r"(i)
        : [j] "r"(j));
    return i;
#else
#error "No implementation for avg16 available."
#endif
}
|
|
|
|
/// Integer mean of two unsigned 8-bit values, rounded up.
/// For example avg8r(20, 41) = 31.
/// @param i first value to average
/// @param j second value to average
/// @returns (i + j + 1) / 2 with the fractional part discarded
LIB8STATIC_ALWAYS_INLINE uint8_t avg8r(uint8_t i, uint8_t j) {
#if AVG8R_C == 1
    // Adding one before halving turns the truncation into round-up.
    const int biasedSum = i + j + 1;
    return biasedSum / 2;
#elif AVG8R_AVRASM == 1
    asm volatile(
        /* i += j; the 9th bit of the sum lands in the C flag */
        "add %0, %1 \n\t"
        /* halve: C moves into bit 7, the old bit 0 moves into C */
        "ror %0 \n\t"
        /* add the shifted-out bit back in to round up */
        "adc %0, __zero_reg__\n\t"
        : "+r"(i)
        : "r"(j));
    return i;
#else
#error "No implementation for avg8r available."
#endif
}
|
|
|
|
/// Integer mean of two unsigned 16-bit values, rounded up.
/// For example avg16r(20, 41) = 31.
/// @param i first value to average
/// @param j second value to average
/// @returns (i + j + 1) / 2 with the fractional part discarded
LIB8STATIC_ALWAYS_INLINE uint16_t avg16r(uint16_t i, uint16_t j) {
#if AVG16R_C == 1
    // Add in 32 bits so the 17-bit sum (plus the rounding bias) survives
    // until it is halved.
    const uint32_t biasedSum = static_cast<uint32_t>(i) + j + 1;
    return static_cast<uint16_t>(biasedSum >> 1);
#elif AVG16R_AVRASM == 1
    asm volatile(
        /* low bytes: iLo += jLo, the 9th bit goes to the C flag */
        "add %A[i], %A[j] \n\t"
        /* high bytes: iHi += jHi + C, the 17th bit goes to the C flag */
        "adc %B[i], %B[j] \n\t"
        /* halve iHi: C moves into bit 15, the old bit 8 moves into C */
        "ror %B[i] \n\t"
        /* halve iLo: C moves into bit 7, the old bit 0 moves into C */
        "ror %A[i] \n\t"
        /* add the shifted-out bit back in, carrying into the high byte */
        "adc %A[i], __zero_reg__\n\t"
        "adc %B[i], __zero_reg__\n\t"
        : [i] "+r"(i)
        : [j] "r"(j));
    return i;
#else
#error "No implementation for avg16r available."
#endif
}
|
|
|
|
/// Calculate an integer average of two signed 7-bit
/// integers (int8_t).
/// Each operand is halved with a right shift and the low bit of the first
/// operand is added back, so:
/// If the first argument is even, result is rounded down.
/// If the first argument is odd, result is rounded up.
/// @param i first value to average
/// @param j second value to average
/// @returns mean average of i and j, rounded
LIB8STATIC_ALWAYS_INLINE int8_t avg7(int8_t i, int8_t j) {
#if AVG7_C == 1
    return (i >> 1) + (j >> 1) + (i & 0x1);
#elif AVG7_AVRASM == 1
    // Both operands are shifted in place, so both must be declared as
    // read-write ("+r"). Declaring j as a plain input while modifying it
    // would let the compiler assume its register still holds the original
    // value after the asm statement.
    asm volatile(
        /* halve j, discarding its low bit */
        "asr %1 \n\t"
        /* halve i, its low bit goes into the C flag */
        "asr %0 \n\t"
        /* i = i + j + C */
        "adc %0, %1 \n\t"
        : "+r"(i), "+r"(j));
    return i;
#else
#error "No implementation for avg7 available."
#endif
}
|
|
|
|
/// Calculate an integer average of two signed 15-bit
/// integers (int16_t).
/// Each operand is halved with a right shift and the low bit of the first
/// operand is added back, so:
/// If the first argument is even, result is rounded down.
/// If the first argument is odd, result is rounded up.
/// @param i first value to average
/// @param j second value to average
/// @returns mean average of i and j, rounded
LIB8STATIC_ALWAYS_INLINE int16_t avg15(int16_t i, int16_t j) {
#if AVG15_C == 1
    return (i >> 1) + (j >> 1) + (i & 0x1);
#elif AVG15_AVRASM == 1
    // Both operands are shifted in place, so both must be declared as
    // read-write ("+r"). Declaring j as a plain input while modifying it
    // would let the compiler assume its registers still hold the original
    // value after the asm statement.
    asm volatile(
        /* first divide j by 2, throwing away lowest bit */
        "asr %B[j] \n\t"
        "ror %A[j] \n\t"
        /* now divide i by 2, with lowest bit going into C */
        "asr %B[i] \n\t"
        "ror %A[i] \n\t"
        /* add j + C to i */
        "adc %A[i], %A[j] \n\t"
        "adc %B[i], %B[j] \n\t"
        : [i] "+r"(i), [j] "+r"(j));
    return i;
#else
#error "No implementation for avg15 available."
#endif
}
|
|
|
|
/// Calculate the remainder of one unsigned 8-bit
/// value divided by another, aka A % M.
/// Implemented by repeated subtraction, which is very compact, and very
/// fast if A is "probably" less than M. If A is a large multiple of M,
/// the loop has to execute multiple times. However, even in that case,
/// the loop is only two instructions long on AVR, i.e., quick.
/// @note m must be non-zero: with m == 0 the loop condition below is
/// always true and the function never returns.
/// @param a dividend byte
/// @param m divisor byte
/// @returns remainder of a / m (i.e. a % m)
LIB8STATIC_ALWAYS_INLINE uint8_t mod8(uint8_t a, uint8_t m) {
#if defined(__AVR__)
    // Keep subtracting m until the subtraction borrows, then add m back
    // once to undo the final, one-too-many subtraction.
    asm volatile("L_%=: sub %[a],%[m] \n\t"
                 " brcc L_%= \n\t"
                 " add %[a],%[m] \n\t"
                 : [a] "+r"(a)
                 : [m] "r"(m));
    return a;
#else
    uint8_t remainder = a;
    for (; remainder >= m; remainder -= m) {
        // all the work happens in the loop header
    }
    return remainder;
#endif
}
|
|
|
|
/// Add two numbers, and calculate the modulo
/// of the sum and a third number, M.
/// In other words, it returns (A+B) % M.
/// It is designed as a compact mechanism for incrementing a "mode" switch
/// and wrapping around back to "mode 0" when the switch goes past the end
/// of the available range.
/// e.g. if you have seven modes, this switches
/// to the next one and wraps around if needed:
///   @code{.cpp}
///   mode = addmod8( mode, 1, 7);
///   @endcode
/// @note The sum a + b is formed in 8 bits, so when it exceeds 255 it
/// wraps modulo 256 before being reduced modulo m.
/// @param a dividend byte
/// @param b value to add to the dividend
/// @param m divisor byte
/// @returns remainder of (a + b) / m
/// @see mod8() for notes on performance.
LIB8STATIC uint8_t addmod8(uint8_t a, uint8_t b, uint8_t m) {
#if defined(__AVR__)
    // a += b, then the same subtract-until-borrow loop as mod8().
    asm volatile(" add %[a],%[b] \n\t"
                 "L_%=: sub %[a],%[m] \n\t"
                 " brcc L_%= \n\t"
                 " add %[a],%[m] \n\t"
                 : [a] "+r"(a)
                 : [b] "r"(b), [m] "r"(m));
    return a;
#else
    uint8_t sum = a + b; // truncated to 8 bits
    for (; sum >= m; sum -= m) {
        // all the work happens in the loop header
    }
    return sum;
#endif
}
|
|
|
|
/// Subtract two numbers, and calculate the modulo
/// of the difference and a third number, M.
/// In other words, it returns (A-B) % M.
/// It is designed as a compact mechanism for decrementing a "mode" switch
/// and wrapping around when the switch goes past the start of the
/// available range.
/// e.g. if you have seven modes, this switches to the previous one:
///   @code{.cpp}
///   mode = submod8( mode, 1, 7);
///   @endcode
/// @note The difference a - b is formed in 8 bits, so when b > a it wraps
/// modulo 256 before being reduced modulo m. The result then matches the
/// mathematical (a - b) mod m only if m divides 256; for example
/// submod8(0, 1, 7) yields 255 % 7 = 3, not 6.
/// @param a dividend byte
/// @param b value to subtract from the dividend
/// @param m divisor byte
/// @returns remainder of (a - b) / m
/// @see mod8() for notes on performance.
LIB8STATIC uint8_t submod8(uint8_t a, uint8_t b, uint8_t m) {
#if defined(__AVR__)
    // a -= b, then the same subtract-until-borrow loop as mod8().
    asm volatile(" sub %[a],%[b] \n\t"
                 "L_%=: sub %[a],%[m] \n\t"
                 " brcc L_%= \n\t"
                 " add %[a],%[m] \n\t"
                 : [a] "+r"(a)
                 : [b] "r"(b), [m] "r"(m));
    return a;
#else
    uint8_t diff = a - b; // truncated to 8 bits
    for (; diff >= m; diff -= m) {
        // all the work happens in the loop header
    }
    return diff;
#endif
}
|
|
|
|
/// 8x8 bit multiplication, with 8-bit result.
/// @param i first byte to multiply
/// @param j second byte to multiply
/// @returns the low 8 bits of the product i * j
/// @note This does not saturate and may overflow!
LIB8STATIC_ALWAYS_INLINE uint8_t mul8(uint8_t i, uint8_t j) {
#if MUL8_C == 1
    // Multiply as unsigned (matching qmul8): the largest product, 65025,
    // does not fit in a signed 16-bit int, and signed overflow is undefined.
    return ((unsigned)i * (unsigned)j) & 0xFFu;
#elif MUL8_AVRASM == 1
    asm volatile(
        /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
        "mul %0, %1 \n\t"
        /* Extract the LOW 8-bits (r0) */
        "mov %0, r0 \n\t"
        /* Restore r1 to "0"; it's expected to always be that */
        "clr __zero_reg__ \n\t"
        : "+r"(i)
        : "r"(j)
        : "r0", "r1");
    return i;
#else
#error "No implementation for mul8 available."
#endif
}
|
|
|
|
/// 8x8 bit multiplication with an 8-bit result that clamps at 0xFF
/// instead of wrapping.
/// @param i first byte to multiply
/// @param j second byte to multiply
/// @returns i * j, or 0xFF when the product does not fit in 8 bits
LIB8STATIC_ALWAYS_INLINE uint8_t qmul8(uint8_t i, uint8_t j) {
#if QMUL8_C == 1
    const unsigned product = (unsigned)i * (unsigned)j;
    return (product > 255u) ? 255u : product;
#elif QMUL8_AVRASM == 1
    asm volatile(
        /* 8-bit i times 8-bit j; the 16-bit product lands in r1:r0 */
        " mul %0, %1 \n\t"
        /* take the low byte of the product (r0) as the tentative result */
        " mov %0, r0 \n\t"
        /* a zero high byte (r1) means the product fit in 8 bits */
        " tst r1 \n\t"
        " breq Lnospill_%= \n\t"
        /* otherwise the product spilled past 8 bits: clamp to 0xFF */
        " ldi %0, 0xFF \n\t"
        "Lnospill_%=: \n\t"
        /* put r1 back to zero; the rest of the code expects that */
        " clr __zero_reg__ \n\t"
        : "+d"(i) // r16-r31, restricted by ldi
        : "r"(j)
        : "r0", "r1");
    return i;
#else
#error "No implementation for qmul8 available."
#endif
}
|
|
|
|
/// Take the absolute value of a signed 8-bit integer (int8_t).
/// @note +128 is not representable in int8_t, so the result for
/// i == -128 is not +128.
/// @param i value to take the magnitude of
/// @returns i when i is non-negative, otherwise the negation of i
LIB8STATIC_ALWAYS_INLINE int8_t abs8(int8_t i) {
#if ABS8_C == 1
    return (i < 0) ? -i : i;
#elif ABS8_AVRASM == 1
    // NOTE(review): the input operand "r"(i) is never referenced by the
    // template (only %0 is used); it looks redundant next to "+r"(i).
    asm volatile(
        /* skip the next instruction when the sign bit (bit 7) is clear */
        "sbrc %0, 7 \n"

        /* sign bit set: negate the value */
        "neg %0 \n"

        : "+r"(i)
        : "r"(i));
    return i;
#else
#error "No implementation for abs8 available."
#endif
}
|
|
|
|
/// Square root for 16-bit integers.
/// About three times faster and five times smaller
/// than Arduino's general `sqrt` on AVR.
/// Implemented as a binary search over the 8-bit candidate roots.
/// @param x value to take the square root of
/// @returns the largest 8-bit value whose square does not exceed x
LIB8STATIC uint8_t sqrt16(uint16_t x) {
    if (x <= 1) {
        return x;
    }

    uint8_t low = 1; // lower bound
    uint8_t hi, mid;

    if (x > 7904) {
        hi = 255;
    } else {
        hi = (x >> 5) + 8; // initial estimate for upper bound
    }

    do {
        mid = (low + hi) >> 1;
        // Widen before multiplying: mid * mid on two promoted uint8_t
        // operands is evaluated in signed int, which cannot hold
        // 255 * 255 when int is 16 bits wide.
        if (((uint16_t)mid * mid) > x) {
            hi = mid - 1;
        } else {
            if (mid == 255) {
                // 255 is the largest possible result; stop here so that
                // low = mid + 1 does not wrap around to 0.
                return 255;
            }
            low = mid + 1;
        }
    } while (hi >= low);

    return low - 1;
}
|
|
|
|
/// Square root for an 8-bit input.
/// Widens x with map8_to_16() and returns sqrt16() of the widened value.
/// NOTE(review): map8_to_16() is declared outside this file (presumably
/// in intmap.h); its exact scaling is not visible here -- confirm there.
/// @param x 8-bit value to take the square root of
/// @returns sqrt16() of the widened input
LIB8STATIC_ALWAYS_INLINE uint8_t sqrt8(uint8_t x) {
    const uint16_t widened = map8_to_16(x);
    return sqrt16(widened);
}
|
|
|
|
/// Blend a variable proportion (0-255) of one byte to another.
/// @param a the starting byte value
/// @param b the byte value to blend toward
/// @param amountOfB the proportion (0-255) of b to blend
/// @returns a byte value between a and b, inclusive
#if (FASTLED_BLEND_FIXED == 1)
LIB8STATIC uint8_t blend8(uint8_t a, uint8_t b, uint8_t amountOfB) {

    // The BLEND_FIXED formula is
    //
    //   result = (  A*(amountOfA) + B*(amountOfB)              ) / 256
    //
    // ...where amountOfA = 255-amountOfB.
    //
    // This formula will never return 255, which is why the BLEND_FIXED +
    // SCALE8_FIXED version is
    //
    //   result = (  A*(amountOfA) + A + B*(amountOfB) + B      ) / 256
    //
    // We can rearrange this formula for some great optimisations.
    //
    //   result = (  A*(amountOfA) + A + B*(amountOfB) + B      ) / 256
    //          = (  A*(255-amountOfB) + A + B*(amountOfB) + B  ) / 256
    //          = (  A*(256-amountOfB) + B*(amountOfB) + B      ) / 256
    //          = (  A*256 + B + B*(amountOfB) - A*(amountOfB)  ) / 256
    //                 ^ this is the version used in SCALE8_FIXED AVR below
    //          = (  A*256 + B + (B-A)*(amountOfB)              ) / 256
    //                 ^ this is the version used in SCALE8_FIXED C below

    uint16_t partial;
    uint8_t result;

#if BLEND8_C == 1

    // Widen once so every shift and product below works on 16-bit unsigned
    // operands. With plain uint8_t operands the arithmetic is carried out
    // in signed int, which cannot hold e.g. 255 * 255 when int is 16 bits.
    const uint16_t wideA = a;
    const uint16_t wideB = b;
    const uint16_t wideAmountOfB = amountOfB;

#if (FASTLED_SCALE8_FIXED == 1)
    partial = (wideA << 8) | wideB; // A*256 + B

    // on many platforms this compiles to a single multiply of (B-A) * amountOfB
    // (intermediate values may wrap modulo 65536; the final value is exact)
    partial += (wideB * wideAmountOfB);
    partial -= (wideA * wideAmountOfB);

#else
    const uint16_t amountOfA = 255 - amountOfB;

    // on the other hand, this compiles to two multiplies, and gives the "wrong"
    // answer :]
    partial = (wideA * amountOfA);
    partial += (wideB * wideAmountOfB);
#endif

    result = partial >> 8;

    return result;

#elif BLEND8_AVRASM == 1

#if (FASTLED_SCALE8_FIXED == 1)

    // 1 or 2 cycles depending on how the compiler optimises
    partial = ((uint16_t)a << 8) | b;

    // 7 cycles
    asm volatile(
        /* partial -= a * amountOfB */
        " mul %[a], %[amountOfB] \n\t"
        " sub %A[partial], r0 \n\t"
        " sbc %B[partial], r1 \n\t"
        /* partial += b * amountOfB */
        " mul %[b], %[amountOfB] \n\t"
        " add %A[partial], r0 \n\t"
        " adc %B[partial], r1 \n\t"
        /* mul leaves its result in r1:r0; restore r1 to zero */
        " clr __zero_reg__ \n\t"
        : [partial] "+r"(partial)
        : [amountOfB] "r"(amountOfB), [a] "r"(a), [b] "r"(b)
        : "r0", "r1");

#else

    // non-SCALE8-fixed version

    // 7 cycles
    asm volatile(
        /* partial = b * amountOfB */
        " mul %[b], %[amountOfB] \n\t"
        " movw %A[partial], r0 \n\t"

        /* amountOfB (aka amountOfA) = 255 - amountOfB */
        " com %[amountOfB] \n\t"

        /* partial += a * amountOfB (aka amountOfA) */
        " mul %[a], %[amountOfB] \n\t"

        " add %A[partial], r0 \n\t"
        " adc %B[partial], r1 \n\t"

        " clr __zero_reg__ \n\t"

        // partial is written (movw) before input a is read by the second
        // mul, so it is marked earlyclobber ("&") to keep the compiler from
        // placing it in a register that also holds an input.
        : [partial] "=&r"(partial), [amountOfB] "+r"(amountOfB)
        : [a] "r"(a), [b] "r"(b)
        : "r0", "r1");

#endif

    result = partial >> 8;

    return result;

#else
#error "No implementation for blend8 available."
#endif
}

#else
LIB8STATIC uint8_t blend8(uint8_t a, uint8_t b, uint8_t amountOfB) {
    // This version loses precision in the integer math
    // and can actually return results outside of the range
    // from a to b.  Its use is not recommended.
    uint8_t result;
    uint8_t amountOfA = 255 - amountOfB;
    result = scale8_LEAVING_R1_DIRTY(a, amountOfA) +
             scale8_LEAVING_R1_DIRTY(b, amountOfB);
    cleanup_R1();
    return result;
}
#endif
|
|
|
|
/// @} Math
|
|
/// @} lib8tion
|
|
|
|
FASTLED_NAMESPACE_END
|
|
|
|
FL_DISABLE_WARNING_POP
|