Convert custom maths to inlines (#10728)
This commit is contained in:
		| @@ -23,92 +23,95 @@ | ||||
| #ifndef _MATH_AVR_H_ | ||||
| #define _MATH_AVR_H_ | ||||
|  | ||||
| // Wraps one assembly instruction string with a leading space and a trailing "\n\t". | ||||
| // NOTE(review): the asm blocks in this file invoke `A(...)` (uppercase); this lowercase | ||||
| // `a(...)` is not referenced in the visible code - confirm where `A` is defined. | ||||
| #define a(CODE) " " CODE "\n\t" | ||||
|  | ||||
| /** | ||||
|  * Optimized math functions for AVR | ||||
|  */ | ||||
|  | ||||
| // intRes = longIn1 * longIn2 >> 24 | ||||
| // uses: | ||||
| // r26 to store 0 | ||||
| // r27 to store bits 16-23 of the 48bit result. The top bit is used to round the two byte result. | ||||
| // A[tmp] to store 0 | ||||
| // B[tmp] to store bits 16-23 of the 48bit result. The top bit is used to round the two byte result. | ||||
| // note that the lower two bytes and the upper byte of the 48bit result are not calculated. | ||||
| // this can cause the result to be out by one as the lower bytes may cause carries into the upper ones. | ||||
| // B0 A0 are bits 24-39 and are the returned value | ||||
| // C1 B1 A1 is longIn1 | ||||
| // D2 C2 B2 A2 is longIn2 | ||||
| // B A are bits 24-39 and are the returned value | ||||
| // C B A is longIn1 | ||||
| // D C B A is longIn2 | ||||
| // | ||||
| // Macro form: writes the 16-bit result into intRes (asm output operand %0) from | ||||
| // longIn1 (%1) and longIn2 (%2). r26 and r27 are used as scratch and are listed as | ||||
| // clobbers; r1 is cleared again before the asm block ends. | ||||
| // NOTE(review): `lsr r27` moves bit 0 of r27 into carry, whereas the comment above says | ||||
| // the top bit is used for rounding - confirm which is intended. | ||||
| #define MultiU24X32toH16(intRes, longIn1, longIn2) \ | ||||
|   asm volatile ( \ | ||||
|                  A("clr r26")      \ | ||||
|                  A("mul %A1, %B2") \ | ||||
|                  A("mov r27, r1")  \ | ||||
|                  A("mul %B1, %C2") \ | ||||
|                  A("movw %A0, r0") \ | ||||
|                  A("mul %C1, %C2") \ | ||||
|                  A("add %B0, r0")  \ | ||||
|                  A("mul %C1, %B2") \ | ||||
|                  A("add %A0, r0")  \ | ||||
|                  A("adc %B0, r1")  \ | ||||
|                  A("mul %A1, %C2") \ | ||||
|                  A("add r27, r0")  \ | ||||
|                  A("adc %A0, r1")  \ | ||||
|                  A("adc %B0, r26") \ | ||||
|                  A("mul %B1, %B2") \ | ||||
|                  A("add r27, r0")  \ | ||||
|                  A("adc %A0, r1")  \ | ||||
|                  A("adc %B0, r26") \ | ||||
|                  A("mul %C1, %A2") \ | ||||
|                  A("add r27, r0")  \ | ||||
|                  A("adc %A0, r1")  \ | ||||
|                  A("adc %B0, r26") \ | ||||
|                  A("mul %B1, %A2") \ | ||||
|                  A("add r27, r1")  \ | ||||
|                  A("adc %A0, r26") \ | ||||
|                  A("adc %B0, r26") \ | ||||
|                  A("lsr r27")      \ | ||||
|                  A("adc %A0, r26") \ | ||||
|                  A("adc %B0, r26") \ | ||||
|                  A("mul %D2, %A1") \ | ||||
|                  A("add %A0, r0")  \ | ||||
|                  A("adc %B0, r1")  \ | ||||
|                  A("mul %D2, %B1") \ | ||||
|                  A("add %B0, r0")  \ | ||||
|                  A("clr r1")       \ | ||||
|                  : \ | ||||
|                  "=&r" (intRes) \ | ||||
|                  : \ | ||||
|                  "d" (longIn1), \ | ||||
|                  "d" (longIn2) \ | ||||
|                  : \ | ||||
|                  "r26" , "r27" \ | ||||
|                ) | ||||
| /** | ||||
|  * Multiply longIn1 by longIn2 with byte-wise AVR `mul` instructions and return the | ||||
|  * two-byte value accumulated from the partial products (see the | ||||
|  * "longIn1 * longIn2 >> 24" description above). | ||||
|  * | ||||
|  * @param longIn1  first factor; only bytes A, B and C are read by the assembly | ||||
|  * @param longIn2  second factor; all four bytes A..D are read | ||||
|  * @return intRes, the 16-bit accumulator built by the assembly | ||||
|  */ | ||||
| static FORCE_INLINE uint16_t MultiU24X32toH16(uint32_t longIn1, uint32_t longIn2) { | ||||
|   register uint8_t tmp1; | ||||
|   register uint8_t tmp2; | ||||
|   register uint16_t intRes; | ||||
|   __asm__ __volatile__( | ||||
|     // tmp1 is held at zero so that `adc x, tmp1` adds only the carry flag | ||||
|     A("clr %[tmp1]") | ||||
|     // Only the high byte (r1) of this partial product is kept, in tmp2 | ||||
|     A("mul %A[longIn1], %B[longIn2]") | ||||
|     A("mov %[tmp2], r1") | ||||
|     A("mul %B[longIn1], %C[longIn2]") | ||||
|     A("movw %A[intRes], r0") | ||||
|     A("mul %C[longIn1], %C[longIn2]") | ||||
|     A("add %B[intRes], r0") | ||||
|     A("mul %C[longIn1], %B[longIn2]") | ||||
|     A("add %A[intRes], r0") | ||||
|     A("adc %B[intRes], r1") | ||||
|     A("mul %A[longIn1], %C[longIn2]") | ||||
|     A("add %[tmp2], r0") | ||||
|     A("adc %A[intRes], r1") | ||||
|     A("adc %B[intRes], %[tmp1]") | ||||
|     A("mul %B[longIn1], %B[longIn2]") | ||||
|     A("add %[tmp2], r0") | ||||
|     A("adc %A[intRes], r1") | ||||
|     A("adc %B[intRes], %[tmp1]") | ||||
|     A("mul %C[longIn1], %A[longIn2]") | ||||
|     A("add %[tmp2], r0") | ||||
|     A("adc %A[intRes], r1") | ||||
|     A("adc %B[intRes], %[tmp1]") | ||||
|     // Only the high byte (r1) of this partial product is added into tmp2 | ||||
|     A("mul %B[longIn1], %A[longIn2]") | ||||
|     A("add %[tmp2], r1") | ||||
|     A("adc %A[intRes], %[tmp1]") | ||||
|     A("adc %B[intRes], %[tmp1]") | ||||
|     // Rounding step: the bit shifted out of tmp2 lands in carry and is added to intRes. | ||||
|     // NOTE(review): `lsr` shifts out bit 0, while the comment above says the top bit is | ||||
|     // used for rounding - confirm which is intended. | ||||
|     A("lsr %[tmp2]") | ||||
|     A("adc %A[intRes], %[tmp1]") | ||||
|     A("adc %B[intRes], %[tmp1]") | ||||
|     A("mul %D[longIn2], %A[longIn1]") | ||||
|     A("add %A[intRes], r0") | ||||
|     A("adc %B[intRes], r1") | ||||
|     A("mul %D[longIn2], %B[longIn1]") | ||||
|     A("add %B[intRes], r0") | ||||
|     // r1 was overwritten by the mul instructions; reset it to zero before leaving | ||||
|     A("clr r1") | ||||
|       // Outputs are early-clobber ("=&r") because they are written before all inputs are consumed | ||||
|       : [intRes] "=&r" (intRes), | ||||
|         [tmp1] "=&r" (tmp1), | ||||
|         [tmp2] "=&r" (tmp2) | ||||
|       : [longIn1] "d" (longIn1), | ||||
|         [longIn2] "d" (longIn2) | ||||
|       : "cc" | ||||
|   ); | ||||
|   return intRes; | ||||
| } | ||||
|  | ||||
| // intRes = charIn1 * intIn2 >> 8 (the upper 16 bits of the 24-bit product) | ||||
| // uses: | ||||
| // r26 to store 0 | ||||
| // r27 to store the byte 1 of the 24 bit result | ||||
| // Macro form: writes the 16-bit result into intRes (%0) from charIn1 (%1) and intIn2 (%2). | ||||
| // r26 is zeroed and used as the carry-only operand for `adc`; it is listed as a clobber. | ||||
| // r1 is cleared again before the asm block ends. | ||||
| // NOTE(review): `lsr r0` moves bit 0 of r0 into carry before the final `adc` pair - | ||||
| // confirm that this is the intended rounding bit. | ||||
| #define MultiU16X8toH16(intRes, charIn1, intIn2) \ | ||||
|   asm volatile ( \ | ||||
|                  A("clr r26")      \ | ||||
|                  A("mul %A1, %B2") \ | ||||
|                  A("movw %A0, r0") \ | ||||
|                  A("mul %A1, %A2") \ | ||||
|                  A("add %A0, r1")  \ | ||||
|                  A("adc %B0, r26") \ | ||||
|                  A("lsr r0")       \ | ||||
|                  A("adc %A0, r26") \ | ||||
|                  A("adc %B0, r26") \ | ||||
|                  A("clr r1")       \ | ||||
|                  : \ | ||||
|                  "=&r" (intRes) \ | ||||
|                  : \ | ||||
|                  "d" (charIn1), \ | ||||
|                  "d" (intIn2) \ | ||||
|                  : \ | ||||
|                  "r26" \ | ||||
|                ) | ||||
|  | ||||
| /** | ||||
|  * Multiply the 8-bit charIn1 by the 16-bit intIn2 with two AVR `mul` instructions and | ||||
|  * return the 16-bit value accumulated from the partial products. | ||||
|  * | ||||
|  * @param charIn1  8-bit factor | ||||
|  * @param intIn2   16-bit factor; bytes A (low) and B (high) are multiplied separately | ||||
|  * @return intRes: charIn1 * B plus the high byte of charIn1 * A, plus the rounding carry | ||||
|  */ | ||||
| static FORCE_INLINE uint16_t MultiU16X8toH16(uint8_t charIn1, uint16_t intIn2) { | ||||
|   register uint8_t tmp; | ||||
|   register uint16_t intRes; | ||||
|   __asm__ __volatile__ ( | ||||
|     // tmp is held at zero so that `adc x, tmp` adds only the carry flag | ||||
|     A("clr %[tmp]") | ||||
|     A("mul %[charIn1], %B[intIn2]") | ||||
|     A("movw %A[intRes], r0") | ||||
|     // Of the low-byte product only the high byte (r1) is added into the result | ||||
|     A("mul %[charIn1], %A[intIn2]") | ||||
|     A("add %A[intRes], r1") | ||||
|     A("adc %B[intRes], %[tmp]") | ||||
|     // Rounding step: the bit shifted out of r0 lands in carry and is added to intRes. | ||||
|     // NOTE(review): `lsr` shifts out bit 0 of r0 - confirm this is the intended rounding bit. | ||||
|     A("lsr r0") | ||||
|     A("adc %A[intRes], %[tmp]") | ||||
|     A("adc %B[intRes], %[tmp]") | ||||
|     // r1 was overwritten by the mul instructions; reset it to zero before leaving | ||||
|     A("clr r1") | ||||
|       : [intRes] "=&r" (intRes), | ||||
|         [tmp] "=&r" (tmp) | ||||
|       : [charIn1] "d" (charIn1), | ||||
|         [intIn2] "d" (intIn2) | ||||
|       : "cc" | ||||
|   ); | ||||
|   return intRes; | ||||
| } | ||||
|  | ||||
| #endif // _MATH_AVR_H_ | ||||
|   | ||||
| @@ -23,11 +23,13 @@ | ||||
| #ifndef MATH_32BIT_H | ||||
| #define MATH_32BIT_H | ||||
|  | ||||
| #include "../core/macros.h" | ||||
|  | ||||
| /** | ||||
|  * Math helper functions for 32 bit CPUs | ||||
|  */ | ||||
|  | ||||
| // Statement-style macros: assign to intRes the 64-bit product of longIn1 and longIn2, | ||||
| // plus a rounding constant (half of the discarded range), shifted right by 32 or 24 bits. | ||||
| // NOTE(review): the arguments are not parenthesized in the expansion, so expression | ||||
| // arguments such as `a + b` would not be grouped the way a caller might expect. | ||||
| #define MultiU32X32toH32(intRes, longIn1, longIn2) intRes = ((uint64_t)longIn1 * longIn2 + 0x80000000) >> 32 | ||||
| #define MultiU32X24toH32(intRes, longIn1, longIn2) intRes = ((uint64_t)longIn1 * longIn2 + 0x00800000) >> 24 | ||||
| /** | ||||
|  * Multiply two 32-bit values in 64-bit arithmetic, add 0x00800000 (half of 2^24) for | ||||
|  * rounding, and return the sum shifted right by 24 bits, truncated to 32 bits. | ||||
|  */ | ||||
| static FORCE_INLINE uint32_t MultiU32X24toH32(uint32_t longIn1, uint32_t longIn2) { | ||||
|   const uint64_t product = (uint64_t)longIn1 * longIn2; | ||||
|   const uint64_t rounded = product + 0x00800000; | ||||
|   return rounded >> 24; | ||||
| } | ||||
|  | ||||
| #endif // MATH_32BIT_H | ||||
|   | ||||
| @@ -1158,6 +1158,12 @@ HAL_STEP_TIMER_ISR { | ||||
|   HAL_timer_isr_epilogue(STEP_TIMER_NUM); | ||||
| } | ||||
|  | ||||
| // Select the platform's fixed-point multiply helper for step-rate calculations. | ||||
| // The expansion deliberately has no trailing semicolon: STEP_MULTIPLY() is used inside | ||||
| // larger expressions (for example `STEP_MULTIPLY(...) + current_block->initial_rate`), | ||||
| // and a semicolon inside the macro would end the statement early and silently discard | ||||
| // the remainder of the expression. | ||||
| #ifdef CPU_32_BIT | ||||
|   #define STEP_MULTIPLY(A,B) MultiU32X24toH32(A, B) | ||||
| #else | ||||
|   #define STEP_MULTIPLY(A,B) MultiU24X32toH16(A, B) | ||||
| #endif | ||||
|  | ||||
| void Stepper::isr() { | ||||
|  | ||||
|   #define ENDSTOP_NOMINAL_OCR_VAL 1500 * HAL_TICKS_PER_US // Check endstops every 1.5ms to guarantee two stepper ISRs within 5ms for BLTouch | ||||
| @@ -1525,14 +1531,7 @@ void Stepper::isr() { | ||||
|           ? _eval_bezier_curve(acceleration_time) | ||||
|           : current_block->cruise_rate; | ||||
|     #else | ||||
|       #ifdef CPU_32_BIT | ||||
|         MultiU32X24toH32(acc_step_rate, acceleration_time, current_block->acceleration_rate); | ||||
|       #else | ||||
|         MultiU24X32toH16(acc_step_rate, acceleration_time, current_block->acceleration_rate); | ||||
|       #endif | ||||
|       acc_step_rate += current_block->initial_rate; | ||||
|  | ||||
|       // upper limit | ||||
|       acc_step_rate = STEP_MULTIPLY(acceleration_time, current_block->acceleration_rate) + current_block->initial_rate; | ||||
|       NOMORE(acc_step_rate, current_block->nominal_rate); | ||||
|     #endif | ||||
|  | ||||
| @@ -1576,18 +1575,14 @@ void Stepper::isr() { | ||||
|     #else | ||||
|  | ||||
|       // Using the old trapezoidal control | ||||
|       #ifdef CPU_32_BIT | ||||
|         MultiU32X24toH32(step_rate, deceleration_time, current_block->acceleration_rate); | ||||
|       #else | ||||
|         MultiU24X32toH16(step_rate, deceleration_time, current_block->acceleration_rate); | ||||
|       #endif | ||||
|  | ||||
|       step_rate = STEP_MULTIPLY(deceleration_time, current_block->acceleration_rate); | ||||
|       if (step_rate < acc_step_rate) { // Still decelerating? | ||||
|         step_rate = acc_step_rate - step_rate; | ||||
|         NOLESS(step_rate, current_block->final_rate); | ||||
|       } | ||||
|       else | ||||
|         step_rate = current_block->final_rate; | ||||
|  | ||||
|     #endif | ||||
|  | ||||
|     // step_rate to timer interval | ||||
|   | ||||
| @@ -340,24 +340,24 @@ class Stepper { | ||||
|  | ||||
|       #ifdef CPU_32_BIT | ||||
|         // In case of high-performance processor, it is able to calculate in real-time | ||||
|         const uint32_t MIN_TIME_PER_STEP = (HAL_STEPPER_TIMER_RATE) / ((STEP_DOUBLER_FREQUENCY) * 2); | ||||
|         const uint32_t min_time_per_step = (HAL_STEPPER_TIMER_RATE) / ((STEP_DOUBLER_FREQUENCY) * 2); | ||||
|         timer = uint32_t(HAL_STEPPER_TIMER_RATE) / step_rate; | ||||
|         NOLESS(timer, MIN_TIME_PER_STEP); // (STEP_DOUBLER_FREQUENCY * 2 kHz - this should never happen) | ||||
|         NOLESS(timer, min_time_per_step); // (STEP_DOUBLER_FREQUENCY * 2 kHz - this should never happen) | ||||
|       #else | ||||
|         NOLESS(step_rate, F_CPU / 500000); | ||||
|         step_rate -= F_CPU / 500000; // Correct for minimal speed | ||||
|         if (step_rate >= (8 * 256)) { // higher step rate | ||||
|           unsigned short table_address = (unsigned short)&speed_lookuptable_fast[(unsigned char)(step_rate >> 8)][0]; | ||||
|           unsigned char tmp_step_rate = (step_rate & 0x00FF); | ||||
|           unsigned short gain = (unsigned short)pgm_read_word_near(table_address + 2); | ||||
|           MultiU16X8toH16(timer, tmp_step_rate, gain); | ||||
|           timer = (unsigned short)pgm_read_word_near(table_address) - timer; | ||||
|           uint8_t tmp_step_rate = (step_rate & 0x00FF); | ||||
|           uint16_t table_address = (uint16_t)&speed_lookuptable_fast[(uint8_t)(step_rate >> 8)][0]; | ||||
|           uint16_t gain = (uint16_t)pgm_read_word_near(table_address + 2); | ||||
|           timer = MultiU16X8toH16(tmp_step_rate, gain); | ||||
|           timer = (uint16_t)pgm_read_word_near(table_address) - timer; | ||||
|         } | ||||
|         else { // lower step rates | ||||
|           unsigned short table_address = (unsigned short)&speed_lookuptable_slow[0][0]; | ||||
|           uint16_t table_address = (uint16_t)&speed_lookuptable_slow[0][0]; | ||||
|           table_address += ((step_rate) >> 1) & 0xFFFC; | ||||
|           timer = (unsigned short)pgm_read_word_near(table_address); | ||||
|           timer -= (((unsigned short)pgm_read_word_near(table_address + 2) * (unsigned char)(step_rate & 0x0007)) >> 3); | ||||
|           timer = (uint16_t)pgm_read_word_near(table_address); | ||||
|           timer -= (((uint16_t)pgm_read_word_near(table_address + 2) * (uint8_t)(step_rate & 0x0007)) >> 3); | ||||
|         } | ||||
|         if (timer < 100) { // (20kHz - this should never happen) | ||||
|           timer = 100; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user