Better handling of DELAY_NS and DELAY_US (#10716)

Co-Authored-By: ejtagle <ejtagle@hotmail.com>
This commit is contained in:
Scott Lahteine
2018-05-12 08:34:04 -05:00
committed by GitHub
parent f5aaa2d6c0
commit a1062eec5b
21 changed files with 273 additions and 268 deletions

133
Marlin/src/HAL/Delay.h Normal file
View File

@@ -0,0 +1,133 @@
/**
* Marlin 3D Printer Firmware
* Copyright (C) 2016 MarlinFirmware [https://github.com/MarlinFirmware/Marlin]
*
* Based on Sprinter and grbl.
* Copyright (C) 2011 Camiel Gubbels / Erik van der Zalm
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
/**
Busy-wait delay routines:
DELAY_CYCLES(count): Delay execution in cycles
DELAY_NS(count): Delay execution in nanoseconds
DELAY_US(count): Delay execution in microseconds
*/
#ifndef MARLIN_DELAY_H
#define MARLIN_DELAY_H
#if defined(__arm__) || defined(__thumb__)
/* https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles */
#define nop() __asm__ __volatile__("nop;\n\t":::)
/**
 * Busy-wait countdown loop (ARM / Thumb build).
 *
 * Each pass subtracts 1 from the counter and branches back to the local
 * label while the result is non-zero.
 *
 * cy: number of loop passes to execute. It must be non-zero: the counter is
 *     decremented BEFORE it is tested, so a value of 0 wraps to 0xFFFFFFFF
 *     and the loop then runs 2^32 passes.
 *
 * NOTE(review): the function name and the "+1 cycle" remark suggest each pass
 * is meant to cost 4 CPU cycles, plus one extra cycle for the whole call.
 * That presumably depends on ARCH_PIPELINE_RELOAD_CYCLES matching the branch
 * penalty of the target core - confirm per MCU.
 * NOTE(review): A() and L() are helper macros defined elsewhere in the
 * project; presumably A() emits one assembler line and L() a label - confirm.
 */
FORCE_INLINE static void __delay_4cycles(uint32_t cy) { // +1 cycle
// Pad every pass with one NOP when ARCH_PIPELINE_RELOAD_CYCLES is below 2.
// (An identifier that is not defined evaluates as 0 inside #if, so the NOP
// is also emitted in that case.)
#if ARCH_PIPELINE_RELOAD_CYCLES < 2
#define EXTRA_NOP_CYCLES A("nop")
#else
#define EXTRA_NOP_CYCLES ""
#endif
__asm__ __volatile__(
A(".syntax unified") // select unified syntax (noted as needed to avoid non-unified syntax on CM0, CM1)
L("1")
A("subs %[cnt],#1") // decrement the counter and update the condition flags
EXTRA_NOP_CYCLES
A("bne 1b") // loop back to label 1 while the counter is non-zero
: [cnt]"+r"(cy) // output: +r means input+output (register is read and written)
: // input: none
: "cc" // clobbers: condition flags (written by subs)
);
}
/* ---------------- Delay in cycles */
/**
 * Busy-wait for roughly x CPU cycles (ARM / Thumb build).
 *
 * x: requested delay in CPU cycles.
 *
 * Compile-time constant x:
 *   - up to 4 cycles are produced with individual NOPs;
 *   - larger values emit (x - 1) % 4 NOPs and run the countdown loop
 *     (x - 1) / 4 times; the "- 1" accounts for the extra cycle the loop
 *     helper is annotated with.
 *
 * Run-time x:
 *   - the loop runs x / 4 times, so the delay is rounded down to a multiple
 *     of 4 cycles and values below 4 produce no loop at all.
 *
 * The countdown helper decrements before it tests, so it must never be called
 * with 0 (the 32-bit counter would wrap and spin for 2^32 passes). Both paths
 * therefore skip the call when the computed pass count is zero.
 */
FORCE_INLINE static void DELAY_CYCLES(uint32_t x) {
  if (__builtin_constant_p(x)) {
    #define MAXNOPS 4
    if (x <= (MAXNOPS)) {
      // Short delays: fall through the cases to emit exactly x NOPs.
      switch (x) {
        case 4: nop(); /* fallthrough */
        case 3: nop(); /* fallthrough */
        case 2: nop(); /* fallthrough */
        case 1: nop(); /* fallthrough */
        default: break;
      }
    }
    else { // because of +1 cycle inside delay_4cycles
      const uint32_t rem = (x - 1) % (MAXNOPS);
      switch (rem) {
        case 3: nop(); /* fallthrough */
        case 2: nop(); /* fallthrough */
        case 1: nop(); /* fallthrough */
        default: break;
      }
      if ((x = (x - 1) / (MAXNOPS)))
        __delay_4cycles(x); // beyond 4 NOPs the loop is the more compact choice
    }
    #undef MAXNOPS
  }
  else if ((x >>= 2))   // never pass 0: the counter would wrap around
    __delay_4cycles(x);
}
#undef nop
#elif defined(__AVR__)
#define nop() __asm__ __volatile__("nop;\n\t":::)
/**
 * Busy-wait countdown loop (AVR build).
 *
 * Each pass decrements the counter, executes one NOP and branches back to the
 * local label while the counter is non-zero.
 *
 * cy: number of loop passes to execute (8-bit). It must be non-zero: the
 *     counter is decremented BEFORE it is tested, so a value of 0 wraps to
 *     255 and the loop then runs 256 passes.
 *
 * NOTE(review): the function name suggests each pass is meant to cost 4 CPU
 * cycles (presumably dec + nop + taken brne) - confirm against the AVR
 * instruction timing for the target part.
 * NOTE(review): A() and L() are helper macros defined elsewhere in the
 * project; presumably A() emits one assembler line and L() a label - confirm.
 */
FORCE_INLINE static void __delay_4cycles(uint8_t cy) {
__asm__ __volatile__(
L("1")
A("dec %[cnt]") // decrement the counter and update the status flags
A("nop") // padding instruction inside the loop
A("brne 1b") // loop back to label 1 while the counter is non-zero
: [cnt] "+r"(cy) // output: +r means input+output (register is read and written)
: // input: none
: "cc" // clobbers: condition flags (written by dec)
);
}
/* ---------------- Delay in cycles */
/**
 * Busy-wait for roughly x CPU cycles (AVR build).
 *
 * x: requested delay in CPU cycles.
 *
 * Compile-time constant x:
 *   - up to 4 cycles are produced with individual NOPs;
 *   - larger values emit x % 4 NOPs and run the countdown loop x / 4 times.
 *
 * Run-time x:
 *   - the loop runs x / 4 times, so the delay is rounded down to a multiple
 *     of 4 cycles and values below 4 produce no loop at all.
 *
 * The countdown helper takes an 8-bit pass count and decrements before it
 * tests. Two consequences are handled here:
 *   - it is never called with 0 (the counter would wrap and run 256 passes);
 *   - pass counts above 255 are issued in chunks of at most 255 instead of
 *     being silently truncated to their low 8 bits. The chunking adds a small
 *     bookkeeping overhead per chunk, so long delays are approximate.
 */
FORCE_INLINE static void DELAY_CYCLES(uint16_t x) {
  uint16_t loops; // countdown-loop passes still to execute

  if (__builtin_constant_p(x)) {
    #define MAXNOPS 4
    if (x <= (MAXNOPS)) {
      // Short delays: fall through the cases to emit exactly x NOPs.
      switch (x) {
        case 4: nop(); /* fallthrough */
        case 3: nop(); /* fallthrough */
        case 2: nop(); /* fallthrough */
        case 1: nop(); /* fallthrough */
        default: break;
      }
      loops = 0;
    }
    else {
      const uint32_t rem = (x) % (MAXNOPS);
      switch (rem) {
        case 3: nop(); /* fallthrough */
        case 2: nop(); /* fallthrough */
        case 1: nop(); /* fallthrough */
        default: break;
      }
      loops = (x) / (MAXNOPS); // beyond 4 NOPs the loop is the more compact choice
    }
    #undef MAXNOPS
  }
  else
    loops = x / 4;

  // Feed the 8-bit helper in chunks so large counts are not truncated.
  while (loops > 255) {
    __delay_4cycles(255);
    loops -= 255;
  }
  if (loops)            // never pass 0: the counter would wrap around
    __delay_4cycles((uint8_t)loops);
}
#undef nop
#else
#error "Unsupported MCU architecture"
#endif
/* ---------------- Time-based delays
 * Both macros convert a duration into CPU cycles using the whole number of
 * cycles per microsecond (F_CPU/1000000L, integer division) and pass the
 * result to DELAY_CYCLES. The argument is evaluated exactly once.
 */
/* Delay in nanoseconds: cycles = x * cycles_per_us / 1000, rounded down */
#define DELAY_NS(x) DELAY_CYCLES( ( (F_CPU/1000000L) * (x) ) / 1000L )
/* Delay in microseconds: cycles = x * cycles_per_us */
#define DELAY_US(x) DELAY_CYCLES( (F_CPU/1000000L) * (x) )
#endif // MARLIN_DELAY_H

View File

@@ -42,6 +42,7 @@
// --------------------------------------------------------------------------
#include "../../inc/MarlinConfig.h"
#include "../Delay.h"
// --------------------------------------------------------------------------
// Public Variables
@@ -58,66 +59,16 @@
// software SPI
// --------------------------------------------------------------------------
// set optimization so ARDUINO optimizes this file
// Make sure GCC optimizes this file.
// Note that this line triggers a bug in GCC which is fixed by casting.
// See the note below.
#pragma GCC optimize (3)
/* ---------------- Delay Cycles routine -------------- */
/* https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles */
#define nop() __asm__ __volatile__("nop;\n\t":::)
FORCE_INLINE static void __delay_4cycles(uint32_t cy) { // +1 cycle
#if ARCH_PIPELINE_RELOAD_CYCLES<2
#define EXTRA_NOP_CYCLES "nop"
#else
#define EXTRA_NOP_CYCLES ""
#endif
__asm__ __volatile__(
".syntax unified" "\n\t" // is to prevent CM0,CM1 non-unified syntax
L("loop%=")
A("subs %[cnt],#1")
A(EXTRA_NOP_CYCLES)
A("bne loop%=")
: [cnt]"+r"(cy) // output: +r means input+output
: // input:
: "cc" // clobbers:
);
}
FORCE_INLINE static void DELAY_CYCLES(uint32_t x) {
if (__builtin_constant_p(x)) {
#define MAXNOPS 4
if (x <= (MAXNOPS)) {
switch (x) { case 4: nop(); case 3: nop(); case 2: nop(); case 1: nop(); }
}
else { // because of +1 cycle inside delay_4cycles
const uint32_t rem = (x - 1) % (MAXNOPS);
switch (rem) { case 3: nop(); case 2: nop(); case 1: nop(); }
if ((x = (x - 1) / (MAXNOPS)))
__delay_4cycles(x); // if need more then 4 nop loop is more optimal
}
}
else
__delay_4cycles(x / 4);
}
/* ---------------- Delay in nanoseconds and in microseconds */
#define DELAY_NS(x) DELAY_CYCLES( (x) * (F_CPU/1000000) / 1000)
typedef uint8_t (*pfnSpiTransfer) (uint8_t b);
typedef uint8_t (*pfnSpiTransfer)(uint8_t b);
typedef void (*pfnSpiRxBlock)(uint8_t* buf, uint32_t nbyte);
typedef void (*pfnSpiTxBlock)(const uint8_t* buf, uint32_t nbyte);
/* ---------------- Macros to be able to access definitions from asm */
#define _PORT(IO) DIO ## IO ## _WPORT
#define _PIN_MASK(IO) MASK(DIO ## IO ## _PIN)
#define _PIN_SHIFT(IO) DIO ## IO ## _PIN
@@ -202,10 +153,10 @@
return 0;
}
// Calculates the bit band alias address and returns a pointer address to word.
// addr: The byte address of bitbanding bit.
// bit: The bit position of bitbanding bit.
#define BITBAND_ADDRESS(addr, bit) \
// Calculates the bit band alias address and returns a pointer address to word.
// addr: The byte address of bitbanding bit.
// bit: The bit position of bitbanding bit.
#define BITBAND_ADDRESS(addr, bit) \
(((uint32_t)(addr) & 0xF0000000) + 0x02000000 + ((uint32_t)(addr)&0xFFFFF)*32 + (bit)*4)
// run at ~8 .. ~10Mhz - Rx version (Tx line not altered)
@@ -319,8 +270,14 @@
}
// Pointers to generic functions for byte transfers
static pfnSpiTransfer spiTransferTx = spiTransferX;
static pfnSpiTransfer spiTransferRx = spiTransferX;
/**
* Note: The cast is unnecessary, but without it, this file triggers a GCC 4.8.3-2014 bug.
* Later GCC versions do not have this problem, but at this time (May 2018) Arduino still
* uses that buggy and obsolete GCC version!!
*/
static pfnSpiTransfer spiTransferRx = (pfnSpiTransfer)spiTransferX;
static pfnSpiTransfer spiTransferTx = (pfnSpiTransfer)spiTransferX;
// Block transfers run at ~8 .. ~10Mhz - Tx version (Rx data discarded)
static void spiTxBlock0(const uint8_t* ptr, uint32_t todo) {
@@ -384,7 +341,7 @@
A("str %[sck_mask],[%[sck_port],#0x4]") /* CODR */
/* Bit 0 */
A("str %[mosi_mask],[%[mosi_port], %[work],LSL #2]") /* Access the proper SODR or CODR registers based on that bit */
A("str %[mosi_mask],[%[mosi_port], %[work],LSL #2]") /* Access the proper SODR or CODR registers based on that bit */
A("str %[sck_mask],[%[sck_port]]") /* SODR */
A("subs %[todo],#1") /* Decrement count of pending words to send, update status */
A("str %[sck_mask],[%[sck_port],#0x4]") /* CODR */
@@ -491,8 +448,8 @@
}
// Pointers to generic functions for block transfers
static pfnSpiTxBlock spiTxBlock = spiTxBlockX;
static pfnSpiRxBlock spiRxBlock = spiRxBlockX;
static pfnSpiTxBlock spiTxBlock = (pfnSpiTxBlock)spiTxBlockX;
static pfnSpiRxBlock spiRxBlock = (pfnSpiRxBlock)spiRxBlockX;
#if MB(ALLIGATOR) // control SDSS pin
void spiBegin() {
@@ -580,23 +537,23 @@
void spiInit(uint8_t spiRate) {
switch (spiRate) {
case 0:
spiTransferTx = spiTransferTx0;
spiTransferRx = spiTransferRx0;
spiTxBlock = spiTxBlock0;
spiRxBlock = spiRxBlock0;
spiTransferTx = (pfnSpiTransfer)spiTransferTx0;
spiTransferRx = (pfnSpiTransfer)spiTransferRx0;
spiTxBlock = (pfnSpiTxBlock)spiTxBlock0;
spiRxBlock = (pfnSpiRxBlock)spiRxBlock0;
break;
case 1:
spiTransferTx = spiTransfer1;
spiTransferRx = spiTransfer1;
spiTxBlock = spiTxBlockX;
spiRxBlock = spiRxBlockX;
spiTransferTx = (pfnSpiTransfer)spiTransfer1;
spiTransferRx = (pfnSpiTransfer)spiTransfer1;
spiTxBlock = (pfnSpiTxBlock)spiTxBlockX;
spiRxBlock = (pfnSpiRxBlock)spiRxBlockX;
break;
default:
spiDelayCyclesX4 = (F_CPU/1000000) >> (6 - spiRate);
spiTransferTx = spiTransferX;
spiTransferRx = spiTransferX;
spiTxBlock = spiTxBlockX;
spiRxBlock = spiRxBlockX;
spiTransferTx = (pfnSpiTransfer)spiTransferX;
spiTransferRx = (pfnSpiTransfer)spiTransferX;
spiTxBlock = (pfnSpiTxBlock)spiTxBlockX;
spiRxBlock = (pfnSpiRxBlock)spiRxBlockX;
break;
}
@@ -614,7 +571,7 @@
#pragma GCC reset_options
#else
#else // !SOFTWARE_SPI
#if MB(ALLIGATOR)
@@ -714,7 +671,7 @@
while ((SPI0->SPI_SR & SPI_SR_RDRF) == 0);
// clear status
SPI0->SPI_RDR;
//delayMicroseconds(1U);
//DELAY_US(1U);
}
void spiSend(const uint8_t* buf, size_t n) {
@@ -724,7 +681,7 @@
while ((SPI0->SPI_SR & SPI_SR_TDRE) == 0);
while ((SPI0->SPI_SR & SPI_SR_RDRF) == 0);
SPI0->SPI_RDR;
//delayMicroseconds(1U);
//DELAY_US(1U);
}
spiSend(buf[n - 1]);
}
@@ -767,7 +724,7 @@
// wait for receive register
while ((SPI0->SPI_SR & SPI_SR_RDRF) == 0);
// get byte from receive register
//delayMicroseconds(1U);
//DELAY_US(1U);
return SPI0->SPI_RDR;
}
@@ -797,7 +754,7 @@
SPI0->SPI_TDR = 0x000000FF | SPI_PCS(SPI_CHAN);
while ((SPI0->SPI_SR & SPI_SR_RDRF) == 0);
buf[i] = SPI0->SPI_RDR;
//delayMicroseconds(1U);
//DELAY_US(1U);
}
buf[nbyte] = spiRec();
}
@@ -813,7 +770,7 @@
while ((SPI0->SPI_SR & SPI_SR_TDRE) == 0);
while ((SPI0->SPI_SR & SPI_SR_RDRF) == 0);
SPI0->SPI_RDR;
//delayMicroseconds(1U);
//DELAY_US(1U);
}
spiSend(buf[511]);
}
@@ -902,7 +859,7 @@
spiTransfer(buf[i]);
}
#endif //MB(ALLIGATOR)
#endif // ENABLED(SOFTWARE_SPI)
#endif // !ALLIGATOR
#endif // !SOFTWARE_SPI
#endif // ARDUINO_ARCH_SAM

View File

@@ -21,12 +21,13 @@
#ifdef TARGET_LPC1768
#include "../../inc/MarlinConfig.h"
#include "../Delay.h"
HalSerial usb_serial;
// U8glib required functions
extern "C" void u8g_xMicroDelay(uint16_t val) {
delayMicroseconds(val);
DELAY_US(val);
}
extern "C" void u8g_MicroDelay(void) {
u8g_xMicroDelay(1);

View File

@@ -68,9 +68,9 @@ extern "C" volatile uint32_t _millis;
#include "HAL_timers.h"
#include "HardwareSerial.h"
#define ST7920_DELAY_1 DELAY_20_NOP;DELAY_20_NOP;DELAY_20_NOP
#define ST7920_DELAY_2 DELAY_20_NOP;DELAY_20_NOP;DELAY_20_NOP;DELAY_10_NOP;DELAY_5_NOP
#define ST7920_DELAY_3 DELAY_20_NOP;DELAY_20_NOP;DELAY_20_NOP;DELAY_10_NOP;DELAY_5_NOP
#define ST7920_DELAY_1 DELAY_NS(600)
#define ST7920_DELAY_2 DELAY_NS(750)
#define ST7920_DELAY_3 DELAY_NS(750)
extern HalSerial usb_serial;

View File

@@ -37,6 +37,7 @@
//
//#include <WInterrupts.h>
#include "../../inc/MarlinConfig.h"
#include "../Delay.h"
#include <stdint.h>
#include <stdarg.h>
#include <Arduino.h>
@@ -78,28 +79,9 @@ static const DELAY_TABLE table[] = {
// Private methods
//
#if 0
/* static */
inline void SoftwareSerial::tunedDelay(const uint32_t count) {
asm volatile(
"mov r3, %[loopsPerMicrosecond] \n\t" //load the initial loop counter
"1: \n\t"
"sub r3, r3, #1 \n\t"
"bne 1b \n\t"
://empty output list
:[loopsPerMicrosecond] "r" (count)
:"r3", "cc" //clobber list
);
DELAY_US(count);
}
#else
inline void SoftwareSerial::tunedDelay(const uint32_t count) {
delayMicroseconds(count);
}
#endif
// This function sets the current object as the "listening"
// one and returns true if it replaces another

View File

@@ -26,6 +26,7 @@
#include <lpc17xx_pinsel.h>
#include "../../inc/MarlinConfig.h"
#include "../Delay.h"
// Interrupts
void cli(void) { __disable_irq(); } // Disable
@@ -40,26 +41,9 @@ uint32_t millis() {
return _millis;
}
// This is required for some Arduino libraries we are using
void delayMicroseconds(uint32_t us) {
static const int nop_factor = (SystemCoreClock / 11000000);
static volatile int loops = 0;
//previous ops already burned most of 1us, burn the rest
loops = nop_factor / 4; //measured at 1us
while (loops > 0) --loops;
if (us < 2) return;
us--;
//redirect to delay for large values, then set new delay to remainder
if (us > 1000) {
delay(us / 1000);
us = us % 1000;
}
// burn cycles, time in interrupts will not be taken into account
loops = us * nop_factor;
while (loops > 0) --loops;
DELAY_US(us);
}
extern "C" void delay(const int msec) {

View File

@@ -63,7 +63,6 @@
#include <U8glib.h>
void delayMicroseconds(uint32_t us);
//void pinMode(int16_t pin, uint8_t mode);
//void digitalWrite(int16_t pin, uint8_t pin_status);
@@ -122,13 +121,13 @@ uint8_t u8g_i2c_start_sw(uint8_t sla) { // assert start condition and then send
LPC_GPIO(SDA_port_HAL_LPC1768_sw_I2C)->FIOCLR = LPC_PIN(SDA_pin_HAL_LPC1768_sw_I2C);
LPC_GPIO(SCL_port_HAL_LPC1768_sw_I2C)->FIOCLR = LPC_PIN(SCL_pin_HAL_LPC1768_sw_I2C);
delayMicroseconds(2);
DELAY_US(2);
LPC_GPIO(SCL_port_HAL_LPC1768_sw_I2C)->FIOSET = LPC_PIN(SCL_pin_HAL_LPC1768_sw_I2C);
delayMicroseconds(2);
DELAY_US(2);
LPC_GPIO(SDA_port_HAL_LPC1768_sw_I2C)->FIOSET = LPC_PIN(SDA_pin_HAL_LPC1768_sw_I2C);
delayMicroseconds(2);
DELAY_US(2);
LPC_GPIO(SDA_port_HAL_LPC1768_sw_I2C)->FIOCLR = LPC_PIN(SDA_pin_HAL_LPC1768_sw_I2C);
delayMicroseconds(2);
DELAY_US(2);
LPC_GPIO(SCL_port_HAL_LPC1768_sw_I2C)->FIOCLR = LPC_PIN(SCL_pin_HAL_LPC1768_sw_I2C);
u8g_i2c_send_byte_sw(I2C_SLA); // send slave address with write bit