Some Performance Measurements on STM32 MCUs
Maximum & Minimum Math Operation
All the operation is repeated 10000 times with random values
void APP_main() {
HAL_GPIO_WritePin(GPIOC, GPIO_PIN_14, 1);
for (uint16_t i=0; i<10000; i+=1) {
volatile float a = (float)rand();
volatile float b = (float)rand();
volatile float c = fmax(a, b); // or fmaxf(); or fast_fmaxf()
}
HAL_GPIO_WritePin(GPIOC, GPIO_PIN_14, 0);
HAL_Delay(10);
}
fast_fmaxf implementation
float fast_fmaxf(float a, float b) {
return a > b ? a : b;
}
Trigonometry Math Operation
All the operation is repeated 1000 times with increasing values from 0 to 999
void APP_init() {
CORDIC_ConfigTypeDef cordic_config;
cordic_config.Function = CORDIC_FUNCTION_COSINE; // ouput : cosine, then sine
cordic_config.Scale = CORDIC_SCALE_0; // not used
cordic_config.InSize = CORDIC_INSIZE_32BITS; // q31
cordic_config.OutSize = CORDIC_OUTSIZE_32BITS; // q31
cordic_config.NbWrite = CORDIC_NBWRITE_1; // ARG2 is 1 default
cordic_config.NbRead = CORDIC_NBREAD_2; // read cosine and sine
cordic_config.Precision = CORDIC_PRECISION_4CYCLES; // better than 10-3
HAL_CORDIC_Configure(&hcordic, &cordic_config);
}
void APP_main() {
HAL_GPIO_WritePin(GPIOC, GPIO_PIN_14, 1);
float result = 0;
for (uint16_t i=0; i<1000; i+=1) {
volatile float a = 0.001 * i;
// math lib
volatile float cos_a = cosf(a);
volatile float sin_a = sinf(a);
// LUT
// volatile float cos_a = cosf_lut(a);
// volatile float sin_a = sinf_lut(a);
// CORDIC
// int32_t cordic_arg[1];
// int32_t cordic_res[2];
// cordic_arg[0] = FLOAT_TO_Q31(wrapToPi(a)/M_PI);
// HAL_CORDIC_Calculate(&hcordic, cordic_arg, cordic_res, 1, 100);
// volatile float cos_a = Q31_TO_FLOAT(cordic_res[0]);
// volatile float sin_a = Q31_TO_FLOAT(cordic_res[1]);
result += sin_a;
}
HAL_GPIO_WritePin(GPIOC, GPIO_PIN_14, 0);
char str[64];
sprintf(str, "result: %f\r\n", result);
HAL_UART_Transmit(&huart3, (uint8_t *)str, strlen(str), 1000);
HAL_Delay(10);
}
Interesting observation:
when using sinf() and cosf() from the standard math library to perform the calculation, the processing speed varies greatly depending on the magnitude of the numbers.
When the absolute value of the operand is greater than 201, the processing speed decreases by 10x.
Trig LUT
#define LUT_MULT 81.4873308631f
const float sin_tab[] = {
0,0.012296,0.024589,0.036879,0.049164,0.061441,0.073708,0.085965,0.098208,0.11044,0.12265,0.13484,0.14702,0.15917,0.17129,0.18339,0.19547,0.20751,0.21952,0.2315,0.24345,0.25535,0.26722,0.27905,0.29084,0.30258,0.31427,0.32592,0.33752,0.34907,0.36057,0.37201,0.38339,0.39472,0.40599,0.41719,0.42834,0.43941,0.45043,0.46137,0.47224,0.48305,0.49378,0.50443,0.51501,0.52551,0.53593,0.54627,0.55653,0.5667,0.57679,0.58679,0.5967,0.60652,0.61625,0.62589,0.63543,0.64488,0.65423,0.66348,0.67263,0.68167,0.69062,0.69946,0.70819,0.71682,0.72534,0.73375,0.74205,0.75023,0.75831,0.76626,0.77411,0.78183,0.78944,0.79693,0.80429,0.81154,0.81866,0.82566,0.83254,0.83928,0.84591,0.8524,0.85876,0.865,0.8711,0.87708,0.88292,0.88862,0.89419,0.89963,0.90493,0.9101,0.91512,0.92001,0.92476,0.92937,0.93384,0.93816,0.94235,0.94639,0.95029,0.95405,0.95766,0.96113,0.96445,0.96763,0.97066,0.97354,0.97628,0.97887,0.98131,0.9836,0.98574,0.98774,0.98958,0.99128,0.99282,0.99422,0.99546,0.99656,0.9975,0.99829,0.99894,0.99943,0.99977,0.99996,1,0.99988,0.99962,0.9992,0.99863,0.99792,0.99705,0.99603,0.99486,0.99354,0.99207,0.99045,0.98868,0.98676,0.98469,0.98247,0.9801,0.97759,0.97493,0.97212,0.96916,0.96606,0.96281,0.95941,0.95587,0.95219,0.94836,0.94439,0.94028,0.93602,0.93162,0.92708,0.9224,0.91758,0.91263,0.90753,0.9023,0.89693,0.89142,0.88579,0.88001,0.87411,0.86807,0.8619,0.8556,0.84917,0.84261,0.83593,0.82911,0.82218,0.81512,0.80793,0.80062,0.7932,0.78565,0.77798,0.7702,0.7623,0.75428,0.74615,0.73791,0.72956,0.72109,0.71252,0.70384,0.69505,0.68616,0.67716,0.66806,0.65886,0.64956,0.64017,0.63067,0.62108,0.6114,0.60162,0.59176,0.5818,0.57176,0.56163,0.55141,0.54111,0.53073,0.52027,0.50973,0.49911,0.48842,0.47765,0.46682,0.45591,0.44493,0.43388,0.42277,0.4116,0.40036,0.38906,0.37771,0.36629,0.35483,0.3433,0.33173,0.32011,0.30843,0.29671,0.28495,0.27314,0.26129,0.2494,0.23748,0.22552,0.21352,0.20149,0.18943,0.17735,0.16523,0.15309,0.14093,0.12875,0.11655,0.10432,0.092088,0.079838,0.067576,0.055303,0.043022,0.030735,0.018443,0.0061479,-0.0061479,-0.018443,-0.030735,-0.043022,-0.055303,-0.067576,-0.079838,-0.092088,-0.10432,-0.11655,-0.12875,-0.14093,-0.15309,-0.16523,-0.17735,-0.18943,-0.20149,-0.21352,-0.22552,-0.23748,-0.2494,-0.26129,-0.27314,-0.28495,-0.29671,-0.30843,-0.32011,-0.33173,-0.3433,-0.35483,-0.36629,-0.37771,-0.38906,-0.40036,-0.4116,-0.42277,-0.43388,-0.44493,-0.45591,-0.46682,-0.47765,-0.48842,-0.49911,-0.50973,-0.52027,-0.53073,-0.54111,-0.55141,-0.56163,-0.57176,-0.5818,-0.59176,-0.60162,-0.6114,-0.62108,-0.63067,-0.64017,-0.64956,-0.65886,-0.66806,-0.67716,-0.68616,-0.69505,-0.70384,-0.71252,-0.72109,-0.72956,-0.73791,-0.74615,-0.75428,-0.7623,-0.7702,-0.77798,-0.78565,-0.7932,-0.80062,-0.80793,-0.81512,-0.82218,-0.82911,-0.83593,-0.84261,-0.84917,-0.8556,-0.8619,-0.86807,-0.87411,-0.88001,-0.88579,-0.89142,-0.89693,-0.9023,-0.90753,-0.91263,-0.91758,-0.9224,-0.92708,-0.93162,-0.93602,-0.94028,-0.94439,-0.94836,-0.95219,-0.95587,-0.95941,-0.96281,-0.96606,-0.96916,-0.97212,-0.97493,-0.97759,-0.9801,-0.98247,-0.98469,-0.98676,-0.98868,-0.99045,-0.99207,-0.99354,-0.99486,-0.99603,-0.99705,-0.99792,-0.99863,-0.9992,-0.99962,-0.99988,-1,-0.99996,-0.99977,-0.99943,-0.99894,-0.99829,-0.9975,-0.99656,-0.99546,-0.99422,-0.99282,-0.99128,-0.98958,-0.98774,-0.98574,-0.9836,-0.98131,-0.97887,-0.97628,-0.97354,-0.97066,-0.96763,-0.96445,-0.96113,-0.95766,-0.95405,-0.95029,-0.94639,-0.94235,-0.93816,-0.93384,-0.92937,-0.92476,-0.92001,-0.91512,-0.9101,-0.90493,-0.89963,-0.89419,-0.88862,-0.88292,-0.87708,-0.8711,-0.865,-0.85876,-0.8524,-0.84591,-0.83928,-0.83254,-0.82566,-0.81866,-0.81154,-0.80429,-0.79693,-0.78944,-0.78183,-0.77411,-0.76626,-0.75831,-0.75023,-0.74205,-0.73375,-0.72534,-0.71682,-0.70819,-0.69946,-0.69062,-0.68167,-0.67263,-0.66348,-0.65423,-0.64488,-0.63543,-0.62589,-0.61625,-0.60652,-0.5967,-0.58679,-0.57679,-0.5667,-0.55653,-0.54627,-0.53593,-0.52551,-0.51501,-0.50443,-0.49378,-0.48305,-0.47224,-0.46137,-0.45043,-0.43941,-0.42834,-0.41719,-0.40599,-0.39472,-0.38339,-0.37201,-0.36057,-0.34907,-0.33752,-0.32592,-0.31427,-0.30258,-0.29084,-0.27905,-0.26722,-0.25535,-0.24345,-0.2315,-0.21952,-0.20751,-0.19547,-0.18339,-0.17129,-0.15917,-0.14702,-0.13484,-0.12265,-0.11044,-0.098208,-0.085965,-0.073708,-0.061441,-0.049164,-0.036879,-0.024589,-0.012296,0
};
float sinf_lut(float theta){
theta = fmodf(theta, 2*M_PI);
theta = theta<0 ? theta + 2*M_PI : theta;
return sin_tab[(int) (LUT_MULT*theta)];
}
float cosf_lut(float theta){
return sinf_lut(M_PI / 2 - theta);
}
CORDIC
#define Q31_TO_FLOAT(x) ((float)(x) / (float)(0x80000000))
#define FLOAT_TO_Q31(x) ((int32_t)((float)(x) * (float)0x7FFFFFFF))
float wrapToPi(float value) {
return fmodf(value + M_PI, 2*M_PI) - M_PI;
}
SPI AS5047P Reading
SPI Speed: 1.25MBits/s and 10MBits/s
/*
* app.c
*
* Created on: Dec 31, 2022
* Author: TK
*/
#include "app.h"
#define USE_INTERRUPT 1
extern SPI_HandleTypeDef hspi1;
extern UART_HandleTypeDef huart2;
uint8_t spi_received;
uint16_t getParity(uint16_t data) {
data ^= data >> 8; // example for 8-bit (this line scales it up to 16 bit)
data ^= data >> 4; // ( a b c d e f g h ) xor ( 0 0 0 0 a b c d ) = ( a b c d ae bf cg dh )
data ^= data >> 2; // ( a b c d ae bf cg dh ) xor ( 0 0 a b c d ae bf ) = ( a b ac bd ace bdf aceg bdfh )
data ^= data >> 1; // ( a b ac bd ace bdf aceg bdfh ) xor ( 0 a b ac bd ace bdf aceg ) = ( a ab abc abcd abcde abcdef abcdefg abcdefgh )
return data & 1; // if lsb of data is 0 -> data is even. if lsb of data is 1 -> data is odd.
}
void HAL_SPI_TxRxCpltCallback(SPI_HandleTypeDef *hspi) {
HAL_GPIO_WritePin(GPIOA, GPIO_PIN_15, 1);
spi_received = 1;
}
void APP_init() {
spi_received = 0;
}
void APP_main() {
uint16_t tx_buffer;
uint16_t rx_buffer;
tx_buffer = 0x3FFF;
tx_buffer |= 1 << 14;
tx_buffer |= getParity(tx_buffer) << 15;
spi_received = 0;
HAL_GPIO_WritePin(GPIOC, GPIO_PIN_14, 1); // Limit SW L
HAL_GPIO_WritePin(GPIOA, GPIO_PIN_15, 0);
#if !USE_INTERRUPT
HAL_SPI_TransmitReceive(&hspi1, (uint8_t *)&tx_buffer, (uint8_t *)&rx_buffer, 1, 100);
HAL_GPIO_WritePin(GPIOA, GPIO_PIN_15, 1);
#else
HAL_SPI_TransmitReceive_IT(&hspi1, (uint8_t *)&tx_buffer, (uint8_t *)&rx_buffer, 1);
while (!spi_received) {}
#endif
HAL_GPIO_WritePin(GPIOC, GPIO_PIN_14, 0); // Limit SW L
}
There's an overhead of 1.7 us ~ 2.1 us when using interrupt mode compared to blocking mode.
Last updated