Test Condition
MCU: STM32G431RB @170MHz
IDE: IAR V9.40
Optimization: -o3
Math Operation Benchmark test
OP times: 1M (each value in the tables below is the total time for 1,000,000 operations, in microseconds)
Multiply
Operands | Float32 | Q31 | Q15 |
---|---|---|---|
a = 1, b = 2 | 58823 | 47058 | 52941 |
a = 10000, b = 2 | 58823 | 47058 | 52941 |
a = 10000, b = 20000 | 58823 | 47058 | 52941 |
a = 1, b = 20000 | 58823 | 47058 | 52941 |
a = 32767, b = 2 | 58823 | 47058 | 52941 |
a = 2147483647, b = 2 | 58823 | 47058 | - |
a = -1, b = 2 | 58823 | 47058 | 52941 |
a = 1, b = -2 | 58823 | 47058 | 52941 |
a = -1, b = -2 | 58823 | 47058 | 52941 |
上述表格中 Q15 的测试数据是在把乘积赋值给 int16_t 变量的情况下测得的;如果改为把结果赋值给 int32_t 变量,则所耗时间会减少,与 Q31 一致。
Division
Operands | Float32 | Q31 | Q15 |
---|---|---|---|
a = 1, b = 2 | 129411 | 58823 | 58823 |
a = 10000, b = 2 | 129411 | 88235 | 88235 |
a = 10000, b = 20000 | 129411 | 58823 | 58823 |
a = 32767, b = 2 | 129411 | 88235 | 88235 |
a = 2147483647, b = 2 | 129411 | 111764 | - |
a = -1, b = 2 | 129411 | 58823 | 58823 |
a = 1, b = -2 | 129411 | 70588 | 70588 |
a = -1, b = -2 | 129411 | 70588 | 70588 |
注意:同样的代码重新编译之后,测试结果似乎又会有所不同……
几点观察结论:
- 浮点数的乘除用时与操作数无关
- 浮点数的乘法用时比定点多约 11%~25%(比 Q15 多约 11%,比 Q31 多约 25%)
- 浮点数的除法所用时间在操作数较小时大约是定点数的两倍多,当被除数值接近极限时32位定点除法与浮点除法接近
- 盲猜在电机控制程序里,如果用全浮点数计算,所耗时间资源将会大约是定点数的150%左右
Test Code
/*
 * Benchmark results, one per timed loop in MathOpBenchmarkTest().
 * Name prefix: s = int16_t operands, i = int32_t operands,
 * f = float32_t operands. Each variable receives the value returned by
 * TimeMeasure(); presumably microseconds, going by the "Us" suffix
 * (TimeMeasure() is not visible here - confirm).
 */

/* int16_t operands */
volatile uint32_t sMulDurationUs;
volatile uint32_t sDivDurationUs;

/* int32_t operands */
volatile uint32_t iMulDurationUs;
volatile uint32_t iDivDurationUs;

/* float32_t operands */
volatile uint32_t fMulDurationUs;
volatile uint32_t fDivDurationUs;
/* Number of operations executed inside each timed loop. */
#define MATH_BENCH_OP_COUNT 1000000u

/*
 * Run `operation` MATH_BENCH_OP_COUNT times between two samples of
 * DWT->CYCCNT and store TimeMeasure(start, stop) in `durationUs`.
 *
 * This is a macro rather than a helper function so that the statement
 * under test is expanded directly into the loop body, exactly as if the
 * loop had been written out by hand.
 */
#define MATH_BENCH_RUN(durationUs, operation)                       \
    do {                                                            \
        volatile uint32_t startTick = DWT->CYCCNT;                  \
        for (uint32_t remaining = MATH_BENCH_OP_COUNT;              \
             remaining != 0u; --remaining) {                        \
            operation;                                              \
        }                                                           \
        volatile uint32_t stopTick = DWT->CYCCNT;                   \
        (durationUs) = TimeMeasure(startTick, stopTick);            \
    } while (0)

/*
 * Times multiplication and division on int16_t, int32_t and float32_t
 * operands and publishes the six results in the file-level
 * s/i/f + Mul/Div + DurationUs variables.
 *
 * Every operand and result is volatile, so each loop iteration really
 * reads both operands and writes the result; the timed span therefore
 * covers those accesses and the loop bookkeeping as well as the
 * arithmetic instruction itself.
 *
 * NOTE(review): reads DWT->CYCCNT without configuring it; assumes the
 * cycle counter has been enabled elsewhere - confirm.
 */
void MathOpBenchmarkTest(void)
{
    /* int16_t operands and result (sMulDurationUs / sDivDurationUs). */
    volatile int16_t i16A = 1;
    volatile int16_t i16B = 2;
    volatile int16_t i16Result = 3;

    /* int32_t operands and result (iMulDurationUs / iDivDurationUs). */
    volatile int32_t i32A = 1;
    volatile int32_t i32B = 2;
    volatile int32_t i32Result = 3;

    /*
     * float32_t operands and result (fMulDurationUs / fDivDurationUs).
     * NOTE(review): the second float operand is 2000 while the integer
     * cases use 2 - confirm this is intentional.
     */
    volatile float32_t f32A = 1.0f;
    volatile float32_t f32B = 2000.0f;
    volatile float32_t f32Result = 3.0f;

    /* Multiplication: int16_t, then int32_t, then float32_t. */
    MATH_BENCH_RUN(sMulDurationUs, i16Result = i16A * i16B);
    MATH_BENCH_RUN(iMulDurationUs, i32Result = i32A * i32B);
    MATH_BENCH_RUN(fMulDurationUs, f32Result = f32A * f32B);

    /* Division, in the same type order. */
    MATH_BENCH_RUN(sDivDurationUs, i16Result = i16A / i16B);
    MATH_BENCH_RUN(iDivDurationUs, i32Result = i32A / i32B);
    MATH_BENCH_RUN(fDivDurationUs, f32Result = f32A / f32B);

    /*
     * Touch each result once more after the last measurement; presumably
     * this keeps the toolchain from flagging them as set-but-unused -
     * TODO confirm.
     */
    i16Result++;
    i32Result++;
    f32Result++;
}

#undef MATH_BENCH_RUN
#undef MATH_BENCH_OP_COUNT