diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h
index 562d5376ae7a..d3ef8e416b27 100644
--- a/arch/arm/include/asm/div64.h
+++ b/arch/arm/include/asm/div64.h
@@ -52,7 +52,12 @@ static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
 
 #else
 
-static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias)
+#ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
+static __always_inline
+#else
+static inline
+#endif
+uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias)
 {
 	unsigned long long res;
 	register unsigned int tmp asm("ip") = 0;
diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index 5d59cf7e73d9..25e7b4b58dcf 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -134,7 +134,12 @@
  * Hoping for compile-time optimization of conditional code.
  * Architectures may provide their own optimized assembly implementation.
  */
-static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
+#ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
+static __always_inline
+#else
+static inline
+#endif
+uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 {
 	uint32_t m_lo = m;
 	uint32_t m_hi = m >> 32;
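
For reference, a minimal user-space sketch of the pattern the patch applies, outside the kernel tree: force inlining only when the build is tuned for performance, and keep the plain inline hint otherwise so size-optimized builds may still emit a single out-of-line copy. OPTIMIZE_FOR_PERFORMANCE, my_always_inline and xprod_64 are hypothetical stand-ins for CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE, __always_inline and __arch_xprod_64; the function body is an illustrative high-part multiply, not the kernel's implementation.

#include <stdint.h>
#include <stdio.h>

/* hypothetical stand-in for the kernel's __always_inline */
#define my_always_inline inline __attribute__((always_inline))

#ifdef OPTIMIZE_FOR_PERFORMANCE		/* stand-in for CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE */
static my_always_inline
#else
static inline
#endif
uint64_t xprod_64(uint64_t m, uint64_t n)
{
	/* high 64 bits of the 64x64->128 product, built from 32-bit halves */
	uint32_t m_lo = m, m_hi = m >> 32;
	uint32_t n_lo = n, n_hi = n >> 32;
	uint64_t lo   = (uint64_t)m_lo * n_lo;
	uint64_t mid1 = (uint64_t)m_lo * n_hi;
	uint64_t mid2 = (uint64_t)m_hi * n_lo;
	uint64_t hi   = (uint64_t)m_hi * n_hi;

	/* fold the cross terms and the carry out of the low word into hi */
	hi += (mid1 >> 32) + (mid2 >> 32);
	mid1 = (uint64_t)(uint32_t)mid1 + (uint32_t)mid2 + (lo >> 32);
	hi += mid1 >> 32;
	return hi;
}

int main(void)
{
	/* high part of 0xffffffffffffffff^2 is 0xfffffffffffffffe */
	printf("%llx\n", (unsigned long long)xprod_64(~0ULL, ~0ULL));
	return 0;
}

Built with -DOPTIMIZE_FOR_PERFORMANCE the helper is always folded into its callers, which matters when it is invoked with a compile-time-constant multiplier; without the define, an -Os compiler is free to keep it out of line.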