[backport from gcc-4.8/trunk r184957 ] Date: Tue, 31 Jan 2012 16:15:22 +1100 From: Richard Henderson Subject: [arm] Improve longlong.h umul_ppmm, count_trailing_zeros List-Archive: I noticed this accidentally, while looking for something else. There are significant improvements in the DImode multiplication and division routines for armv4+. Despite how trivial this is, I assume this must wait for stage1. Ok? r~ libgcc/ 2012-03-05 Richard Henderson * longlong.h [ARM] (umul_ppmm): Use umull for arm3m and later. [ARM] (count_trailing_zeros): Use the builtin. --- gcc-4.6.3/gcc/longlong.h.~1~ 2011-10-04 09:28:50.000000000 +0200 +++ gcc-4.6.3/gcc/longlong.h 2012-06-10 14:02:55.000000000 +0200 @@ -220,9 +220,12 @@ UDItype __umulsidi3 (USItype, USItype); "rI" ((USItype) (bh)), \ "r" ((USItype) (al)), \ "rI" ((USItype) (bl)) __CLOBBER_CC) -#define umul_ppmm(xh, xl, a, b) \ -{register USItype __t0, __t1, __t2; \ - __asm__ ("%@ Inlined umul_ppmm\n" \ +# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \ + || defined(__ARM_ARCH_3__) +# define umul_ppmm(xh, xl, a, b) \ + do { \ + register USItype __t0, __t1, __t2; \ + __asm__ ("%@ Inlined umul_ppmm\n" \ " mov %2, %5, lsr #16\n" \ " mov %0, %6, lsr #16\n" \ " bic %3, %5, %2, lsl #16\n" \ @@ -239,14 +242,26 @@ UDItype __umulsidi3 (USItype, USItype); "=r" ((USItype) (xl)), \ "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \ : "r" ((USItype) (a)), \ - "r" ((USItype) (b)) __CLOBBER_CC );} -#define UMUL_TIME 20 -#define UDIV_TIME 100 + "r" ((USItype) (b)) __CLOBBER_CC ); \ + } while (0) +# define UMUL_TIME 20 +# else +# define umul_ppmm(xh, xl, a, b) \ + do { \ + /* Generate umull, under compiler control. */ \ + register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b); \ + (xl) = (USItype)__t0; \ + (xh) = (USItype)(__t0 >> 32); \ + } while (0) +# define UMUL_TIME 3 +# endif +# define UDIV_TIME 100 #endif /* __arm__ */ #if defined(__arm__) /* Let gcc decide how best to implement count_leading_zeros. */ #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) +#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X)) #define COUNT_LEADING_ZEROS_0 32 #endif