[backport from gcc-4.8/trunk r188661 ] gcc/ 2012-06-15 Ulrich Weigand PR tree-optimization/53636 * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Verify stride when doing basic-block vectorization. gcc/testsuite/ 2012-06-15 Ulrich Weigand PR tree-optimization/53636 * gcc.target/arm/pr53636.c: New test. --- gcc-4.7.1/gcc/testsuite/gcc.target/arm/pr53636.c.~1~ 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.7.1/gcc/testsuite/gcc.target/arm/pr53636.c 2012-06-16 13:25:50.000000000 +0200 @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O -ftree-vectorize" } */ +/* { dg-add-options arm_neon } */ + +void fill (short *buf) __attribute__ ((noinline)); +void fill (short *buf) +{ + int i; + + for (i = 0; i < 11 * 8; i++) + buf[i] = i; +} + +void test (unsigned char *dst) __attribute__ ((noinline)); +void test (unsigned char *dst) +{ + short tmp[11 * 8], *tptr; + int i; + + fill (tmp); + + tptr = tmp; + for (i = 0; i < 8; i++) + { + dst[0] = (-tptr[0] + 9 * tptr[0 + 1] + 9 * tptr[0 + 2] - tptr[0 + 3]) >> 7; + dst[1] = (-tptr[1] + 9 * tptr[1 + 1] + 9 * tptr[1 + 2] - tptr[1 + 3]) >> 7; + dst[2] = (-tptr[2] + 9 * tptr[2 + 1] + 9 * tptr[2 + 2] - tptr[2 + 3]) >> 7; + dst[3] = (-tptr[3] + 9 * tptr[3 + 1] + 9 * tptr[3 + 2] - tptr[3 + 3]) >> 7; + dst[4] = (-tptr[4] + 9 * tptr[4 + 1] + 9 * tptr[4 + 2] - tptr[4 + 3]) >> 7; + dst[5] = (-tptr[5] + 9 * tptr[5 + 1] + 9 * tptr[5 + 2] - tptr[5 + 3]) >> 7; + dst[6] = (-tptr[6] + 9 * tptr[6 + 1] + 9 * tptr[6 + 2] - tptr[6 + 3]) >> 7; + dst[7] = (-tptr[7] + 9 * tptr[7 + 1] + 9 * tptr[7 + 2] - tptr[7 + 3]) >> 7; + + dst += 8; + tptr += 11; + } +} + +int main (void) +{ + char buf [8 * 8]; + + test (buf); + + return 0; +} + --- gcc-4.7.1/gcc/tree-vect-data-refs.c.~1~ 2012-05-29 16:44:08.000000000 +0200 +++ gcc-4.7.1/gcc/tree-vect-data-refs.c 2012-06-16 13:25:50.000000000 +0200 @@ -844,6 +844,24 @@ vect_compute_data_ref_alignment (struct } } + /* Similarly, if we're doing basic-block 
vectorization, we can only use + base and misalignment information relative to an innermost loop if the + misalignment stays the same throughout the execution of the loop. + As above, this holds only if the stride of the dataref is a multiple of + the vector size; otherwise misalign is reset to NULL_TREE below. */ + if (!loop) + { + tree step = DR_STEP (dr); + HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); + + if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0) + { + if (vect_print_dump_info (REPORT_ALIGNMENT)) + fprintf (vect_dump, "SLP: step doesn't divide the vector-size."); + misalign = NULL_TREE; + } + } + base = build_fold_indirect_ref (base_addr); alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);