[backport from gcc-4.8/trunk r190252 ] Date: Fri, 3 Aug 2012 16:33:19 +0200 From: Bernd Schmidt Subject: Reload/i386 patch for secondary memory vs subregs List-Archive: There are a number of problems in the interaction between secondary memory and subregs. If we reload the inner of a subreg, we forget about this when deciding whether to use secondary memory, and just use REGNO_REG_CLASS on the (now reloaded) inner. Also, we don't really know what to do if we get a subreg of something that's not a register, such as a symbol_ref. This is another part of the patch below, using a variant of find_valid_class that works in this case. The original testcase we had was the artificial test included in the patch, which involves reinterpreting a pointer to a string constant as a floating point value. Since then, we've also found a larger Altivec testcase which crashes the compiler without this patch. Bootstrapped and tested on i686-linux. It's also been in several of our internal trees, going back to even 4.4-based ones IIRC, and has had testing for several architectures. Ok for the i386 part? I intend to check the reload bits in soon if there are no objections. Bernd gcc/ 2012-08-09 Bernd Schmidt * reload.c (find_valid_class_1): New static function. (push_reload): Use it when reloading a SYMBOL_REG as the inner of a subreg. Keep better track of needed classes for the secondary memory case. * config/i386/i386.h (LIMIT_RELOAD_CLASS): Limit INT_SSE_REGS to GENERAL_REGS. * reload1.c (replaced_subreg): New static function. (gen_reload): Use it when deciding whether to use secondary memory. gcc/testsuite/ 2012-08-09 Bernd Schmidt * gcc.c-torture/compile/20120727-1.c: New test. --- gcc-4.6.3/gcc/config/i386/i386.h.~1~ 2011-06-29 22:15:32.000000000 +0200 +++ gcc-4.6.3/gcc/config/i386/i386.h 2012-08-12 10:58:41.000000000 +0200 @@ -1375,7 +1375,8 @@ enum reg_class ((MODE) == QImode && !TARGET_64BIT \ && ((CLASS) == ALL_REGS || (CLASS) == GENERAL_REGS \ || (CLASS) == LEGACY_REGS || (CLASS) == INDEX_REGS) \ - ? Q_REGS : (CLASS)) + ? Q_REGS \ + : (CLASS) == INT_SSE_REGS ? GENERAL_REGS : (CLASS)) /* If we are copying between general and FP registers, we need a memory location. The same is true for SSE and MMX registers. */ --- gcc-4.6.3/gcc/reload.c.~1~ 2011-06-17 13:13:38.000000000 +0200 +++ gcc-4.6.3/gcc/reload.c 2012-08-12 10:58:41.000000000 +0200 @@ -711,6 +711,55 @@ find_valid_class (enum machine_mode oute return best_class; } + +/* We are trying to reload a subreg of something that is not a register. + Find the largest class which has at least one register valid in + mode MODE. OUTER is the mode of the subreg, DEST_CLASS the class in + which we would eventually like to obtain the object. */ + +static enum reg_class +find_valid_class_1 (enum machine_mode outer ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + enum reg_class dest_class ATTRIBUTE_UNUSED) +{ + int best_cost = -1; + int rclass; + int regno; + enum reg_class best_class = NO_REGS; + unsigned int best_size = 0; + int cost; + + for (rclass = 1; rclass < N_REG_CLASSES; rclass++) + { + int bad = 0; + for (regno = 0; regno < FIRST_PSEUDO_REGISTER && !bad; regno++) + if (TEST_HARD_REG_BIT (reg_class_contents[rclass], regno) + && !HARD_REGNO_MODE_OK (regno, mode)) + bad = 1; + + if (bad) + continue; + + cost = register_move_cost (outer, (enum reg_class) rclass, dest_class); + + if ((reg_class_size[rclass] > best_size + && (best_cost < 0 || best_cost >= cost)) + || best_cost > cost) + { + best_class = (enum reg_class) rclass; + best_size = reg_class_size[rclass]; + best_cost = register_move_cost (outer, (enum reg_class) rclass, + dest_class); + } + } + + gcc_assert (best_size != 0); + +#ifdef LIMIT_RELOAD_CLASS + best_class = LIMIT_RELOAD_CLASS (mode, best_class); +#endif + return best_class; +} /* Return the number of a previously made reload that can be combined with a new one, or n_reloads if none of the existing reloads can be used. @@ -927,6 +976,8 @@ push_reload (rtx in, rtx out, rtx *inloc int secondary_in_reload = -1, secondary_out_reload = -1; enum insn_code secondary_in_icode = CODE_FOR_nothing; enum insn_code secondary_out_icode = CODE_FOR_nothing; + enum reg_class subreg_in_class ATTRIBUTE_UNUSED; + subreg_in_class = NO_REGS; /* INMODE and/or OUTMODE could be VOIDmode if no mode has been specified for the operand. In that case, @@ -1091,16 +1142,18 @@ push_reload (rtx in, rtx out, rtx *inloc if (in != 0 && reload_inner_reg_of_subreg (in, inmode, 0)) { - enum reg_class in_class = rclass; - if (REG_P (SUBREG_REG (in))) - in_class + subreg_in_class = find_valid_class (inmode, GET_MODE (SUBREG_REG (in)), subreg_regno_offset (REGNO (SUBREG_REG (in)), GET_MODE (SUBREG_REG (in)), SUBREG_BYTE (in), GET_MODE (in)), REGNO (SUBREG_REG (in))); + else if (GET_CODE (SUBREG_REG (in)) == SYMBOL_REF) + subreg_in_class = find_valid_class_1 (inmode, + GET_MODE (SUBREG_REG (in)), + rclass); /* This relies on the fact that emit_reload_insns outputs the instructions for input reloads of type RELOAD_OTHER in the same @@ -1108,7 +1161,7 @@ push_reload (rtx in, rtx out, rtx *inloc RELOAD_OTHER, we are guaranteed that this inner reload will be output before the outer reload. */ push_reload (SUBREG_REG (in), NULL_RTX, &SUBREG_REG (in), (rtx *) 0, - in_class, VOIDmode, VOIDmode, 0, 0, opnum, type); + subreg_in_class, VOIDmode, VOIDmode, 0, 0, opnum, type); dont_remove_subreg = 1; } @@ -1325,13 +1378,15 @@ push_reload (rtx in, rtx out, rtx *inloc So add an additional reload. */ #ifdef SECONDARY_MEMORY_NEEDED - /* If a memory location is needed for the copy, make one. */ - if (in != 0 + if (subreg_in_class == NO_REGS + && in != 0 && (REG_P (in) || (GET_CODE (in) == SUBREG && REG_P (SUBREG_REG (in)))) - && reg_or_subregno (in) < FIRST_PSEUDO_REGISTER - && SECONDARY_MEMORY_NEEDED (REGNO_REG_CLASS (reg_or_subregno (in)), - rclass, inmode)) + && reg_or_subregno (in) < FIRST_PSEUDO_REGISTER) + subreg_in_class = REGNO_REG_CLASS (reg_or_subregno (in)); + /* If a memory location is needed for the copy, make one. */ + if (subreg_in_class != NO_REGS + && SECONDARY_MEMORY_NEEDED (subreg_in_class, rclass, inmode)) get_secondary_mem (in, inmode, opnum, type); #endif --- gcc-4.6.3/gcc/reload1.c.~1~ 2011-11-01 19:38:42.000000000 +0100 +++ gcc-4.6.3/gcc/reload1.c 2012-08-12 10:58:41.000000000 +0200 @@ -8400,6 +8400,18 @@ emit_insn_if_valid_for_reload (rtx insn) return NULL; } +/* If X is not a subreg, return it unmodified. If it is a subreg, + look up whether we made a replacement for the SUBREG_REG. Return + either the replacement or the SUBREG_REG. */ + +static rtx +replaced_subreg (rtx x) +{ + if (GET_CODE (x) == SUBREG) + return find_replacement (&SUBREG_REG (x)); + return x; +} + /* Emit code to perform a reload from IN (which may be a reload register) to OUT (which may also be a reload register). IN or OUT is from operand OPNUM with reload type TYPE. @@ -8410,7 +8422,7 @@ static rtx gen_reload (rtx out, rtx in, int opnum, enum reload_type type) { rtx last = get_last_insn (); - rtx tem; + rtx tem, tem1, tem2; /* If IN is a paradoxical SUBREG, remove it and try to put the opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */ @@ -8556,14 +8568,12 @@ gen_reload (rtx out, rtx in, int opnum, #ifdef SECONDARY_MEMORY_NEEDED /* If we need a memory location to do the move, do it that way. */ - else if ((REG_P (in) - || (GET_CODE (in) == SUBREG && REG_P (SUBREG_REG (in)))) - && reg_or_subregno (in) < FIRST_PSEUDO_REGISTER - && (REG_P (out) - || (GET_CODE (out) == SUBREG && REG_P (SUBREG_REG (out)))) - && reg_or_subregno (out) < FIRST_PSEUDO_REGISTER - && SECONDARY_MEMORY_NEEDED (REGNO_REG_CLASS (reg_or_subregno (in)), - REGNO_REG_CLASS (reg_or_subregno (out)), + else if ((tem1 = replaced_subreg (in), tem2 = replaced_subreg (out), + (REG_P (tem1) && REG_P (tem2))) + && REGNO (tem1) < FIRST_PSEUDO_REGISTER + && REGNO (tem2) < FIRST_PSEUDO_REGISTER + && SECONDARY_MEMORY_NEEDED (REGNO_REG_CLASS (REGNO (tem1)), + REGNO_REG_CLASS (REGNO (tem2)), GET_MODE (out))) { /* Get the memory to use and rewrite both registers to its mode. */ --- gcc-4.6.3/gcc/testsuite/gcc.c-torture/compile/20120727-1.c.~1~ 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.6.3/gcc/testsuite/gcc.c-torture/compile/20120727-1.c 2012-08-12 10:58:41.000000000 +0200 @@ -0,0 +1,11 @@ +union { + char *p; + float f; +} u; + +void +f (void) +{ + u.p = ""; + u.f += 1.1; +}