[backport from gcc-4.8/trunk r192378 ] From: Steven Bosscher Date: Tue, 9 Oct 2012 23:24:54 +0200 Subject: [patch][IRA] Apply LRA lessons-learned to IRA List-Archive: Hello, For LRA, compressing the live range chains proved to be quite helpful. The same can be done for IRA, as in the attached patch. For the test case of PR54146 the effect is time spent in IRA cut in half: without patch: integrated RA : 206.35 (28%) with patch: integrated RA : 106.96 (16%) The peak memory usage also drops a few 100MB. There is a slight measurable effect on my collection of cc1-i files, mostly due to small speed-ups on the big generated insn-* files. I'm going to bootstrap&test this on x86_64-unknown-linux-gnu and powerpc64-unknown-linux-gnu. OK if that passes? Ciao! Steven gcc/ 2012-10-11 Steven Bosscher * ira-build.c (ira_loop_tree_body_rev_postorder): New function. (ira_traverse_loop_tree): Traverse a loop's basic blocks in reverse post-order of the reversed control-flow direction. * ira-conflicts.c (ira_build_conflicts): Pass add_copies as the pre-order function to ira_traverse_loop_tree to preserve the existing semantics. * ira-lives.c (remove_some_program_points_and_update_live_ranges): Squeeze out live range chain elements if their program points are connected. --- gcc-4.7.2/gcc/ira-build.c.~1~ 2012-08-21 15:34:19.000000000 +0200 +++ gcc-4.7.2/gcc/ira-build.c 2012-10-13 10:48:56.000000000 +0200 @@ -1457,6 +1457,96 @@ finish_cost_vectors (void) +/* Compute a post-ordering of the reverse control flow of the loop body + designated by the children nodes of LOOP_NODE, whose body nodes in + pre-order are input as LOOP_PREORDER. Return a VEC with a post-order + of the reverse loop body. + + For the post-order of the reverse CFG, we visit the basic blocks in + LOOP_PREORDER array in the reverse order of where they appear. + This is important: We do not just want to compute a post-order of + the reverse CFG, we want to make a best-guess for a visiting order that + minimizes the number of chain elements per allocno live range. If the + blocks would be visited in a different order, we would still compute a + correct post-ordering but it would be less likely that two nodes + connected by an edge in the CFG are neighbours in the topsort. */ + +static VEC (ira_loop_tree_node_t, heap) * +ira_loop_tree_body_rev_postorder (ira_loop_tree_node_t loop_node ATTRIBUTE_UNUSED, + VEC (ira_loop_tree_node_t, heap) *loop_preorder) +{ + VEC (ira_loop_tree_node_t, heap) *topsort_nodes = NULL; + unsigned int n_loop_preorder; + + n_loop_preorder = VEC_length (ira_loop_tree_node_t, loop_preorder); + if (n_loop_preorder != 0) + { + ira_loop_tree_node_t subloop_node; + unsigned int i; + VEC (ira_loop_tree_node_t, heap) *dfs_stack; + + /* This is a bit of strange abuse of the BB_VISITED flag: We use + the flag to mark blocks we still have to visit to add them to + our post-order. Define an alias to avoid confusion. */ +#define BB_TO_VISIT BB_VISITED + + FOR_EACH_VEC_ELT (ira_loop_tree_node_t, loop_preorder, i, subloop_node) + { + gcc_checking_assert (! (subloop_node->bb->flags & BB_TO_VISIT)); + subloop_node->bb->flags |= BB_TO_VISIT; + } + + topsort_nodes = VEC_alloc (ira_loop_tree_node_t, heap, n_loop_preorder); + dfs_stack = VEC_alloc (ira_loop_tree_node_t, heap, n_loop_preorder); + + FOR_EACH_VEC_ELT_REVERSE (ira_loop_tree_node_t, loop_preorder, + i, subloop_node) + { + if (! (subloop_node->bb->flags & BB_TO_VISIT)) + continue; + + subloop_node->bb->flags &= ~BB_TO_VISIT; + VEC_quick_push (ira_loop_tree_node_t, dfs_stack, subloop_node); + while (! VEC_empty (ira_loop_tree_node_t, dfs_stack)) + { + edge e; + edge_iterator ei; + + ira_loop_tree_node_t n = VEC_last (ira_loop_tree_node_t, + dfs_stack); + FOR_EACH_EDGE (e, ei, n->bb->preds) + { + ira_loop_tree_node_t pred_node; + basic_block pred_bb = e->src; + + if (e->src == ENTRY_BLOCK_PTR) + continue; + + pred_node = IRA_BB_NODE_BY_INDEX (pred_bb->index); + if (pred_node != n + && (pred_node->bb->flags & BB_TO_VISIT)) + { + pred_node->bb->flags &= ~BB_TO_VISIT; + VEC_quick_push (ira_loop_tree_node_t, dfs_stack, pred_node); + } + } + if (n == VEC_last (ira_loop_tree_node_t, dfs_stack)) + { + VEC_pop (ira_loop_tree_node_t, dfs_stack); + VEC_quick_push (ira_loop_tree_node_t, topsort_nodes, n); + } + } + } + +#undef BB_TO_VISIT + VEC_free (ira_loop_tree_node_t, heap, dfs_stack); + } + + gcc_assert (VEC_length (ira_loop_tree_node_t, topsort_nodes) + == n_loop_preorder); + return topsort_nodes; +} + /* The current loop tree node and its regno allocno map. */ ira_loop_tree_node_t ira_curr_loop_tree_node; ira_allocno_t *ira_curr_regno_allocno_map; @@ -1466,7 +1556,16 @@ ira_allocno_t *ira_curr_regno_allocno_ma correspondingly in preorder and postorder. The function sets up IRA_CURR_LOOP_TREE_NODE and IRA_CURR_REGNO_ALLOCNO_MAP. If BB_P, basic block nodes of LOOP_NODE is also processed (before its - subloop nodes). */ + subloop nodes). + + If BB_P is set and POSTORDER_FUNC is given, the basic blocks in + the loop are passed in the *reverse* post-order of the *reverse* + CFG. This is only used by ira_create_allocno_live_ranges, which + wants to visit basic blocks in this order to minimize the number + of elements per live range chain. + Note that the loop tree nodes are still visited in the normal, + forward post-order of the loop tree. */ + void ira_traverse_loop_tree (bool bb_p, ira_loop_tree_node_t loop_node, void (*preorder_func) (ira_loop_tree_node_t), @@ -1482,18 +1581,37 @@ ira_traverse_loop_tree (bool bb_p, ira_l (*preorder_func) (loop_node); if (bb_p) - for (subloop_node = loop_node->children; - subloop_node != NULL; - subloop_node = subloop_node->next) - if (subloop_node->bb != NULL) - { - if (preorder_func != NULL) - (*preorder_func) (subloop_node); + { + VEC (ira_loop_tree_node_t, heap) *loop_preorder = NULL; + unsigned int i; - if (postorder_func != NULL) + /* Add all nodes to the set of nodes to visit. The IRA loop tree + is set up such that nodes in the loop body appear in a pre-order + of their place in the CFG. */ + for (subloop_node = loop_node->children; + subloop_node != NULL; + subloop_node = subloop_node->next) + if (subloop_node->bb != NULL) + VEC_safe_push (ira_loop_tree_node_t, heap, + loop_preorder, subloop_node); + + if (preorder_func != NULL) + FOR_EACH_VEC_ELT (ira_loop_tree_node_t, loop_preorder, i, subloop_node) + (*preorder_func) (subloop_node); + + if (postorder_func != NULL) + { + VEC (ira_loop_tree_node_t, heap) *loop_rev_postorder = + ira_loop_tree_body_rev_postorder (loop_node, loop_preorder); + FOR_EACH_VEC_ELT_REVERSE (ira_loop_tree_node_t, loop_rev_postorder, + i, subloop_node) (*postorder_func) (subloop_node); + VEC_free (ira_loop_tree_node_t, heap, loop_rev_postorder); } + VEC_free (ira_loop_tree_node_t, heap, loop_preorder); + } + for (subloop_node = loop_node->subloops; subloop_node != NULL; subloop_node = subloop_node->subloop_next) --- gcc-4.7.2/gcc/ira-conflicts.c.~1~ 2012-01-19 21:46:31.000000000 +0100 +++ gcc-4.7.2/gcc/ira-conflicts.c 2012-10-13 10:48:56.000000000 +0200 @@ -860,7 +860,7 @@ ira_build_conflicts (void) ira_object_iterator oi; build_conflicts (); - ira_traverse_loop_tree (true, ira_loop_tree_root, NULL, add_copies); + ira_traverse_loop_tree (true, ira_loop_tree_root, add_copies, NULL); /* We need finished conflict table for the subsequent call. */ if (flag_ira_region == IRA_REGION_ALL || flag_ira_region == IRA_REGION_MIXED) --- gcc-4.7.2/gcc/ira-lives.c.~1~ 2012-01-19 21:46:31.000000000 +0100 +++ gcc-4.7.2/gcc/ira-lives.c 2012-10-13 10:48:56.000000000 +0200 @@ -1368,7 +1368,7 @@ remove_some_program_points_and_update_li int *map; ira_object_t obj; ira_object_iterator oi; - live_range_t r; + live_range_t r, prev_r, next_r; sbitmap born_or_dead, born, dead; sbitmap_iterator sbi; bool born_p, dead_p, prev_born_p, prev_dead_p; @@ -1412,10 +1412,19 @@ remove_some_program_points_and_update_li ira_max_point = n; FOR_EACH_OBJECT (obj, oi) - for (r = OBJECT_LIVE_RANGES (obj); r != NULL; r = r->next) + for (r = OBJECT_LIVE_RANGES (obj), prev_r = NULL; r != NULL; r = next_r) { + next_r = r->next; r->start = map[r->start]; r->finish = map[r->finish]; + if (prev_r == NULL || prev_r->start > r->finish + 1) + { + prev_r = r; + continue; + } + prev_r->start = r->start; + prev_r->next = next_r; + ira_finish_live_range (r); } ira_free (map);