[backport proposed gcc-4.7 ARM NEON scheduling tweak to gcc-4.6 (doesn't apply cleanly to gcc-4.5 alas) ] Date: Tue, 22 Feb 2011 12:27:08 +0000 From: Andrew Stubbs Subject: [PATCH][ARM] NEON scheduling tweak List-Archive: The attached is an old patch, originally by Mark Shinwell, that has been used in CodeSourcery toolchains for some time. I've forward ported it to GCC 4.6, and benchmarked it. I find that it gives >2% (geomean) speed improvement in a popular benchmark running on Cortex-A9 NEON (Panda board). OK for stage 1? Andrew 2011-02-22 Andrew Stubbs Mark Shinwell gcc/ * config/arm/vfp.md (*arm_movsi_vfp, *thumb2_movsi_vfp) (*arm_movdi_vfp, *thumb2_movdi_vfp, *movsf_vfp, *thumb2_movsf_vfp) (*movdf_vfp, *thumb2_movdf_vfp, *movsfcc_vfp, *thumb2_movsfcc_vfp) (*movdfcc_vfp, *thumb2_movdfcc_vfp): Add neon_type. * config/arm/arm.md (neon_type): Update comment. --- gcc-4.6-20110212/gcc/config/arm/arm.md.~1~ 2011-01-20 23:03:29.000000000 +0100 +++ gcc-4.6-20110212/gcc/config/arm/arm.md 2011-02-26 17:44:07.000000000 +0100 @@ -337,8 +337,6 @@ (define_attr "type" (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) ;; Classification of NEON instructions for scheduling purposes. -;; Do not set this attribute and the "type" attribute together in -;; any one instruction pattern. (define_attr "neon_type" "neon_int_1,\ neon_int_2,\ --- gcc-4.6-20110212/gcc/config/arm/vfp.md.~1~ 2011-01-20 23:03:29.000000000 +0100 +++ gcc-4.6-20110212/gcc/config/arm/vfp.md 2011-02-26 17:44:07.000000000 +0100 @@ -83,6 +83,7 @@ (define_insn "*arm_movsi_vfp" " [(set_attr "predicable" "yes") (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") + (set_attr "neon_type" "*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*") (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] @@ -125,6 +126,7 @@ (define_insn "*thumb2_movsi_vfp" " [(set_attr "predicable" "yes") (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") + (set_attr "neon_type" "*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*") (set_attr "pool_range" "*,*,*,*,1020,4096,*,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] @@ -163,6 +165,7 @@ (define_insn "*arm_movdi_vfp" } " [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") + (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*") (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8) (eq_attr "alternative" "5") (if_then_else @@ -201,6 +204,7 @@ (define_insn "*thumb2_movdi_vfp" } " [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") + (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*") (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8) (eq_attr "alternative" "5") (if_then_else @@ -355,6 +359,7 @@ (define_insn "*movsf_vfp" [(set_attr "predicable" "yes") (set_attr "type" "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*") + (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") (set_attr "insn" "*,*,*,*,*,*,*,*,mov") (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")] @@ -392,6 +397,7 @@ (define_insn "*thumb2_movsf_vfp" [(set_attr "predicable" "yes") (set_attr "type" "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*") + (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") (set_attr "insn" "*,*,*,*,*,*,*,*,mov") (set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] @@ -435,6 +441,7 @@ (define_insn "*movdf_vfp" " [(set_attr "type" "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") + (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*") (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8) (eq_attr "alternative" "7") (if_then_else @@ -479,6 +486,7 @@ (define_insn "*thumb2_movdf_vfp" " [(set_attr "type" "r_2_f,f_2_r,fconstd,load2,store2,f_loadd,f_stored,ffarithd,*") + (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*") (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8) (eq_attr "alternative" "7") (if_then_else @@ -514,7 +522,8 @@ (define_insn "*movsfcc_vfp" fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") + (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")] ) (define_insn "*thumb2_movsfcc_vfp" @@ -537,7 +546,8 @@ (define_insn "*thumb2_movsfcc_vfp" ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") + (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")] ) (define_insn "*movdfcc_vfp" @@ -560,7 +570,8 @@ (define_insn "*movdfcc_vfp" fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") + (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")] ) (define_insn "*thumb2_movdfcc_vfp" @@ -583,7 +594,8 @@ (define_insn "*thumb2_movdfcc_vfp" ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" [(set_attr "conds" "use") (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") + (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")] )