Revision | 6a5f6dde04a8e287c3552315127d9bc85124d769 (tree) |
---|---|
Zeit | 2015-04-24 22:05:50 |
Autor | uros <uros@138b...> |
Committer | uros |
* config/i386/i386-protos.h (ix86_operands_ok_for_move_multiple): New.
* config/i386/i386.c (extract_base_offset_in_addr): New function.
(ix86_operands_ok_for_move_multiple): Ditto.
* config/i386/sse.md (movsd/movhpd to movupd peephole2): New pattern.
(movlpd/movhpd to movupd peephole2): Ditto.
testsuite/ChangeLog:
* gcc.target/i386/sse2-load-multi.c: New test.
* gcc.target/i386/sse2-store-multi.c: Ditto.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222410 138bc75d-0d04-0410-961f-82ee72b054a4
@@ -1,3 +1,12 @@ | ||
1 | +2015-04-24 Uros Bizjak <ubizjak@gmail.com> | |
2 | + Wei Mi <wmi@google.com> | |
3 | + | |
4 | + * config/i386/i386-protos.h (ix86_operands_ok_for_move_multiple): New. | |
5 | + * config/i386/i386.c (extract_base_offset_in_addr): New function. | |
6 | + (ix86_operands_ok_for_move_multiple): Ditto. | |
7 | + * config/i386/sse.md (movsd/movhpd to movupd peephole2): New pattern. | |
8 | + (movlpd/movhpd to movupd peephole2): Ditto. | |
9 | + | |
1 | 10 | 2015-04-24 Marek Polacek <polacek@redhat.com> |
2 | 11 | |
3 | 12 | PR c/61534 |
@@ -304,6 +304,8 @@ extern enum attr_cpu ix86_schedule; | ||
304 | 304 | #endif |
305 | 305 | |
306 | 306 | extern const char * ix86_output_call_insn (rtx_insn *insn, rtx call_op); |
307 | +extern bool ix86_operands_ok_for_move_multiple (rtx *operands, bool load, | |
308 | + enum machine_mode mode); | |
307 | 309 | |
308 | 310 | #ifdef RTX_CODE |
309 | 311 | /* Target data for multipass lookahead scheduling. |
@@ -51726,6 +51726,92 @@ ix86_binds_local_p (const_tree exp) | ||
51726 | 51726 | } |
51727 | 51727 | #endif |
51728 | 51728 | |
51729 | +/* If MEM is in the form of [base+offset], extract the two parts | |
51730 | + of address and set to BASE and OFFSET, otherwise return false. */ | |
51731 | + | |
51732 | +static bool | |
51733 | +extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset) | |
51734 | +{ | |
51735 | + rtx addr; | |
51736 | + | |
51737 | + gcc_assert (MEM_P (mem)); | |
51738 | + | |
51739 | + addr = XEXP (mem, 0); | |
51740 | + | |
51741 | + if (GET_CODE (addr) == CONST) | |
51742 | + addr = XEXP (addr, 0); | |
51743 | + | |
51744 | + if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF) | |
51745 | + { | |
51746 | + *base = addr; | |
51747 | + *offset = const0_rtx; | |
51748 | + return true; | |
51749 | + } | |
51750 | + | |
51751 | + if (GET_CODE (addr) == PLUS | |
51752 | + && (REG_P (XEXP (addr, 0)) | |
51753 | + || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF) | |
51754 | + && CONST_INT_P (XEXP (addr, 1))) | |
51755 | + { | |
51756 | + *base = XEXP (addr, 0); | |
51757 | + *offset = XEXP (addr, 1); | |
51758 | + return true; | |
51759 | + } | |
51760 | + | |
51761 | + return false; | |
51762 | +} | |
51763 | + | |
51764 | +/* Given OPERANDS of consecutive load/store, check if we can merge | |
51765 | + them into move multiple. LOAD is true if they are load instructions. | |
51766 | + MODE is the mode of memory operands. */ | |
51767 | + | |
51768 | +bool | |
51769 | +ix86_operands_ok_for_move_multiple (rtx *operands, bool load, | |
51770 | + enum machine_mode mode) | |
51771 | +{ | |
51772 | + HOST_WIDE_INT offval_1, offval_2, msize; | |
51773 | + rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2; | |
51774 | + | |
51775 | + if (load) | |
51776 | + { | |
51777 | + mem_1 = operands[1]; | |
51778 | + mem_2 = operands[3]; | |
51779 | + reg_1 = operands[0]; | |
51780 | + reg_2 = operands[2]; | |
51781 | + } | |
51782 | + else | |
51783 | + { | |
51784 | + mem_1 = operands[0]; | |
51785 | + mem_2 = operands[2]; | |
51786 | + reg_1 = operands[1]; | |
51787 | + reg_2 = operands[3]; | |
51788 | + } | |
51789 | + | |
51790 | + gcc_assert (REG_P (reg_1) && REG_P (reg_2)); | |
51791 | + | |
51792 | + if (REGNO (reg_1) != REGNO (reg_2)) | |
51793 | + return false; | |
51794 | + | |
51795 | + /* Check if the addresses are in the form of [base+offset]. */ | |
51796 | + if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1)) | |
51797 | + return false; | |
51798 | + if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2)) | |
51799 | + return false; | |
51800 | + | |
51801 | + /* Check if the bases are the same. */ | |
51802 | + if (!rtx_equal_p (base_1, base_2)) | |
51803 | + return false; | |
51804 | + | |
51805 | + offval_1 = INTVAL (offset_1); | |
51806 | + offval_2 = INTVAL (offset_2); | |
51807 | + msize = GET_MODE_SIZE (mode); | |
51808 | + /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */ | |
51809 | + if (offval_1 + msize != offval_2) | |
51810 | + return false; | |
51811 | + | |
51812 | + return true; | |
51813 | +} | |
51814 | + | |
51729 | 51815 | /* Initialize the GCC target structure. */ |
51730 | 51816 | #undef TARGET_RETURN_IN_MEMORY |
51731 | 51817 | #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory |
@@ -1183,6 +1183,21 @@ | ||
1183 | 1183 | ] |
1184 | 1184 | (const_string "<MODE>")))]) |
1185 | 1185 | |
1186 | +;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets. | |
1187 | +(define_peephole2 | |
1188 | + [(set (match_operand:V2DF 0 "register_operand") | |
1189 | + (vec_concat:V2DF (match_operand:DF 1 "memory_operand") | |
1190 | + (match_operand:DF 4 "const0_operand"))) | |
1191 | + (set (match_operand:V2DF 2 "register_operand") | |
1192 | + (vec_concat:V2DF (vec_select:DF (match_dup 2) | |
1193 | + (parallel [(const_int 0)])) | |
1194 | + (match_operand:DF 3 "memory_operand")))] | |
1195 | + "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL | |
1196 | + && ix86_operands_ok_for_move_multiple (operands, true, DFmode)" | |
1197 | + [(set (match_dup 2) | |
1198 | + (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))] | |
1199 | + "operands[4] = adjust_address (operands[1], V2DFmode, 0);") | |
1200 | + | |
1186 | 1201 | (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>" |
1187 | 1202 | [(set (match_operand:VF 0 "memory_operand" "=m") |
1188 | 1203 | (unspec:VF |
@@ -1242,6 +1257,20 @@ | ||
1242 | 1257 | (set_attr "prefix" "evex") |
1243 | 1258 | (set_attr "mode" "<sseinsnmode>")]) |
1244 | 1259 | |
1260 | +;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets. | |
1261 | +(define_peephole2 | |
1262 | + [(set (match_operand:DF 0 "memory_operand") | |
1263 | + (vec_select:DF (match_operand:V2DF 1 "register_operand") | |
1264 | + (parallel [(const_int 0)]))) | |
1265 | + (set (match_operand:DF 2 "memory_operand") | |
1266 | + (vec_select:DF (match_operand:V2DF 3 "register_operand") | |
1267 | + (parallel [(const_int 1)])))] | |
1268 | + "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL | |
1269 | + && ix86_operands_ok_for_move_multiple (operands, false, DFmode)" | |
1270 | + [(set (match_dup 4) | |
1271 | + (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))] | |
1272 | + "operands[4] = adjust_address (operands[0], V2DFmode, 0);") | |
1273 | + | |
1245 | 1274 | /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads |
1246 | 1275 | just fine if misaligned_operand is true, and without the UNSPEC it can |
1247 | 1276 | be combined with arithmetic instructions. If misaligned_operand is |
@@ -1,3 +1,9 @@ | ||
1 | +2015-04-24 Uros Bizjak <ubizjak@gmail.com> | |
2 | + Wei Mi <wmi@google.com> | |
3 | + | |
4 | + * gcc.target/i386/sse2-load-multi.c: New test. | |
5 | + * gcc.target/i386/sse2-store-multi.c: Ditto. | |
6 | + | |
1 | 7 | 2015-04-24 Marek Polacek <polacek@redhat.com> |
2 | 8 | |
3 | 9 | PR c/65830 |
@@ -0,0 +1,24 @@ | ||
1 | +/* { dg-do compile } */ | |
2 | +/* { dg-options "-march=corei7 -O2" } */ | |
3 | + | |
4 | +#include <emmintrin.h> | |
5 | + | |
6 | +double a[8]; | |
7 | + | |
8 | +__m128d load_1 () | |
9 | +{ | |
10 | + __m128d res; | |
11 | + res = _mm_load_sd (&a[1]); | |
12 | + res = _mm_loadh_pd (res, &a[2]); | |
13 | + return res; | |
14 | +} | |
15 | + | |
16 | +__m128d load_2 (double *a) | |
17 | +{ | |
18 | + __m128d res; | |
19 | + res = _mm_load_sd (&a[1]); | |
20 | + res = _mm_loadh_pd (res, &a[2]); | |
21 | + return res; | |
22 | +} | |
23 | + | |
24 | +/* { dg-final { scan-assembler-times "movup" 2 } } */ |
@@ -0,0 +1,20 @@ | ||
1 | +/* { dg-do compile } */ | |
2 | +/* { dg-options "-march=corei7 -O2" } */ | |
3 | + | |
4 | +#include <emmintrin.h> | |
5 | + | |
6 | +double a[8]; | |
7 | + | |
8 | +void store_1 (__m128d val) | |
9 | +{ | |
10 | + _mm_store_sd (&a[1], val); | |
11 | + _mm_storeh_pd (&a[2], val); | |
12 | +} | |
13 | + | |
14 | +void store_2 (__m128d val, double *a) | |
15 | +{ | |
16 | + _mm_store_sd (&a[1], val); | |
17 | + _mm_storeh_pd (&a[2], val); | |
18 | +} | |
19 | + | |
20 | +/* { dg-final { scan-assembler-times "movup" 2 } } */ |