• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
Keine Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

Revision: 6a5f6dde04a8e287c3552315127d9bc85124d769 (tree)
Zeit: 2015-04-24 22:05:50
Autor: uros <uros@138b...>
Committer: uros

Log Message

* config/i386/i386-protos.h (ix86_operands_ok_for_move_multiple): New.
* config/i386/i386.c (extract_base_offset_in_addr): New function.
(ix86_operands_ok_for_move_multiple): Ditto.
* config/i386/sse.md (movsd/movhpd to movupd peephole2): New pattern.
(movlpd/movhpd to movupd peephole2): Ditto.

testsuite/ChangeLog:

* gcc.target/i386/sse2-load-multi.c: New test.
* gcc.target/i386/sse2-store-multi.c: Ditto.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222410 138bc75d-0d04-0410-961f-82ee72b054a4

Zusammenfassung der Änderungen

Diff

--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
1+2015-04-24 Uros Bizjak <ubizjak@gmail.com>
2+ Wei Mi <wmi@google.com>
3+
4+ * config/i386/i386-protos.h (ix86_operands_ok_for_move_multiple): New.
5+ * config/i386/i386.c (extract_base_offset_in_addr): New function.
6+ (ix86_operands_ok_for_move_multiple): Ditto.
7+ * config/i386/sse.md (movsd/movhpd to movupd peephole2): New pattern.
8+ (movlpd/movhpd to movupd peephole2): Ditto.
9+
110 2015-04-24 Marek Polacek <polacek@redhat.com>
211
312 PR c/61534
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -304,6 +304,8 @@ extern enum attr_cpu ix86_schedule;
304304 #endif
305305
306306 extern const char * ix86_output_call_insn (rtx_insn *insn, rtx call_op);
307+extern bool ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
308+ enum machine_mode mode);
307309
308310 #ifdef RTX_CODE
309311 /* Target data for multipass lookahead scheduling.
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -51726,6 +51726,92 @@ ix86_binds_local_p (const_tree exp)
5172651726 }
5172751727 #endif
5172851728
51729+/* If MEM is in the form of [base+offset], extract the two parts
51730+ of address and set to BASE and OFFSET, otherwise return false. */
51731+
51732+static bool
51733+extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
51734+{
51735+ rtx addr;
51736+
51737+ gcc_assert (MEM_P (mem));
51738+
51739+ addr = XEXP (mem, 0);
51740+
51741+ if (GET_CODE (addr) == CONST)
51742+ addr = XEXP (addr, 0);
51743+
51744+ if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
51745+ {
51746+ *base = addr;
51747+ *offset = const0_rtx;
51748+ return true;
51749+ }
51750+
51751+ if (GET_CODE (addr) == PLUS
51752+ && (REG_P (XEXP (addr, 0))
51753+ || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
51754+ && CONST_INT_P (XEXP (addr, 1)))
51755+ {
51756+ *base = XEXP (addr, 0);
51757+ *offset = XEXP (addr, 1);
51758+ return true;
51759+ }
51760+
51761+ return false;
51762+}
51763+
51764+/* Given OPERANDS of consecutive load/store, check if we can merge
51765+ them into move multiple. LOAD is true if they are load instructions.
51766+ MODE is the mode of memory operands. */
51767+
51768+bool
51769+ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
51770+ enum machine_mode mode)
51771+{
51772+ HOST_WIDE_INT offval_1, offval_2, msize;
51773+ rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
51774+
51775+ if (load)
51776+ {
51777+ mem_1 = operands[1];
51778+ mem_2 = operands[3];
51779+ reg_1 = operands[0];
51780+ reg_2 = operands[2];
51781+ }
51782+ else
51783+ {
51784+ mem_1 = operands[0];
51785+ mem_2 = operands[2];
51786+ reg_1 = operands[1];
51787+ reg_2 = operands[3];
51788+ }
51789+
51790+ gcc_assert (REG_P (reg_1) && REG_P (reg_2));
51791+
51792+ if (REGNO (reg_1) != REGNO (reg_2))
51793+ return false;
51794+
51795+ /* Check if the addresses are in the form of [base+offset]. */
51796+ if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
51797+ return false;
51798+ if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
51799+ return false;
51800+
51801+ /* Check if the bases are the same. */
51802+ if (!rtx_equal_p (base_1, base_2))
51803+ return false;
51804+
51805+ offval_1 = INTVAL (offset_1);
51806+ offval_2 = INTVAL (offset_2);
51807+ msize = GET_MODE_SIZE (mode);
51808+ /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
51809+ if (offval_1 + msize != offval_2)
51810+ return false;
51811+
51812+ return true;
51813+}
51814+
5172951815 /* Initialize the GCC target structure. */
5173051816 #undef TARGET_RETURN_IN_MEMORY
5173151817 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1183,6 +1183,21 @@
11831183 ]
11841184 (const_string "<MODE>")))])
11851185
1186+;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
1187+(define_peephole2
1188+ [(set (match_operand:V2DF 0 "register_operand")
1189+ (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1190+ (match_operand:DF 4 "const0_operand")))
1191+ (set (match_operand:V2DF 2 "register_operand")
1192+ (vec_concat:V2DF (vec_select:DF (match_dup 2)
1193+ (parallel [(const_int 0)]))
1194+ (match_operand:DF 3 "memory_operand")))]
1195+ "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1196+ && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1197+ [(set (match_dup 2)
1198+ (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
1199+ "operands[4] = adjust_address (operands[1], V2DFmode, 0);")
1200+
11861201 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
11871202 [(set (match_operand:VF 0 "memory_operand" "=m")
11881203 (unspec:VF
@@ -1242,6 +1257,20 @@
12421257 (set_attr "prefix" "evex")
12431258 (set_attr "mode" "<sseinsnmode>")])
12441259
1260+;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1261+(define_peephole2
1262+ [(set (match_operand:DF 0 "memory_operand")
1263+ (vec_select:DF (match_operand:V2DF 1 "register_operand")
1264+ (parallel [(const_int 0)])))
1265+ (set (match_operand:DF 2 "memory_operand")
1266+ (vec_select:DF (match_operand:V2DF 3 "register_operand")
1267+ (parallel [(const_int 1)])))]
1268+ "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1269+ && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1270+ [(set (match_dup 4)
1271+ (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
1272+ "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1273+
12451274 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
12461275 just fine if misaligned_operand is true, and without the UNSPEC it can
12471276 be combined with arithmetic instructions. If misaligned_operand is
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
1+2015-04-24 Uros Bizjak <ubizjak@gmail.com>
2+ Wei Mi <wmi@google.com>
3+
4+ * gcc.target/i386/sse2-load-multi.c: New test.
5+ * gcc.target/i386/sse2-store-multi.c: Ditto.
6+
17 2015-04-24 Marek Polacek <polacek@redhat.com>
28
39 PR c/65830
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-load-multi.c
@@ -0,0 +1,24 @@
1+/* { dg-do compile } */
2+/* { dg-options "-march=corei7 -O2" } */
3+
4+#include <emmintrin.h>
5+
6+double a[8];
7+
8+__m128d load_1 ()
9+{
10+ __m128d res;
11+ res = _mm_load_sd (&a[1]);
12+ res = _mm_loadh_pd (res, &a[2]);
13+ return res;
14+}
15+
16+__m128d load_2 (double *a)
17+{
18+ __m128d res;
19+ res = _mm_load_sd (&a[1]);
20+ res = _mm_loadh_pd (res, &a[2]);
21+ return res;
22+}
23+
24+/* { dg-final { scan-assembler-times "movup" 2 } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-store-multi.c
@@ -0,0 +1,20 @@
1+/* { dg-do compile } */
2+/* { dg-options "-march=corei7 -O2" } */
3+
4+#include <emmintrin.h>
5+
6+double a[8];
7+
8+void store_1 (__m128d val)
9+{
10+ _mm_store_sd (&a[1], val);
11+ _mm_storeh_pd (&a[2], val);
12+}
13+
14+void store_2 (__m128d val, double *a)
15+{
16+ _mm_store_sd (&a[1], val);
17+ _mm_storeh_pd (&a[2], val);
18+}
19+
20+/* { dg-final { scan-assembler-times "movup" 2 } } */