#! /bin/sh -e

# DP: Emit cld instruction when stringops are used (i386).

# Usage: <this-script> -patch|-unpatch [-d <dir>]
#
# Applies (-patch) or reverts (-unpatch) the diff stored below the final
# "exit 0" by feeding this very file to patch(1) on stdin; patch is relied
# upon to skip the leading shell text and ChangeLog.  With "-d <dir>",
# patch is told to change into <dir> before doing anything.

# Abort on the first failing command even when started as "sh <script>",
# in which case the -e on the interpreter line is not honoured.
set -e

# Print the usage error on stderr and exit with status 1.
usage() {
    echo >&2 "$(basename "$0"): script expects -patch|-unpatch as argument"
    exit 1
}

action=$1

# Replace the positional parameters with the optional "-d <dir>" pair so it
# can be handed to patch as "$@"; this keeps a directory name containing
# whitespace or glob characters as one single word.
if [ $# -eq 3 ] && [ "$2" = '-d' ]; then
    set -- -d "$3"
elif [ $# -eq 1 ]; then
    set --
else
    usage
fi

case "$action" in
    -patch)
        patch "$@" -f --no-backup-if-mismatch -p0 < "$0"
        ;;
    -unpatch)
        # -R applies the same diff in reverse.
        patch "$@" -f --no-backup-if-mismatch -R -p0 < "$0"
        ;;
    *)
        usage
        ;;
esac

# Stop here: everything below this line is patch payload, not shell code.
exit 0

2008-03-06  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.h (TARGET_CLD): New define.
	(struct machine_function): Add needs_cld field.
	(ix86_current_function_needs_cld): New define.
	* config/i386/i386.md (UNSPECV_CLD): New unspec volatile constant.
	("cld"): New insn pattern.
	("strmov_singleop"): Set ix86_current_function_needs_cld flag.
	("rep_mov"): Ditto.
	("strset_singleop"): Ditto.
	("rep_stos"): Ditto.
	("cmpstrnqi_nz_1"): Ditto.
	("cmpstrnqi_1"): Ditto.
	("strlenqi_1"): Ditto.
	* config/i386/i386.c (ix86_expand_prologue): Emit cld insn for
	TARGET_CLD when ix86_current_function_needs_cld is set.

Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h	(revision 132966)
+++ gcc/config/i386/i386.h	(working copy)
@@ -388,6 +388,7 @@ extern unsigned int ix86_arch_features[X
 
 extern int x86_prefetch_sse;
 
+#define TARGET_CLD		1
 #define TARGET_ABM		x86_abm
 #define TARGET_CMPXCHG16B	x86_cmpxchg16b
 #define TARGET_POPCNT		x86_popcnt
@@ -2446,8 +2447,9 @@ struct machine_function GTY(())
   int save_varrargs_registers;
   int accesses_prev_frame;
   int optimize_mode_switching[MAX_386_ENTITIES];
-  /* Set by ix86_compute_frame_layout and used by prologue/epilogue expander to
-     determine the style used.  */
+  int needs_cld;
+  /* Set by ix86_compute_frame_layout and used by prologue/epilogue
+     expander to determine the style used.  */
   int use_fast_prologue_epilogue;
   /* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE has been computed
      for.  */
@@ -2467,6 +2469,7 @@ struct machine_function GTY(())
 #define ix86_stack_locals (cfun->machine->stack_locals)
 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
 #define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
+#define ix86_current_function_needs_cld (cfun->machine->needs_cld)
 #define ix86_tls_descriptor_calls_expanded_in_cfun \
   (cfun->machine->tls_descriptor_call_expanded_p)
 /* Since tls_descriptor_call_expanded is not cleared, even if all TLS
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md	(revision 132966)
+++ gcc/config/i386/i386.md	(working copy)
@@ -205,6 +205,7 @@
    (UNSPECV_XCHG		12)
    (UNSPECV_LOCK		13)
    (UNSPECV_PROLOGUE_USE	14)
+   (UNSPECV_CLD			15)
   ])
 
 ;; Constants to represent pcomtrue/pcomfalse variants
@@ -18519,6 +18520,14 @@
 
 ;; Block operation instructions
 
+(define_insn "cld"
+  [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
+  ""
+  "cld"
+  [(set_attr "length" "1")
+   (set_attr "length_immediate" "0")
+   (set_attr "modrm" "0")])
+
 (define_expand "movmemsi"
   [(use (match_operand:BLK 0 "memory_operand" ""))
    (use (match_operand:BLK 1 "memory_operand" ""))
@@ -18591,7 +18600,7 @@
 	      (set (match_operand 2 "register_operand" "")
 		   (match_operand 5 "" ""))])]
   "TARGET_SINGLE_STRINGOP || optimize_size"
-  "")
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*strmovdi_rex_1"
   [(set (mem:DI (match_operand:DI 2 "register_operand" "0"))
@@ -18708,7 +18717,7 @@
 		   (match_operand 3 "memory_operand" ""))
 	      (use (match_dup 4))])]
   ""
-  "")
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*rep_movdi_rex64"
   [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
@@ -18868,7 +18877,7 @@
 	      (set (match_operand 0 "register_operand" "")
 		   (match_operand 3 "" ""))])]
   "TARGET_SINGLE_STRINGOP || optimize_size"
-  "")
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*strsetdi_rex_1"
   [(set (mem:DI (match_operand:DI 1 "register_operand" "0"))
@@ -18962,7 +18971,7 @@
 	      (use (match_operand 3 "register_operand" ""))
 	      (use (match_dup 1))])]
   ""
-  "")
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*rep_stosdi_rex64"
   [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
@@ -19138,7 +19147,7 @@
 	      (clobber (match_operand 1 "register_operand" ""))
 	      (clobber (match_dup 2))])]
   ""
-  "")
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*cmpstrnqi_nz_1"
   [(set (reg:CC FLAGS_REG)
@@ -19185,7 +19194,7 @@
 	      (clobber (match_operand 1 "register_operand" ""))
 	      (clobber (match_dup 2))])]
   ""
-  "")
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*cmpstrnqi_1"
   [(set (reg:CC FLAGS_REG)
@@ -19254,7 +19263,7 @@
 	      (clobber (match_operand 1 "register_operand" ""))
 	      (clobber (reg:CC FLAGS_REG))])]
   ""
-  "")
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*strlenqi_1"
   [(set (match_operand:SI 0 "register_operand" "=&c")
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 132966)
+++ gcc/config/i386/i386.c	(working copy)
@@ -6498,6 +6498,10 @@ ix86_expand_prologue (void)
 	emit_insn (gen_prologue_use (pic_offset_table_rtx));
       emit_insn (gen_blockage ());
     }
+
+  /* Emit cld instruction if stringops are used in the function.  */
+  if (TARGET_CLD && ix86_current_function_needs_cld)
+    emit_insn (gen_cld ());
 }
 
 /* Emit code to restore saved registers using MOV insns.  First register