From: Mans Rullgard Date: Tue, 29 Jul 2008 21:13:14 +0000 (+0100) Subject: ARM: work around Cortex-A8 erratum 451034 X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=c6bbb0c33f6f681b8265a43f8744735de5a9d45e ARM: work around Cortex-A8 erratum 451034 On Cortex-A8 r1p0 and r1p1, executing a NEON store with an integer store in the store buffer, can cause a processor deadlock under certain conditions. A DMB instruction at the start of every NEON function ensures that the integer store buffer is always empty before executing any NEON store, thus avoiding the deadlock condition. See ARM Cortex-A8 Errata Notice (PR120-PRDC-008070) for full details. --- diff --git b/mythtv/libs/libavcodec/armv4l/dsputil_neon.c b/mythtv/libs/libavcodec/armv4l/dsputil_neon.c index fa0602d..4fbadfc 100644 --- b/mythtv/libs/libavcodec/armv4l/dsputil_neon.c +++ b/mythtv/libs/libavcodec/armv4l/dsputil_neon.c @@ -28,6 +28,7 @@ extern void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); #define PUT_PIXELS_16_X2(vhadd) \ + "dmb \n\t" \ "1: \n\t" \ "vld1.64 {d0,d1,d2}, [%[p]], %[line_size] \n\t" \ "vld1.64 {d4,d5,d6}, [%[p]], %[line_size] \n\t" \ @@ -46,6 +47,7 @@ extern void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, "lsl %[l2], %[line_size], #1 \n\t" \ "vld1.64 {d0,d1}, [%[p0]], %[l2] \n\t" \ "vld1.64 {d2,d3}, [%[p1]], %[l2] \n\t" \ + "dmb \n\t" \ "1: \n\t" \ "subs %[h], %[h], #2 \n\t" \ vhadd".u8 q2, q0, q1 \n\t" \ @@ -69,6 +71,7 @@ extern void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, "vaddl.u8 q10, d1, d3 \n\t" \ "vaddl.u8 q9, d4, d6 \n\t" \ "vaddl.u8 q11, d5, d7 \n\t" \ + "dmb \n\t" \ "1: \n\t" \ "subs %[h], %[h], #2 \n\t" \ "vld1.64 {d0,d1,d2}, [%[p0]], %[l2] \n\t" \ @@ -98,6 +101,7 @@ extern void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, "bgt 1b \n\t" #define PUT_PIXELS_8_X2(vhadd) \ + "dmb \n\t" \ "1: \n\t" \ "vld1.64 {d0,d1}, [%[p]], %[line_size] \n\t" \ "vld1.64 {d2,d3}, [%[p]], %[line_size] \n\t" \ @@ -116,6 +120,7 @@ extern void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, "lsl %[l2], %[line_size], #1 \n\t" \ "vld1.64 {d0}, [%[p0]], %[l2] \n\t" \ "vld1.64 {d1}, [%[p1]], %[l2] \n\t" \ + "dmb \n\t" \ "1: \n\t" \ "subs %[h], %[h], #2 \n\t" \ vhadd".u8 d4, d0, d1 \n\t" \ @@ -137,6 +142,7 @@ extern void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, "vext.8 d6, d2, d3, #1 \n\t" \ "vaddl.u8 q8, d0, d4 \n\t" \ "vaddl.u8 q9, d2, d6 \n\t" \ + "dmb \n\t" \ "1: \n\t" \ "subs %[h], %[h], #2 \n\t" \ "vld1.64 {d0,d1}, [%[p0]], %[l2] \n\t" \ @@ -161,6 +167,7 @@ static void put_pixels16_neon(uint8_t *block, const uint8_t *pixels, int line_size, int h) { asm volatile( + "dmb \n\t" "1: \n\t" "vld1.64 {d0,d1}, [%[pixels]], %[line_size] \n\t" "vld1.64 {d2,d3}, [%[pixels]], %[line_size] \n\t" @@ -224,6 +231,7 @@ static void put_pixels8_neon(uint8_t *block, const uint8_t *pixels, int line_size, int h) { asm volatile( + "dmb \n\t" "1: \n\t" "vld1.64 {d0}, [%[p]], %[line_size] \n\t" "vld1.64 {d1}, [%[p]], %[line_size] \n\t" diff --git b/mythtv/libs/libavcodec/armv4l/h264dsp_neon.S b/mythtv/libs/libavcodec/armv4l/h264dsp_neon.S index a766867..8171ee2 100644 --- b/mythtv/libs/libavcodec/armv4l/h264dsp_neon.S +++ b/mythtv/libs/libavcodec/armv4l/h264dsp_neon.S @@ -40,6 +40,8 @@ ff_put_h264_chroma_mc8_neon: sub r4, r4, r5, lsl #3 add r4, r4, #64 + dmb + beq 2f add r5, r1, r2 diff --git b/mythtv/libs/libavcodec/armv4l/simple_idct_neon.S b/mythtv/libs/libavcodec/armv4l/simple_idct_neon.S index 943e04f..abda6b2 100644 --- b/mythtv/libs/libavcodec/armv4l/simple_idct_neon.S +++ b/mythtv/libs/libavcodec/armv4l/simple_idct_neon.S @@ -307,9 +307,10 @@ idct_col4_st8: const: .short W1, W2, W3, W4, W5, W6, W7, W4c .macro idct_start data + push {v1-v3, lr} pld [\data] pld [\data, #64] - push {v1-v3, lr} + dmb vpush {d8-d15} adr a4, const vld1.64 {d0,d1}, [a4,:128]