Browse code

Avoid an unpredictable branch: 20% faster predictor_update_filter, 0.4-4% faster APE decoding on Core 2.

Originally committed as revision 20720 to svn://svn.ffmpeg.org/ffmpeg/trunk

Loren Merritt authored on 2009/12/04 02:48:54
Showing 1 changed file
... ...
@@ -519,7 +519,7 @@ static inline int APESIGN(int32_t x) {
519 519
 
520 520
 static av_always_inline int predictor_update_filter(APEPredictor *p, const int decoded, const int filter, const int delayA, const int delayB, const int adaptA, const int adaptB)
521 521
 {
522
-    int32_t predictionA, predictionB;
522
+    int32_t predictionA, predictionB, sign;
523 523
 
524 524
     p->buf[delayA]     = p->lastA[filter];
525 525
     p->buf[adaptA]     = APESIGN(p->buf[delayA]);
... ...
@@ -547,32 +547,17 @@ static av_always_inline int predictor_update_filter(APEPredictor *p, const int d
547 547
     p->lastA[filter] = decoded + ((predictionA + (predictionB >> 1)) >> 10);
548 548
     p->filterA[filter] = p->lastA[filter] + ((p->filterA[filter] * 31) >> 5);
549 549
 
550
-    if (!decoded) // no need updating filter coefficients
551
-        return p->filterA[filter];
550
+    sign = APESIGN(decoded);
551
+    p->coeffsA[filter][0] += p->buf[adaptA    ] * sign;
552
+    p->coeffsA[filter][1] += p->buf[adaptA - 1] * sign;
553
+    p->coeffsA[filter][2] += p->buf[adaptA - 2] * sign;
554
+    p->coeffsA[filter][3] += p->buf[adaptA - 3] * sign;
555
+    p->coeffsB[filter][0] += p->buf[adaptB    ] * sign;
556
+    p->coeffsB[filter][1] += p->buf[adaptB - 1] * sign;
557
+    p->coeffsB[filter][2] += p->buf[adaptB - 2] * sign;
558
+    p->coeffsB[filter][3] += p->buf[adaptB - 3] * sign;
559
+    p->coeffsB[filter][4] += p->buf[adaptB - 4] * sign;
552 560
 
553
-    if (decoded > 0) {
554
-        p->coeffsA[filter][0] -= p->buf[adaptA    ];
555
-        p->coeffsA[filter][1] -= p->buf[adaptA - 1];
556
-        p->coeffsA[filter][2] -= p->buf[adaptA - 2];
557
-        p->coeffsA[filter][3] -= p->buf[adaptA - 3];
558
-
559
-        p->coeffsB[filter][0] -= p->buf[adaptB    ];
560
-        p->coeffsB[filter][1] -= p->buf[adaptB - 1];
561
-        p->coeffsB[filter][2] -= p->buf[adaptB - 2];
562
-        p->coeffsB[filter][3] -= p->buf[adaptB - 3];
563
-        p->coeffsB[filter][4] -= p->buf[adaptB - 4];
564
-    } else {
565
-        p->coeffsA[filter][0] += p->buf[adaptA    ];
566
-        p->coeffsA[filter][1] += p->buf[adaptA - 1];
567
-        p->coeffsA[filter][2] += p->buf[adaptA - 2];
568
-        p->coeffsA[filter][3] += p->buf[adaptA - 3];
569
-
570
-        p->coeffsB[filter][0] += p->buf[adaptB    ];
571
-        p->coeffsB[filter][1] += p->buf[adaptB - 1];
572
-        p->coeffsB[filter][2] += p->buf[adaptB - 2];
573
-        p->coeffsB[filter][3] += p->buf[adaptB - 3];
574
-        p->coeffsB[filter][4] += p->buf[adaptB - 4];
575
-    }
576 561
     return p->filterA[filter];
577 562
 }
578 563
 
... ...
@@ -604,7 +589,7 @@ static void predictor_decode_mono(APEContext * ctx, int count)
604 604
 {
605 605
     APEPredictor *p = &ctx->predictor;
606 606
     int32_t *decoded0 = ctx->decoded0;
607
-    int32_t predictionA, currentA, A;
607
+    int32_t predictionA, currentA, A, sign;
608 608
 
609 609
     currentA = p->lastA[0];
610 610
 
... ...
@@ -624,17 +609,11 @@ static void predictor_decode_mono(APEContext * ctx, int count)
624 624
         p->buf[YADAPTCOEFFSA]     = APESIGN(p->buf[YDELAYA    ]);
625 625
         p->buf[YADAPTCOEFFSA - 1] = APESIGN(p->buf[YDELAYA - 1]);
626 626
 
627
-        if (A > 0) {
628
-            p->coeffsA[0][0] -= p->buf[YADAPTCOEFFSA    ];
629
-            p->coeffsA[0][1] -= p->buf[YADAPTCOEFFSA - 1];
630
-            p->coeffsA[0][2] -= p->buf[YADAPTCOEFFSA - 2];
631
-            p->coeffsA[0][3] -= p->buf[YADAPTCOEFFSA - 3];
632
-        } else if (A < 0) {
633
-            p->coeffsA[0][0] += p->buf[YADAPTCOEFFSA    ];
634
-            p->coeffsA[0][1] += p->buf[YADAPTCOEFFSA - 1];
635
-            p->coeffsA[0][2] += p->buf[YADAPTCOEFFSA - 2];
636
-            p->coeffsA[0][3] += p->buf[YADAPTCOEFFSA - 3];
637
-        }
627
+        sign = APESIGN(A);
628
+        p->coeffsA[0][0] += p->buf[YADAPTCOEFFSA    ] * sign;
629
+        p->coeffsA[0][1] += p->buf[YADAPTCOEFFSA - 1] * sign;
630
+        p->coeffsA[0][2] += p->buf[YADAPTCOEFFSA - 2] * sign;
631
+        p->coeffsA[0][3] += p->buf[YADAPTCOEFFSA - 3] * sign;
638 632
 
639 633
         p->buf++;
640 634