00001
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
/* Marker macro defined alongside the Blackfin _spx_autocorr() below.
 * NOTE(review): presumably tested elsewhere so that a generic _spx_autocorr()
 * is not compiled as well -- the consumer is not visible here, confirm. */
00036 #define OVERRIDE_SPEEX_AUTOCORR
/*
 * Fixed-point autocorrelation with a Blackfin inline-assembly inner kernel.
 *
 * Fills ac[0..lag-1]; ac[i] is a scaled sum over j of x[j]*x[j-i].
 *
 *   x    in:  samples x[0..n-1]
 *   ac   out: ac[0..lag-1]
 *   lag  number of autocorrelation values to produce
 *   n    number of samples in x
 *
 * Outline:
 *   1. Estimate the signal energy (ac0) and derive two scale factors from it,
 *      'shift' and 'ac_shift'.
 *   2. The asm kernel computes, for i = lag-1 down to 1, the partial sum of
 *      x[j]*x[j-i] over the N_lag samples starting at x[lag-1], two values
 *      of i per outer pass, and stores them into ac32[i].
 *   3. C code computes ac32[0] in full, adds to each ac32[i] (i > 0) the
 *      remaining terms with j in [i, lag-1), and writes the result scaled
 *      down by ac_shift into ac[i].
 *
 * ADD32, SHR32 and MULT16_16 are defined outside this block; presumably
 * 32-bit add, right shift and 16x16->32 multiply -- confirm.
 *
 * NOTE(review): ac32 has 11 entries and lag is not range-checked here, so
 * lag > 11 would write past the array -- confirm callers never exceed it.
 * NOTE(review): the loop counts are lag_1>>1 and N_lag>>1 with two results /
 * two samples handled per pass, so this looks like it expects both lag-1 and
 * n-(lag-1) to be even; with an even lag, ac32[1] would not be written by
 * the kernel before it is read below -- confirm against callers.
 */
00037 void _spx_autocorr(
00038 const spx_word16_t *x,
00039 spx_word16_t *ac,
00040 int lag,
00041 int n
00042 )
00043 {
00044 spx_word32_t d; /* running sum for the C-side loops */
00045 const spx_word16_t *xs; /* x + lag - 1: start of the span the asm kernel reads via I0 */
00046 int i, j;
00047 spx_word32_t ac0=1; /* energy estimate; starts at 1 */
00048 spx_word32_t ac32[11], *ac32top; /* 32-bit results; ac32top = &ac32[lag-1] */
00049 int shift, ac_shift;
00050 ac32top = ac32+lag-1;
00051 int lag_1, N_lag;
00052 int nshift;
00053 lag_1 = lag-1;
00054 N_lag = n-lag_1; /* number of samples from x[lag-1] to x[n-1] */
/* ac0 = 1 + sum((x[j]*x[j]) >> 8) + n */
00055 for (j=0;j<n;j++)
00056 ac0 = ADD32(ac0,SHR32(MULT16_16(x[j],x[j]),8));
00057 ac0 = ADD32(ac0,n);
/* Starting from 8, lower 'shift' by one for each doubling of ac0 that is
 * needed to reach 0x40000000 (stops at 0). */
00058 shift = 8;
00059 while (shift && ac0<0x40000000)
00060 {
00061 shift--;
00062 ac0 <<= 1;
00063 }
/* Continue the same normalisation on the already-doubled ac0, now lowering
 * 'ac_shift' from 18. */
00064 ac_shift = 18;
00065 while (ac_shift && ac0<0x40000000)
00066 {
00067 ac_shift--;
00068 ac0 <<= 1;
00069 }
00070
00071 xs = x+lag-1;
00072 nshift = -shift; /* negated shift count handed to ASHIFT in the kernel */
/* Operands: %0=xs, %1=x, %2=ac32top, %3=N_lag, %4=lag_1, %5=nshift.
 * All are inputs; results are stored through P1 into ac32, which is why
 * "memory" is in the clobber list. */
00073 __asm__ __volatile__
00074 (
00075 "P2 = %0;\n\t" /* P2 = xs */
00076 "I0 = P2;\n\t" /* I0 walks the samples starting at xs */
00077 "B0 = P2;\n\t" /* presumably circular-buffer base for I0 -- confirm */
00078 "R0 = %3;\n\t" /* R0 = N_lag */
00079 "P3 = %3;\n\t" /* P3 = N_lag (inner loop count source) */
00080 "P4 = %4;\n\t" /* P4 = lag_1 (outer loop count source) */
00081 "R1 = R0 << 1;\n\t" /* R1 = 2*N_lag */
00082 "L0 = R1;\n\t" /* presumably circular length in bytes for I0, so it wraps back to xs -- confirm */
00083 "P0 = %1;\n\t" /* P0 = x: start of the second operand for the current pass */
00084 "P1 = %2;\n\t" /* P1 = ac32top: store pointer, stepped downwards */
00085 "B1 = P1;\n\t"
00086 "R4 = %5;\n\t" /* R4 = nshift */
00087 "L1 = 0;\n\t" /* presumably linear (non-circular) addressing for I1 -- confirm */
00088
00089 "r0 = [I0++];\n\t" /* preload the first 32-bit word (two samples) from xs */
00090 "R2 = 0;R3=0;"
/* Outer loop: lag_1>>1 passes, each producing two accumulators.
 * "%=" is expanded by GCC to a number unique to this asm statement, keeping
 * the loop labels distinct. */
00091 "LOOP pitch%= LC0 = P4 >> 1;\n\t"
00092 "LOOP_BEGIN pitch%=;\n\t"
00093 "I1 = P0;\n\t"
00094 "A1 = A0 = 0;\n\t" /* clear both accumulators */
00095 "R1 = [I1++];\n\t" /* preload two samples of the second operand */
/* Inner loop: N_lag>>1 passes. A0 accumulates R0.L*R1.L and R0.H*R1.H;
 * A1 accumulates the cross terms R0.L*R1.H and R0.H*R1.L, while the halves
 * of R1 and then all of R0 are reloaded alongside. */
00096 "LOOP inner_prod%= LC1 = P3 >> 1;\n\t"
00097 "LOOP_BEGIN inner_prod%=;\n\t"
00098 "A1 += R0.L*R1.H, A0 += R0.L*R1.L (IS) || R1.L = W[I1++];\n\t"
00099 "A1 += R0.H*R1.L, A0 += R0.H*R1.H (IS) || R1.H = W[I1++] || R0 = [I0++];\n\t"
00100 "LOOP_END inner_prod%=;\n\t"
/* Shift both sums by nshift (= -shift); presumably an arithmetic right shift
 * by 'shift' -- confirm against the Blackfin ISA. */
00101 "A0 = ASHIFT A0 by R4.L;\n\t"
00102 "A1 = ASHIFT A1 by R4.L;\n\t"
00103
00104 "R2 = A0, R3 = A1;\n\t"
00105 "[P1--] = R2;\n\t" /* store the A0 result, then step P1 down */
00106 "[P1--] = R3;\n\t" /* store the A1 result, then step P1 down */
00107 "P0 += 4;\n\t" /* advance the second operand start by 4 bytes for the next pass */
00108 "LOOP_END pitch%=;\n\t"
00109 : : "m" (xs), "m" (x), "m" (ac32top), "m" (N_lag), "m" (lag_1), "m" (nshift)
00110 : "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "R4", "I0", "I1", "L0", "L1", "B0", "B1", "memory"
00111 );
/* ac32[0] is computed entirely in C: sum over all n samples of
 * (x[j]*x[j]) >> shift. */
00112 d=0;
00113 for (j=0;j<n;j++)
00114 {
00115 d = ADD32(d,SHR32(MULT16_16(x[j],x[j]), shift));
00116 }
00117 ac32[0] = d;
00118
/* For each i, sum the terms with j in [i, lag_1), which the kernel did not
 * cover, add them to the kernel's result for i > 0, and emit the scaled
 * output. For i == 0 the sum is computed but unused, since ac32[0] already
 * holds the full sum. */
00119 for (i=0;i<lag;i++)
00120 {
00121 d=0;
00122 for (j=i;j<lag_1;j++)
00123 {
00124 d = ADD32(d,SHR32(MULT16_16(x[j],x[j-i]), shift));
00125 }
00126 if (i)
00127 ac32[i] += d;
00128 ac[i] = SHR32(ac32[i], ac_shift); /* scale down by ac_shift and store */
00129 }
00130 }
00131