00001
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #define OVERRIDE_INNER_PROD
00036 static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
00037 {
00038 spx_word32_t sum1=0,sum2=0;
00039 spx_word16_t *deadx, *deady;
00040 int deadlen, dead1, dead2, dead3, dead4, dead5, dead6;
00041 __asm__ __volatile__ (
00042 "\tldrsh %5, [%0], #2 \n"
00043 "\tldrsh %6, [%1], #2 \n"
00044 ".inner_prod_loop%=:\n"
00045 "\tsub %7, %7, %7\n"
00046 "\tsub %10, %10, %10\n"
00047
00048 "\tldrsh %8, [%0], #2 \n"
00049 "\tldrsh %9, [%1], #2 \n"
00050 "\tmla %7, %5, %6, %7\n"
00051 "\tldrsh %5, [%0], #2 \n"
00052 "\tldrsh %6, [%1], #2 \n"
00053 "\tmla %10, %8, %9, %10\n"
00054 "\tldrsh %8, [%0], #2 \n"
00055 "\tldrsh %9, [%1], #2 \n"
00056 "\tmla %7, %5, %6, %7\n"
00057 "\tldrsh %5, [%0], #2 \n"
00058 "\tldrsh %6, [%1], #2 \n"
00059 "\tmla %10, %8, %9, %10\n"
00060
00061 "\tldrsh %8, [%0], #2 \n"
00062 "\tldrsh %9, [%1], #2 \n"
00063 "\tmla %7, %5, %6, %7\n"
00064 "\tldrsh %5, [%0], #2 \n"
00065 "\tldrsh %6, [%1], #2 \n"
00066 "\tmla %10, %8, %9, %10\n"
00067 "\tldrsh %8, [%0], #2 \n"
00068 "\tldrsh %9, [%1], #2 \n"
00069 "\tmla %7, %5, %6, %7\n"
00070 "\tldrsh %5, [%0], #2 \n"
00071 "\tldrsh %6, [%1], #2 \n"
00072 "\tmla %10, %8, %9, %10\n"
00073
00074 "\tsubs %4, %4, #1\n"
00075 "\tadd %2, %2, %7, asr #5\n"
00076 "\tadd %3, %3, %10, asr #5\n"
00077 "\tbne .inner_prod_loop%=\n"
00078 : "=r" (deadx), "=r" (deady), "=r" (sum1), "=r" (sum2), "=r" (deadlen),
00079 "=r" (dead1), "=r" (dead2), "=r" (dead3), "=r" (dead4), "=r" (dead5), "=r" (dead6)
00080 : "0" (x), "1" (y), "2" (sum1), "3" (sum2), "4" (len>>3)
00081 : "cc"
00082 );
00083 return (sum1+sum2)>>1;
00084 }
00085
00086 #define OVERRIDE_PITCH_XCORR
00087 static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
00088 {
00089 int i,j;
00090 for (i=0;i<nb_pitch;i+=4)
00091 {
00092
00093
00094 spx_word32_t sum1=0;
00095 spx_word32_t sum2=0;
00096 spx_word32_t sum3=0;
00097 spx_word32_t sum4=0;
00098 const spx_word16_t *y = _y+i;
00099 const spx_word16_t *x = _x;
00100 spx_word32_t y0, y1, y2, y3;
00101 y0=*y++;
00102 y1=*y++;
00103 y2=*y++;
00104 y3=*y++;
00105 for (j=0;j<len;j+=4)
00106 {
00107 spx_word32_t part1, part2, part3, part4, x0;
00108 spx_word32_t dead1;
00109 __asm__ __volatile__ (
00110 #ifdef SHORTCUTS
00111 "\tldrsh %10, [%8], #4 \n"
00112 "\tmul %4, %10, %0 \n"
00113 "\tldrsh %15, [%8], #4 \n"
00114 "\tmul %5, %10, %1 \n"
00115 "\tldrsh %0, [%9], #2 \n"
00116 "\tmul %6, %10, %2 \n"
00117 "\tldrsh %1, [%9], #2 \n"
00118 "\tmul %7, %10, %3 \n"
00119
00120
00121 "\tmla %4, %15, %2, %4 \n"
00122 "\tldrsh %2, [%9], #2 \n"
00123 "\tmla %5, %15, %3, %5 \n"
00124 "\tldrsh %3, [%9], #2 \n"
00125 "\tmla %6, %15, %0, %6 \n"
00126 "\tmla %7, %15, %1, %7 \n"
00127
00128 #else
00129 "\tldrsh %10, [%8], #2 \n"
00130 "\tmul %4, %10, %0 \n"
00131 "\tmul %5, %10, %1 \n"
00132 "\tmul %6, %10, %2 \n"
00133 "\tmul %7, %10, %3 \n"
00134
00135 "\tldrsh %10, [%8], #2 \n"
00136 "\tldrsh %0, [%9], #2 \n"
00137 "\tmla %4, %10, %1, %4 \n"
00138 "\tmla %5, %10, %2, %5 \n"
00139 "\tmla %6, %10, %3, %6 \n"
00140 "\tmla %7, %10, %0, %7 \n"
00141
00142 "\tldrsh %10, [%8], #2 \n"
00143 "\tldrsh %1, [%9], #2 \n"
00144 "\tmla %4, %10, %2, %4 \n"
00145 "\tmla %5, %10, %3, %5 \n"
00146 "\tmla %6, %10, %0, %6 \n"
00147 "\tmla %7, %10, %1, %7 \n"
00148
00149 "\tldrsh %10, [%8], #2 \n"
00150 "\tldrsh %2, [%9], #2 \n"
00151 "\tmla %4, %10, %3, %4 \n"
00152 "\tmla %5, %10, %0, %5 \n"
00153 "\tmla %6, %10, %1, %6 \n"
00154 "\tmla %7, %10, %2, %7 \n"
00155
00156 "\tldrsh %3, [%9], #2 \n"
00157 #endif
00158
00159 "\tldr %10, %11 \n"
00160 "\tldr %15, %12 \n"
00161 "\tadd %4, %10, %4, asr #6 \n"
00162 "\tstr %4, %11 \n"
00163 "\tldr %10, %13 \n"
00164 "\tadd %5, %15, %5, asr #6 \n"
00165 "\tstr %5, %12 \n"
00166 "\tldr %15, %14 \n"
00167 "\tadd %6, %10, %6, asr #6 \n"
00168 "\tadd %7, %15, %7, asr #6 \n"
00169 "\tstr %6, %13 \n"
00170 "\tstr %7, %14 \n"
00171
00172 : "=r" (y0), "=r" (y1), "=r" (y2), "=r" (y3),
00173 "=r" (part1), "=r" (part2), "=r" (part3), "=r" (part4),
00174 "=r" (x), "=r" (y), "=r" (x0),
00175 "=m" (sum1), "=m" (sum2), "=m" (sum3), "=m" (sum4), "=r" (dead1)
00176 : "0" (y0), "1" (y1), "2" (y2), "3" (y3),
00177 "8" (x), "9" (y),
00178 "11" (sum1), "12" (sum2), "13" (sum3), "14" (sum4)
00179 : "cc", "memory"
00180 );
00181 }
00182 corr[nb_pitch-1-i]=sum1;
00183 corr[nb_pitch-2-i]=sum2;
00184 corr[nb_pitch-3-i]=sum3;
00185 corr[nb_pitch-4-i]=sum4;
00186 }
00187
00188 }