Main Page | Class List | Directories | File List | Class Members | File Members

ltp_arm4.h

Go to the documentation of this file.
00001 /* Copyright (C) 2004 Jean-Marc Valin */
00006 /*
00007    Redistribution and use in source and binary forms, with or without
00008    modification, are permitted provided that the following conditions
00009    are met:
00010    
00011    - Redistributions of source code must retain the above copyright
00012    notice, this list of conditions and the following disclaimer.
00013    
00014    - Redistributions in binary form must reproduce the above copyright
00015    notice, this list of conditions and the following disclaimer in the
00016    documentation and/or other materials provided with the distribution.
00017    
00018    - Neither the name of the Xiph.org Foundation nor the names of its
00019    contributors may be used to endorse or promote products derived from
00020    this software without specific prior written permission.
00021    
00022    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00023    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00024    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00025    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
00026    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00027    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00028    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00029    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00030    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00031    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00032    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033 */
00034 
00035 #define OVERRIDE_INNER_PROD
      /** ARM inline-assembly override of inner_prod(): scaled dot product of two
       *  16-bit vectors.
       *
       *  Each loop pass consumes eight samples from x and from y. Products of the
       *  1st/3rd/5th/7th sample pairs accumulate in one 32-bit register and those
       *  of the 2nd/4th/6th/8th pairs in another; at the end of the pass each
       *  accumulator is arithmetically shifted right by 5 and added to sum1 and
       *  sum2 respectively.
       *
       *  @param x    First input vector.
       *  @param y    Second input vector.
       *  @param len  Number of samples; len>>3 passes of eight samples are run,
       *              so any remainder of len modulo 8 is ignored.
       *  @return     (sum1+sum2)>>1.
       *
       *  NOTE(review): the loop is bottom-tested (the pass counter is decremented
       *  and checked only after a full pass), so a count of zero (len < 8) is not
       *  handled: the counter would step past zero and the loop would keep
       *  running. Callers presumably always pass len >= 8 -- confirm.
       *
       *  NOTE(review): the operands for the next pass are loaded at the end of
       *  the current one, so the final pass reads one sample beyond the last one
       *  it multiplies, from both x and y. The values are never used, but the
       *  memory must be readable.
       */
00036 static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
00037 {
00038    spx_word32_t sum1=0,sum2=0;
00039    spx_word16_t *deadx, *deady;
00040    int deadlen, dead1, dead2, dead3, dead4, dead5, dead6;
         /* Operand map:
          *   %0, %1   x and y pointers (post-incremented by 2 bytes per load)
          *   %2, %3   sum1, sum2
          *   %4       remaining pass count (len>>3)
          *   %5, %6   x/y sample pair A        %8, %9   x/y sample pair B
          *   %7, %10  per-pass accumulators for pair A / pair B products
          * The dead* variables only exist to let the compiler pick scratch
          * registers; their final values are discarded. */
00041    __asm__ __volatile__ (
               /* Prime pair A with the first x and y samples. */
00042          "\tldrsh %5, [%0], #2 \n"
00043          "\tldrsh %6, [%1], #2 \n"
00044          ".inner_prod_loop%=:\n"
               /* r - r = 0: clear both per-pass accumulators. */
00045          "\tsub %7, %7, %7\n"
00046          "\tsub %10, %10, %10\n"
00047 
               /* Loads for the next pair are interleaved with the multiply-accumulate
                  of the pair loaded previously. */
00048          "\tldrsh %8, [%0], #2 \n"
00049          "\tldrsh %9, [%1], #2 \n"
00050          "\tmla %7, %5, %6, %7\n"
00051          "\tldrsh %5, [%0], #2 \n"
00052          "\tldrsh %6, [%1], #2 \n"
00053          "\tmla %10, %8, %9, %10\n"
00054          "\tldrsh %8, [%0], #2 \n"
00055          "\tldrsh %9, [%1], #2 \n"
00056          "\tmla %7, %5, %6, %7\n"
00057          "\tldrsh %5, [%0], #2 \n"
00058          "\tldrsh %6, [%1], #2 \n"
00059          "\tmla %10, %8, %9, %10\n"
00060 
00061          "\tldrsh %8, [%0], #2 \n"
00062          "\tldrsh %9, [%1], #2 \n"
00063          "\tmla %7, %5, %6, %7\n"
00064          "\tldrsh %5, [%0], #2 \n"
00065          "\tldrsh %6, [%1], #2 \n"
00066          "\tmla %10, %8, %9, %10\n"
00067          "\tldrsh %8, [%0], #2 \n"
00068          "\tldrsh %9, [%1], #2 \n"
00069          "\tmla %7, %5, %6, %7\n"
               /* This pair A load is the read-ahead for the next pass. */
00070          "\tldrsh %5, [%0], #2 \n"
00071          "\tldrsh %6, [%1], #2 \n"
00072          "\tmla %10, %8, %9, %10\n"
00073 
               /* Decrement the pass counter (sets the flags tested by bne), then
                  fold the scaled per-pass accumulators into the running sums. */
00074          "\tsubs %4, %4, #1\n"
00075          "\tadd %2, %2, %7, asr #5\n"
00076          "\tadd %3, %3, %10, asr #5\n"
00077          "\tbne .inner_prod_loop%=\n"
00078    : "=r" (deadx), "=r" (deady), "=r" (sum1),  "=r" (sum2), "=r" (deadlen),
00079    "=r" (dead1), "=r" (dead2), "=r" (dead3), "=r" (dead4), "=r" (dead5), "=r" (dead6)
00080    : "0" (x), "1" (y), "2" (sum1), "3" (sum2), "4" (len>>3)
         /* "memory": the asm dereferences x and y, which are passed only as
            register operands, so the compiler must be told that memory is read
            and that pending stores to those buffers have to be completed first. */
00081    : "cc", "memory"
00082                         );
00083    return (sum1+sum2)>>1;
00084 }
00085 
00086 #define OVERRIDE_PITCH_XCORR
      /** ARM inline-assembly override of pitch_xcorr(): fills corr[] with
       *  correlation sums between _x and shifted windows of _y, four shifts per
       *  outer-loop pass.
       *
       *  For each i (stepping by 4), sum1..sum4 accumulate the products of _x[j]
       *  with _y[i+j], _y[i+1+j], _y[i+2+j] and _y[i+3+j]. Every inner pass covers
       *  four x positions; its four partial sums are arithmetically shifted right
       *  by 6 before being added to sum1..sum4. Results are stored in reverse
       *  order: sum1..sum4 go to corr[nb_pitch-1-i] .. corr[nb_pitch-4-i].
       *
       *  When SHORTCUTS is defined, the x pointer advances 4 bytes per load and
       *  only two x samples (every other one) are multiplied per inner pass.
       *
       *  @param _x        First input vector.
       *  @param _y        Second input vector; read starting at _y+i for each i.
       *  @param corr      Output array, written at indices nb_pitch-1-i down to
       *                   nb_pitch-4-i.
       *  @param len       Number of x positions to correlate over; the inner loop
       *                   steps j by 4.
       *  @param nb_pitch  Number of correlation values; the outer loop steps i by 4.
       *  @param stack     Not referenced by this implementation.
       *
       *  NOTE(review): four results are written per outer pass, so if nb_pitch is
       *  not a multiple of 4 the last pass writes at negative corr indices.
       *  Presumably callers guarantee a multiple of 4 -- confirm.
       *
       *  NOTE(review): the inner pass always consumes four x positions, so len is
       *  presumably a multiple of 4 -- confirm. Each inner pass also loads four
       *  further y samples, and at least the last of them is not used within that
       *  pass, so _y is read slightly beyond the samples actually multiplied.
       */
00087 static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
00088 {
00089    int i,j;
00090    for (i=0;i<nb_pitch;i+=4)
00091    {
00092       /* Compute correlation*/
00093       //corr[nb_pitch-1-i]=inner_prod(x, _y+i, len);
00094       spx_word32_t sum1=0;
00095       spx_word32_t sum2=0;
00096       spx_word32_t sum3=0;
00097       spx_word32_t sum4=0;
00098       const spx_word16_t *y = _y+i;
00099       const spx_word16_t *x = _x;
00100       spx_word32_t y0, y1, y2, y3;
            /* Prime the four-sample sliding window of y. */
00101       y0=*y++;
00102       y1=*y++;
00103       y2=*y++;
00104       y3=*y++;
00105       for (j=0;j<len;j+=4)
00106       {
00107          spx_word32_t part1, part2, part3, part4, x0;
00108          spx_word32_t dead1;
               /* Operand map:
                *   %0-%3    y0..y3, the sliding window of y (refilled in place)
                *   %4-%7    part1..part4, partial sums for this pass
                *   %8, %9   x and y pointers (post-incremented by the loads)
                *   %10      x0, current x sample; reused as scratch afterwards
                *   %11-%14  sum1..sum4, accessed as memory operands
                *   %15      dead1, second scratch register */
00109          __asm__ __volatile__ (
00110 #ifdef SHORTCUTS
                     /* Reduced variant: x is stepped by 4 bytes, so only every
                        other x sample is used (two multiplies per partial sum). */
00111                "\tldrsh %10, [%8], #4 \n"
00112                "\tmul %4, %10, %0 \n"
00113                "\tldrsh %15, [%8], #4 \n"
00114                "\tmul %5, %10, %1 \n"
00115                "\tldrsh %0, [%9], #2 \n"
00116                "\tmul %6, %10, %2 \n"
00117                "\tldrsh %1, [%9], #2 \n"
00118                "\tmul %7, %10, %3 \n"
00119                
00120                
00121                "\tmla %4, %15, %2, %4 \n"
00122                "\tldrsh %2, [%9], #2 \n"
00123                "\tmla %5, %15, %3, %5 \n"
00124                "\tldrsh %3, [%9], #2 \n"
00125                "\tmla %6, %15, %0, %6 \n"
00126                "\tmla %7, %15, %1, %7 \n"
00127 
00128 #else
                     /* First x sample: start each partial sum with x * y0..y3. */
00129                "\tldrsh %10, [%8], #2 \n"
00130                "\tmul %4, %10, %0 \n"
00131                "\tmul %5, %10, %1 \n"
00132                "\tmul %6, %10, %2 \n"
00133                "\tmul %7, %10, %3 \n"
00134 
                     /* Next x sample; the oldest window slot (%0) is refilled with
                        the next y sample and the window is used rotated by one. */
00135                "\tldrsh %10, [%8], #2 \n"
00136                "\tldrsh %0, [%9], #2 \n"
00137                "\tmla %4, %10, %1, %4 \n"
00138                "\tmla %5, %10, %2, %5 \n"
00139                "\tmla %6, %10, %3, %6 \n"
00140                "\tmla %7, %10, %0, %7 \n"
00141 
                     /* Same again with slot %1 refilled. */
00142                "\tldrsh %10, [%8], #2 \n"
00143                "\tldrsh %1, [%9], #2 \n"
00144                "\tmla %4, %10, %2, %4 \n"
00145                "\tmla %5, %10, %3, %5 \n"
00146                "\tmla %6, %10, %0, %6 \n"
00147                "\tmla %7, %10, %1, %7 \n"
00148 
                     /* Same again with slot %2 refilled. */
00149                "\tldrsh %10, [%8], #2 \n"
00150                "\tldrsh %2, [%9], #2 \n"
00151                "\tmla %4, %10, %3, %4 \n"
00152                "\tmla %5, %10, %0, %5 \n"
00153                "\tmla %6, %10, %1, %6 \n"
00154                "\tmla %7, %10, %2, %7 \n"
00155 
                     /* Refill slot %3 so the window is complete for the next pass. */
00156                "\tldrsh %3, [%9], #2 \n"
00157 #endif
00158 
                     /* Read-modify-write of sum1..sum4 in memory:
                        sumN += partN >> 6 (arithmetic shift), using %10 and %15
                        as temporaries for the loaded sums. */
00159                "\tldr %10, %11 \n"
00160                "\tldr %15, %12 \n"
00161                "\tadd %4, %10, %4, asr #6 \n"
00162                "\tstr %4, %11 \n"
00163                "\tldr %10, %13 \n"
00164                "\tadd %5, %15, %5, asr #6 \n"
00165                "\tstr %5, %12 \n"
00166                "\tldr %15, %14 \n"
00167                "\tadd %6, %10, %6, asr #6 \n"
00168                "\tadd %7, %15, %7, asr #6 \n"
00169                "\tstr %6, %13 \n"
00170                "\tstr %7, %14 \n"
00171 
00172             : "=r" (y0), "=r" (y1), "=r" (y2), "=r" (y3),
00173          "=r" (part1),  "=r" (part2),  "=r" (part3),  "=r" (part4),
00174          "=r" (x), "=r" (y), "=r" (x0),
00175          "=m" (sum1), "=m" (sum2), "=m" (sum3), "=m" (sum4), "=r" (dead1)
                  /* Matching constraints tie each input to the output operand of
                     the same number, so y0..y3, x, y and sum1..sum4 are updated
                     in place across inner passes. */
00176             : "0" (y0), "1" (y1), "2" (y2), "3" (y3),
00177             "8" (x), "9" (y),
00178             "11" (sum1), "12" (sum2), "13" (sum3), "14" (sum4)
00179             : "cc", "memory"
00180                               );
00181       }
            /* Store the four results for this pass, highest corr index first. */
00182       corr[nb_pitch-1-i]=sum1;
00183       corr[nb_pitch-2-i]=sum2;
00184       corr[nb_pitch-3-i]=sum3;
00185       corr[nb_pitch-4-i]=sum4;
00186    }
00187 
00188 }

Generated on Thu Jun 15 13:41:59 2006 for speex by  doxygen 1.4.2