• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libavcodec/dct-test.c

Go to the documentation of this file.
00001 /*
00002  * (c) 2001 Fabrice Bellard
00003  *     2007 Marc Hoffman <marc.hoffman@analog.com>
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include <stdlib.h>
00029 #include <stdio.h>
00030 #include <string.h>
00031 #include <sys/time.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034 
00035 #include "libavutil/common.h"
00036 
00037 #include "simple_idct.h"
00038 #include "aandcttab.h"
00039 #include "faandct.h"
00040 #include "faanidct.h"
00041 #include "x86/idct_xvid.h"
00042 
00043 #undef printf
00044 #undef random
00045 
/**
 * Definition of the fast_memcpy symbol for this test program.
 *
 * Simply forwards to the C library memcpy().
 *
 * @param a destination buffer
 * @param b source buffer (must not overlap a)
 * @param c number of bytes to copy
 * @return  a, as returned by memcpy()
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
00047 
/*
 * Prototypes for transforms that are referenced by this program but are not
 * defined in this file. All of them take a single pointer to the coefficient
 * block and work on it in place.
 */
00048 /* reference fdct/idct */
00049 void ff_ref_fdct(DCTELEM *block);
00050 void ff_ref_idct(DCTELEM *block);
00051 void ff_ref_dct_init(void); /* called once from main() before any test runs */
00052 
00053 void ff_mmx_idct(DCTELEM *data);    /* "LIBMPEG2-MMX" entry of algos[] */
00054 void ff_mmxext_idct(DCTELEM *data); /* "LIBMPEG2-MMXEXT" entry of algos[] */
00055 
00056 void odivx_idct_c(short *block); /* declared only; nothing in this file uses it */
00057 
00058 // BFIN
00059 void ff_bfin_idct(DCTELEM *block);
00060 void ff_bfin_fdct(DCTELEM *block);
00061 
00062 // ALTIVEC
00063 void fdct_altivec(DCTELEM *block);
00064 //void idct_altivec(DCTELEM *block);?? no routine
00065 
00066 // ARM
00067 void j_rev_dct_ARM(DCTELEM *data);
00068 void simple_idct_ARM(DCTELEM *data);
00069 void simple_idct_armv5te(DCTELEM *data);
00070 void ff_simple_idct_armv6(DCTELEM *data);
00071 void ff_simple_idct_neon(DCTELEM *data);
00072 
00073 void ff_simple_idct_axp(DCTELEM *data); /* "SIMPLE-ALPHA" entry of algos[] */
00074 
/* One row of the algos[] table: a transform under test and how to drive it. */
00075 struct algo {
00076   const char *name;                 /* label printed in the result lines; NULL ends the table */
00077   enum { FDCT, IDCT } is_idct;      /* main() runs the row only if this equals its -i setting */
00078   void (* func) (DCTELEM *block);   /* transform under test, operates in place */
00079   void (* ref)  (DCTELEM *block);   /* reference transform run on the same input */
00080   enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format; /* input permutation, or output rescale for SCALE_PERM, applied by dct_error() */
00081   int  mm_support;                  /* CPU feature bits that must all be set in cpu_flags; 0 = no requirement */
00082 };
00083 
/* Format used for the "FAAN" row: SCALE_PERM unless FAAN_POSTSCALE is defined. */
00084 #ifndef FAAN_POSTSCALE
00085 #define FAAN_SCALE SCALE_PERM
00086 #else
00087 #define FAAN_SCALE NO_PERM
00088 #endif
00089 
/* CPU feature bits; assigned from mm_support() at the start of main(). */
00090 static int cpu_flags;
00091 
00092 struct algo algos[] = {
00093   {"REF-DBL",         0, ff_ref_fdct,        ff_ref_fdct, NO_PERM},
00094   {"FAAN",            0, ff_faandct,         ff_ref_fdct, FAAN_SCALE},
00095   {"FAANI",           1, ff_faanidct,        ff_ref_idct, NO_PERM},
00096   {"IJG-AAN-INT",     0, fdct_ifast,         ff_ref_fdct, SCALE_PERM},
00097   {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
00098   {"REF-DBL",         1, ff_ref_idct,        ff_ref_idct, NO_PERM},
00099   {"INT",             1, j_rev_dct,          ff_ref_idct, MMX_PERM},
00100   {"SIMPLE-C",        1, ff_simple_idct,     ff_ref_idct, NO_PERM},
00101 
00102 #if HAVE_MMX
00103   {"MMX",             0, ff_fdct_mmx,        ff_ref_fdct, NO_PERM, FF_MM_MMX},
00104 #if HAVE_MMX2
00105   {"MMX2",            0, ff_fdct_mmx2,       ff_ref_fdct, NO_PERM, FF_MM_MMXEXT},
00106   {"SSE2",            0, ff_fdct_sse2,       ff_ref_fdct, NO_PERM, FF_MM_SSE2},
00107 #endif
00108 
00109 #if CONFIG_GPL
00110   {"LIBMPEG2-MMX",    1, ff_mmx_idct,        ff_ref_idct, MMX_PERM, FF_MM_MMX},
00111   {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     ff_ref_idct, MMX_PERM, FF_MM_MMXEXT},
00112 #endif
00113   {"SIMPLE-MMX",      1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX},
00114   {"XVID-MMX",        1, ff_idct_xvid_mmx,   ff_ref_idct, NO_PERM, FF_MM_MMX},
00115   {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  ff_ref_idct, NO_PERM, FF_MM_MMXEXT},
00116   {"XVID-SSE2",       1, ff_idct_xvid_sse2,  ff_ref_idct, SSE2_PERM, FF_MM_SSE2},
00117 #endif
00118 
00119 #if HAVE_ALTIVEC
00120   {"altivecfdct",     0, fdct_altivec,       ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC},
00121 #endif
00122 
00123 #if ARCH_BFIN
00124   {"BFINfdct",        0, ff_bfin_fdct,       ff_ref_fdct, NO_PERM},
00125   {"BFINidct",        1, ff_bfin_idct,       ff_ref_idct, NO_PERM},
00126 #endif
00127 
00128 #if ARCH_ARM
00129   {"SIMPLE-ARM",      1, simple_idct_ARM,    ff_ref_idct, NO_PERM },
00130   {"INT-ARM",         1, j_rev_dct_ARM,      ff_ref_idct, MMX_PERM },
00131 #if HAVE_ARMV5TE
00132   {"SIMPLE-ARMV5TE",  1, simple_idct_armv5te, ff_ref_idct, NO_PERM },
00133 #endif
00134 #if HAVE_ARMV6
00135   {"SIMPLE-ARMV6",    1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
00136 #endif
00137 #if HAVE_NEON
00138   {"SIMPLE-NEON",     1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
00139 #endif
00140 #endif /* ARCH_ARM */
00141 
00142 #if ARCH_ALPHA
00143   {"SIMPLE-ALPHA",    1, ff_simple_idct_axp,  ff_ref_idct, NO_PERM },
00144 #endif
00145 
00146   { 0 }
00147 };
00148 
/* Right-shift applied by dct_error() after multiplying SCALE_PERM outputs by the per-coefficient scale. */
00149 #define AANSCALE_BITS 12
00150 
/*
 * Clamping lookup table, filled in by main(): the first MAX_NEG_CROP entries
 * are 0, the next 256 hold 0..255, and the last MAX_NEG_CROP entries are 255.
 * Nothing in this file reads it; presumably it satisfies a symbol needed by
 * the linked IDCT code (TODO confirm).
 */
00151 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
00152 
/**
 * Read the current time of day through gettimeofday().
 *
 * @return seconds and microseconds combined into one microsecond count
 */
int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    /* widen before multiplying so the product is computed in 64 bits */
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
00159 
/* Number of random blocks checked per accuracy run. */
00160 #define NB_ITS 20000
/* Number of transform calls between two clock reads in the speed loops. */
00161 #define NB_ITS_SPEED 50000
00162 
/* Destination index for each source coefficient under MMX_PERM; filled by idct_mmx_init(). */
00163 static short idct_mmx_perm[64];
00164 
/* Destination index for each source coefficient under MMX_SIMPLE_PERM. */
00165 static short idct_simple_mmx_perm[64]={
00166         0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
00167         0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
00168         0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
00169         0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
00170         0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
00171         0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
00172         0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
00173         0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
00174 };
00175 
/* Under SSE2_PERM the low three bits of each index are replaced via this table; the upper bits are kept. */
00176 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00177 
00178 void idct_mmx_init(void)
00179 {
00180     int i;
00181 
00182     /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
00183     for (i = 0; i < 64; i++) {
00184         idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00185 //        idct_simple_mmx_perm[i] = simple_block_permute_op(i);
00186     }
00187 }
00188 
/* Working buffer handed to the transform under test (16-byte aligned). */
00189 static DCTELEM block[64] __attribute__ ((aligned (16)));
/* Generated input in natural order; the reference transform runs on it in place. */
00190 static DCTELEM block1[64] __attribute__ ((aligned (8)));
/* Copy of the generated input; only read by the disabled (#if 0) debug dump in dct_error(). */
00191 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
00192 
/**
 * Execute the x86 "emms" instruction when the build has MMX support and
 * cpu_flags reports MMX; otherwise do nothing.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (!(cpu_flags & FF_MM_MMX))
        return;
    __asm__ volatile ("emms\n\t");
#endif
}
00200 
00201 void dct_error(const char *name, int is_idct,
00202                void (*fdct_func)(DCTELEM *block),
00203                void (*fdct_ref)(DCTELEM *block), int form, int test)
00204 {
00205     int it, i, scale;
00206     int err_inf, v;
00207     int64_t err2, ti, ti1, it1;
00208     int64_t sysErr[64], sysErrMax=0;
00209     int maxout=0;
00210     int blockSumErrMax=0, blockSumErr;
00211 
00212     srandom(0);
00213 
00214     err_inf = 0;
00215     err2 = 0;
00216     for(i=0; i<64; i++) sysErr[i]=0;
00217     for(it=0;it<NB_ITS;it++) {
00218         for(i=0;i<64;i++)
00219             block1[i] = 0;
00220         switch(test){
00221         case 0:
00222             for(i=0;i<64;i++)
00223                 block1[i] = (random() % 512) -256;
00224             if (is_idct){
00225                 ff_ref_fdct(block1);
00226 
00227                 for(i=0;i<64;i++)
00228                     block1[i]>>=3;
00229             }
00230         break;
00231         case 1:{
00232             int num= (random()%10)+1;
00233             for(i=0;i<num;i++)
00234                 block1[random()%64] = (random() % 512) -256;
00235         }break;
00236         case 2:
00237             block1[0]= (random()%4096)-2048;
00238             block1[63]= (block1[0]&1)^1;
00239         break;
00240         }
00241 
00242 #if 0 // simulate mismatch control
00243 { int sum=0;
00244         for(i=0;i<64;i++)
00245            sum+=block1[i];
00246 
00247         if((sum&1)==0) block1[63]^=1;
00248 }
00249 #endif
00250 
00251         for(i=0; i<64; i++)
00252             block_org[i]= block1[i];
00253 
00254         if (form == MMX_PERM) {
00255             for(i=0;i<64;i++)
00256                 block[idct_mmx_perm[i]] = block1[i];
00257             } else if (form == MMX_SIMPLE_PERM) {
00258             for(i=0;i<64;i++)
00259                 block[idct_simple_mmx_perm[i]] = block1[i];
00260 
00261         } else if (form == SSE2_PERM) {
00262             for(i=0; i<64; i++)
00263                 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
00264         } else if (form == PARTTRANS_PERM) {
00265             for(i=0; i<64; i++)
00266                 block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
00267         } else {
00268             for(i=0; i<64; i++)
00269                 block[i]= block1[i];
00270         }
00271 #if 0 // simulate mismatch control for tested IDCT but not the ref
00272 { int sum=0;
00273         for(i=0;i<64;i++)
00274            sum+=block[i];
00275 
00276         if((sum&1)==0) block[63]^=1;
00277 }
00278 #endif
00279 
00280         fdct_func(block);
00281         mmx_emms();
00282 
00283         if (form == SCALE_PERM) {
00284             for(i=0; i<64; i++) {
00285                 scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
00286                 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
00287             }
00288         }
00289 
00290         fdct_ref(block1);
00291 
00292         blockSumErr=0;
00293         for(i=0;i<64;i++) {
00294             v = abs(block[i] - block1[i]);
00295             if (v > err_inf)
00296                 err_inf = v;
00297             err2 += v * v;
00298             sysErr[i] += block[i] - block1[i];
00299             blockSumErr += v;
00300             if( abs(block[i])>maxout) maxout=abs(block[i]);
00301         }
00302         if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
00303 #if 0 // print different matrix pairs
00304         if(blockSumErr){
00305             printf("\n");
00306             for(i=0; i<64; i++){
00307                 if((i&7)==0) printf("\n");
00308                 printf("%4d ", block_org[i]);
00309             }
00310             for(i=0; i<64; i++){
00311                 if((i&7)==0) printf("\n");
00312                 printf("%4d ", block[i] - block1[i]);
00313             }
00314         }
00315 #endif
00316     }
00317     for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
00318 
00319 #if 1 // dump systematic errors
00320     for(i=0; i<64; i++){
00321         if(i%8==0) printf("\n");
00322         printf("%5d ", (int)sysErr[i]);
00323     }
00324     printf("\n");
00325 #endif
00326 
00327     printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
00328            is_idct ? "IDCT" : "DCT",
00329            name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
00330 #if 1 //Speed test
00331     /* speed test */
00332     for(i=0;i<64;i++)
00333         block1[i] = 0;
00334     switch(test){
00335     case 0:
00336         for(i=0;i<64;i++)
00337             block1[i] = (random() % 512) -256;
00338         if (is_idct){
00339             ff_ref_fdct(block1);
00340 
00341             for(i=0;i<64;i++)
00342                 block1[i]>>=3;
00343         }
00344     break;
00345     case 1:{
00346     case 2:
00347         block1[0] = (random() % 512) -256;
00348         block1[1] = (random() % 512) -256;
00349         block1[2] = (random() % 512) -256;
00350         block1[3] = (random() % 512) -256;
00351     }break;
00352     }
00353 
00354     if (form == MMX_PERM) {
00355         for(i=0;i<64;i++)
00356             block[idct_mmx_perm[i]] = block1[i];
00357     } else if(form == MMX_SIMPLE_PERM) {
00358         for(i=0;i<64;i++)
00359             block[idct_simple_mmx_perm[i]] = block1[i];
00360     } else {
00361         for(i=0; i<64; i++)
00362             block[i]= block1[i];
00363     }
00364 
00365     ti = gettime();
00366     it1 = 0;
00367     do {
00368         for(it=0;it<NB_ITS_SPEED;it++) {
00369             for(i=0; i<64; i++)
00370                 block[i]= block1[i];
00371 //            memcpy(block, block1, sizeof(DCTELEM) * 64);
00372 // do not memcpy especially not fastmemcpy because it does movntq !!!
00373             fdct_func(block);
00374         }
00375         it1 += NB_ITS_SPEED;
00376         ti1 = gettime() - ti;
00377     } while (ti1 < 1000000);
00378     mmx_emms();
00379 
00380     printf("%s %s: %0.1f kdct/s\n",
00381            is_idct ? "IDCT" : "DCT",
00382            name, (double)it1 * 1000.0 / (double)ti1);
00383 #endif
00384 }
00385 
/* 8x8 pixel output of the idct248 implementation under test (see idct248_error()). */
00386 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
/* 8x8 pixel output of idct248_ref() for the same input. */
00387 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
00388 
00389 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
00390 {
00391     static int init;
00392     static double c8[8][8];
00393     static double c4[4][4];
00394     double block1[64], block2[64], block3[64];
00395     double s, sum, v;
00396     int i, j, k;
00397 
00398     if (!init) {
00399         init = 1;
00400 
00401         for(i=0;i<8;i++) {
00402             sum = 0;
00403             for(j=0;j<8;j++) {
00404                 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
00405                 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
00406                 sum += c8[i][j] * c8[i][j];
00407             }
00408         }
00409 
00410         for(i=0;i<4;i++) {
00411             sum = 0;
00412             for(j=0;j<4;j++) {
00413                 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
00414                 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
00415                 sum += c4[i][j] * c4[i][j];
00416             }
00417         }
00418     }
00419 
00420     /* butterfly */
00421     s = 0.5 * sqrt(2.0);
00422     for(i=0;i<4;i++) {
00423         for(j=0;j<8;j++) {
00424             block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
00425             block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
00426         }
00427     }
00428 
00429     /* idct8 on lines */
00430     for(i=0;i<8;i++) {
00431         for(j=0;j<8;j++) {
00432             sum = 0;
00433             for(k=0;k<8;k++)
00434                 sum += c8[k][j] * block1[8*i+k];
00435             block2[8*i+j] = sum;
00436         }
00437     }
00438 
00439     /* idct4 */
00440     for(i=0;i<8;i++) {
00441         for(j=0;j<4;j++) {
00442             /* top */
00443             sum = 0;
00444             for(k=0;k<4;k++)
00445                 sum += c4[k][j] * block2[8*(2*k)+i];
00446             block3[8*(2*j)+i] = sum;
00447 
00448             /* bottom */
00449             sum = 0;
00450             for(k=0;k<4;k++)
00451                 sum += c4[k][j] * block2[8*(2*k+1)+i];
00452             block3[8*(2*j+1)+i] = sum;
00453         }
00454     }
00455 
00456     /* clamp and store the result */
00457     for(i=0;i<8;i++) {
00458         for(j=0;j<8;j++) {
00459             v = block3[8*i+j];
00460             if (v < 0)
00461                 v = 0;
00462             else if (v > 255)
00463                 v = 255;
00464             dest[i * linesize + j] = (int)rint(v);
00465         }
00466     }
00467 }
00468 
00469 void idct248_error(const char *name,
00470                     void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
00471 {
00472     int it, i, it1, ti, ti1, err_max, v;
00473 
00474     srandom(0);
00475 
00476     /* just one test to see if code is correct (precision is less
00477        important here) */
00478     err_max = 0;
00479     for(it=0;it<NB_ITS;it++) {
00480 
00481         /* XXX: use forward transform to generate values */
00482         for(i=0;i<64;i++)
00483             block1[i] = (random() % 256) - 128;
00484         block1[0] += 1024;
00485 
00486         for(i=0; i<64; i++)
00487             block[i]= block1[i];
00488         idct248_ref(img_dest1, 8, block);
00489 
00490         for(i=0; i<64; i++)
00491             block[i]= block1[i];
00492         idct248_put(img_dest, 8, block);
00493 
00494         for(i=0;i<64;i++) {
00495             v = abs((int)img_dest[i] - (int)img_dest1[i]);
00496             if (v == 255)
00497                 printf("%d %d\n", img_dest[i], img_dest1[i]);
00498             if (v > err_max)
00499                 err_max = v;
00500         }
00501 #if 0
00502         printf("ref=\n");
00503         for(i=0;i<8;i++) {
00504             int j;
00505             for(j=0;j<8;j++) {
00506                 printf(" %3d", img_dest1[i*8+j]);
00507             }
00508             printf("\n");
00509         }
00510 
00511         printf("out=\n");
00512         for(i=0;i<8;i++) {
00513             int j;
00514             for(j=0;j<8;j++) {
00515                 printf(" %3d", img_dest[i*8+j]);
00516             }
00517             printf("\n");
00518         }
00519 #endif
00520     }
00521     printf("%s %s: err_inf=%d\n",
00522            1 ? "IDCT248" : "DCT248",
00523            name, err_max);
00524 
00525     ti = gettime();
00526     it1 = 0;
00527     do {
00528         for(it=0;it<NB_ITS_SPEED;it++) {
00529             for(i=0; i<64; i++)
00530                 block[i]= block1[i];
00531 //            memcpy(block, block1, sizeof(DCTELEM) * 64);
00532 // do not memcpy especially not fastmemcpy because it does movntq !!!
00533             idct248_put(img_dest, 8, block);
00534         }
00535         it1 += NB_ITS_SPEED;
00536         ti1 = gettime() - ti;
00537     } while (ti1 < 1000000);
00538     mmx_emms();
00539 
00540     printf("%s %s: %0.1f kdct/s\n",
00541            1 ? "IDCT248" : "DCT248",
00542            name, (double)it1 * 1000.0 / (double)ti1);
00543 }
00544 
/**
 * Print the command-line usage summary to standard output.
 */
void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        "            1 -> test with random sparse matrixes\n"
        "            2 -> do 3. test from mpeg4 std\n"
        "-i          test IDCT implementations\n"
        "-4          test IDCT248 implementations\n";

    printf("%s", usage);
}
00554 
00555 int main(int argc, char **argv)
00556 {
00557     int test_idct = 0, test_248_dct = 0;
00558     int c,i;
00559     int test=1;
00560     cpu_flags = mm_support();
00561 
00562     ff_ref_dct_init();
00563     idct_mmx_init();
00564 
00565     for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
00566     for(i=0;i<MAX_NEG_CROP;i++) {
00567         cropTbl[i] = 0;
00568         cropTbl[i + MAX_NEG_CROP + 256] = 255;
00569     }
00570 
00571     for(;;) {
00572         c = getopt(argc, argv, "ih4");
00573         if (c == -1)
00574             break;
00575         switch(c) {
00576         case 'i':
00577             test_idct = 1;
00578             break;
00579         case '4':
00580             test_248_dct = 1;
00581             break;
00582         default :
00583         case 'h':
00584             help();
00585             return 0;
00586         }
00587     }
00588 
00589     if(optind <argc) test= atoi(argv[optind]);
00590 
00591     printf("ffmpeg DCT/IDCT test\n");
00592 
00593     if (test_248_dct) {
00594         idct248_error("SIMPLE-C", ff_simple_idct248_put);
00595     } else {
00596       for (i=0;algos[i].name;i++)
00597         if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
00598           dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
00599         }
00600     }
00601     return 0;
00602 }

Generated on Sat Feb 16 2013 09:23:11 for ffmpeg by  doxygen 1.7.1