• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libavcodec/h264.c

Go to the documentation of this file.
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include "internal.h"
00029 #include "dsputil.h"
00030 #include "avcodec.h"
00031 #include "mpegvideo.h"
00032 #include "h264.h"
00033 #include "h264data.h"
00034 #include "h264_parser.h"
00035 #include "golomb.h"
00036 #include "mathops.h"
00037 #include "rectangle.h"
00038 #include "vdpau_internal.h"
00039 
00040 #include "cabac.h"
00041 #if ARCH_X86
00042 #include "x86/h264_i386.h"
00043 #endif
00044 
00045 //#undef NDEBUG
00046 #include <assert.h>
00047 
/* NOTE(review): the original doc comment for this macro is not part of this
 * listing; presumably a Picture "reference" marker value for pictures that are
 * only kept for delayed output - confirm against the real source. */
00052 #define DELAYED_PIC_REF 4
00053 
/* coeff_token VLCs: four VLC objects declared next to one flat table. The
 * per-VLC entry counts in coeff_token_vlc_tables_size sum to the first
 * dimension of coeff_token_vlc_tables (520+332+280+256). */
00054 static VLC coeff_token_vlc[4];
00055 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
00056 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
00057 
/* For every VLC (or VLC array) below, the *_size constant equals the number
 * of entries the matching table provides per VLC.
 * Presumably the tables are the static backing storage handed to the VLC
 * initialisation code, which is not visible in this part of the file. */
00058 static VLC chroma_dc_coeff_token_vlc;
00059 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
00060 static const int chroma_dc_coeff_token_vlc_table_size = 256;
00061 
00062 static VLC total_zeros_vlc[15];
00063 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
00064 static const int total_zeros_vlc_tables_size = 512;
00065 
00066 static VLC chroma_dc_total_zeros_vlc[3];
00067 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
00068 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
00069 
00070 static VLC run_vlc[6];
00071 static VLC_TYPE run_vlc_tables[6][8][2];
00072 static const int run_vlc_tables_size = 8;
00073 
00074 static VLC run7_vlc;
00075 static VLC_TYPE run7_vlc_table[96][2];
00076 static const int run7_vlc_table_size = 96;
00077 
/* Prototypes of static functions whose definitions are not in this part of
 * the file. */
00078 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
00079 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
00080 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
00081 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
00082 static Picture * remove_long(H264Context *h, int i, int ref_mask);
00083 
00084 static av_always_inline uint32_t pack16to32(int a, int b){
00085 #ifdef WORDS_BIGENDIAN
00086    return (b&0xFFFF) + (a<<16);
00087 #else
00088    return (a&0xFFFF) + (b<<16);
00089 #endif
00090 }
00091 
/* rem6[i] == i % 6 for i in 0..51 (table lookup instead of a modulo).
 * NOTE(review): the 52 entries presumably cover the QP range 0..51 - confirm
 * against the users of this table. */
00092 static const uint8_t rem6[52]={
00093 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
00094 };
00095 
/* div6[i] == i / 6 for i in 0..51 (table lookup instead of a division). */
00096 static const uint8_t div6[52]={
00097 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
00098 };
00099 
/* Index sets used by fill_caches() to pick data out of the left neighbour(s).
 * Row 0 is the default. When FRAME_MBAFF is set and the current and left
 * macroblock pairs differ in their field flag, fill_caches() selects row 3
 * (current MB is a field MB) or row 2/1 (current MB is the top/bottom frame
 * MB of its pair).
 * Within a row, entries 0..3 index the left MB's intra4x4_pred_mode and
 * non_zero_count arrays and scale b_stride for motion data; entries 4..7
 * index non_zero_count only. */
00100 static const uint8_t left_block_options[4][8]={
00101     {0,1,2,3,7,10,8,11},
00102     {2,2,3,3,8,11,8,11},
00103     {0,0,1,1,7,10,7,10},
00104     {0,2,0,2,7,10,7,10}
00105 };
00106 
/* Seven tables of 1<<LEVEL_TAB_BITS (256) two-byte entries.
 * NOTE(review): presumably a precomputed CAVLC level decoding table; neither
 * its initialisation nor its use is visible in this part of the file. */
00107 #define LEVEL_TAB_BITS 8
00108 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
00109 
/**
 * Fill the per-macroblock neighbour caches of @p h for the macroblock at
 * h->mb_xy.
 *
 * Locates the top, top-left, top-right and left neighbours (with additional
 * adjustments when FRAME_MBAFF is set), stores the top/left positions in
 * h->top_mb_xy and h->left_mb_xy[], and copies neighbour data into:
 *  - the intra sample availability masks and intra4x4_pred_mode_cache
 *    (intra macroblocks, only when for_deblock is 0),
 *  - non_zero_count_cache,
 *  - top_cbp / left_cbp (only with h->pps.cabac),
 *  - mv_cache / ref_cache for inter and direct macroblocks,
 *  - mvd_cache and direct_cache (only with h->pps.cabac and for_deblock 0).
 * Finally h->neighbor_transform_size is set.
 *
 * Returns early, without writing anything, when for_deblock is set,
 * FRAME_MBAFF is not, and either h->slice_num == 1 or the top neighbour has
 * the same slice_table entry as the current macroblock.
 *
 * @param h           decoder context, read and updated in place
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock nonzero when the caches are needed for deblocking:
 *                    a neighbour is then accepted whenever its slice_table
 *                    entry is below 0xFFFF (instead of having to equal
 *                    h->slice_num), the intra section is skipped and the
 *                    top-left / top-right motion data is not loaded
 */
00110 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
00111     MpegEncContext * const s = &h->s;
00112     const int mb_xy= h->mb_xy;
00113     int topleft_xy, top_xy, topright_xy, left_xy[2];
00114     int topleft_type, top_type, topright_type, left_type[2];
00115     const uint8_t * left_block;
00116     int topleft_partition= -1;
00117     int i;
00118 
          /* distance to the top neighbour is mb_stride shifted left by FIELD_PICTURE */
00119     top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);
00120 
00121     //FIXME deblocking could skip the intra and nnz parts.
00122     if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
00123         return;
00124 
00125     /* Wow, what a mess, why didn't they simplify the interlacing & intra
00126      * stuff, I can't imagine that these complex rules are worth it. */
00127 
          /* default neighbour positions; both left entries refer to the same MB */
00128     topleft_xy = top_xy - 1;
00129     topright_xy= top_xy + 1;
00130     left_xy[1] = left_xy[0] = mb_xy-1;
00131     left_block = left_block_options[0];
          /* MBAFF: neighbours are derived per macroblock pair and depend on the
           * field flags of the current and the neighbouring pairs */
00132     if(FRAME_MBAFF){
00133         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
00134         const int top_pair_xy      = pair_xy     - s->mb_stride;
00135         const int topleft_pair_xy  = top_pair_xy - 1;
00136         const int topright_pair_xy = top_pair_xy + 1;
00137         const int topleft_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
00138         const int top_mb_field_flag      = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
00139         const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
00140         const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
00141         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
00142         const int bottom = (s->mb_y & 1);
00143         tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
00144 
00145         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
00146             top_xy -= s->mb_stride;
00147         }
00148         if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
00149             topleft_xy -= s->mb_stride;
00150         } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
00151             topleft_xy += s->mb_stride;
00152             // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
00153             topleft_partition = 0;
00154         }
00155         if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
00156             topright_xy -= s->mb_stride;
00157         }
              /* field/frame mismatch with the left pair: both MBs of the left pair
               * may be needed, and a different left_block_options row is used */
00158         if (left_mb_field_flag != curr_mb_field_flag) {
00159             left_xy[1] = left_xy[0] = pair_xy - 1;
00160             if (curr_mb_field_flag) {
00161                 left_xy[1] += s->mb_stride;
00162                 left_block = left_block_options[3];
00163             } else {
00164                 left_block= left_block_options[2 - bottom];
00165             }
00166         }
00167     }
00168 
00169     h->top_mb_xy = top_xy;
00170     h->left_mb_xy[0] = left_xy[0];
00171     h->left_mb_xy[1] = left_xy[1];
          /* Neighbour types; 0 means "not available". For deblocking a neighbour
           * counts when its slice_table entry is < 0xFFFF, otherwise it must carry
           * the current slice number. */
00172     if(for_deblock){
00173         topleft_type = 0;
00174         topright_type = 0;
00175         top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
00176         left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
00177         left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
00178 
00179         if(MB_MBAFF && !IS_INTRA(mb_type)){
00180             int list;
00181             for(list=0; list<h->list_count; list++){
00182                 //These values were changed for ease of performing MC, we need to change them back
00183                 //FIXME maybe we can make MC and loop filter use the same values or prevent
00184                 //the MC code from changing ref_cache and rather use a temporary array.
00185                 if(USES_LIST(mb_type,list)){
00186                     int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                          /* each stored ref index is replicated into 2 adjacent bytes
                           * and written to two cache rows with one 32-bit store */
00187                     *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
00188                     *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
00189                     ref += h->b8_stride;
00190                     *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
00191                     *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
00192                 }
00193             }
00194         }
00195     }else{
00196         topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
00197         top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
00198         topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
00199         left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
00200         left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
00201 
      /* Intra macroblocks: start with all-available sample masks and clear bits
       * for every neighbour that fails type_mask. With constrained_intra_pred
       * only intra neighbours pass; otherwise any available neighbour does. */
00202     if(IS_INTRA(mb_type)){
00203         int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
00204         h->topleft_samples_available=
00205         h->top_samples_available=
00206         h->left_samples_available= 0xFFFF;
00207         h->topright_samples_available= 0xEEEA;
00208 
00209         if(!(top_type & type_mask)){
00210             h->topleft_samples_available= 0xB3FF;
00211             h->top_samples_available= 0x33FF;
00212             h->topright_samples_available= 0x26EA;
00213         }
00214         if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
00215             if(IS_INTERLACED(mb_type)){
00216                 if(!(left_type[0] & type_mask)){
00217                     h->topleft_samples_available&= 0xDFFF;
00218                     h->left_samples_available&= 0x5FFF;
00219                 }
00220                 if(!(left_type[1] & type_mask)){
00221                     h->topleft_samples_available&= 0xFF5F;
00222                     h->left_samples_available&= 0xFF5F;
00223                 }
00224             }else{
00225                 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
00226                                 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
00227                 assert(left_xy[0] == left_xy[1]);
00228                 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
00229                     h->topleft_samples_available&= 0xDF5F;
00230                     h->left_samples_available&= 0x5F5F;
00231                 }
00232             }
00233         }else{
00234             if(!(left_type[0] & type_mask)){
00235                 h->topleft_samples_available&= 0xDF5F;
00236                 h->left_samples_available&= 0x5F5F;
00237             }
00238         }
00239 
00240         if(!(topleft_type & type_mask))
00241             h->topleft_samples_available&= 0x7FFF;
00242 
00243         if(!(topright_type & type_mask))
00244             h->topright_samples_available&= 0xFBFF;
00245 
              /* Intra 4x4: load the neighbours' prediction modes. A neighbour that
               * is not intra 4x4 contributes -1 if it fails type_mask and 2
               * otherwise. */
00246         if(IS_INTRA4x4(mb_type)){
00247             if(IS_INTRA4x4(top_type)){
00248                 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
00249                 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
00250                 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
00251                 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
00252             }else{
00253                 int pred;
00254                 if(!(top_type & type_mask))
00255                     pred= -1;
00256                 else{
00257                     pred= 2;
00258                 }
00259                 h->intra4x4_pred_mode_cache[4+8*0]=
00260                 h->intra4x4_pred_mode_cache[5+8*0]=
00261                 h->intra4x4_pred_mode_cache[6+8*0]=
00262                 h->intra4x4_pred_mode_cache[7+8*0]= pred;
00263             }
00264             for(i=0; i<2; i++){
00265                 if(IS_INTRA4x4(left_type[i])){
00266                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
00267                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
00268                 }else{
00269                     int pred;
00270                     if(!(left_type[i] & type_mask))
00271                         pred= -1;
00272                     else{
00273                         pred= 2;
00274                     }
00275                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
00276                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
00277                 }
00278             }
00279         }
00280     }
00281     }
00282 
00283 
00284 /*
00285 0 . T T. T T T T
00286 1 L . .L . . . .
00287 2 L . .L . . . .
00288 3 . T TL . . . .
00289 4 L . .L . . . .
00290 5 L . .. . . . .
00291 */
00292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
          /* Non-zero coefficient counts of the top neighbour. If it is unavailable
           * the entries default to 0 for non-intra MBs under CABAC and to 64
           * otherwise. */
00293     if(top_type){
00294         h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
00295         h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
00296         h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
00297         h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
00298 
00299         h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
00300         h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
00301 
00302         h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
00303         h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
00304 
00305     }else{
00306         h->non_zero_count_cache[4+8*0]=
00307         h->non_zero_count_cache[5+8*0]=
00308         h->non_zero_count_cache[6+8*0]=
00309         h->non_zero_count_cache[7+8*0]=
00310 
00311         h->non_zero_count_cache[1+8*0]=
00312         h->non_zero_count_cache[2+8*0]=
00313 
00314         h->non_zero_count_cache[1+8*3]=
00315         h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
00316 
00317     }
00318 
          /* Same for the two left halves (i=0 upper, i=1 lower), using the row of
           * left_block_options selected above. */
00319     for (i=0; i<2; i++) {
00320         if(left_type[i]){
00321             h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
00322             h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
00323             h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
00324             h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
00325         }else{
00326             h->non_zero_count_cache[3+8*1 + 2*8*i]=
00327             h->non_zero_count_cache[3+8*2 + 2*8*i]=
00328             h->non_zero_count_cache[0+8*1 +   8*i]=
00329             h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
00330         }
00331     }
00332 
          /* CABAC only: coded block patterns of the top and left neighbours.
           * Unavailable neighbours give 0x1C0 for intra MBs and 0 otherwise. */
00333     if( h->pps.cabac ) {
00334         // top_cbp
00335         if(top_type) {
00336             h->top_cbp = h->cbp_table[top_xy];
00337         } else if(IS_INTRA(mb_type)) {
00338             h->top_cbp = 0x1C0;
00339         } else {
00340             h->top_cbp = 0;
00341         }
00342         // left_cbp
00343         if (left_type[0]) {
00344             h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
00345         } else if(IS_INTRA(mb_type)) {
00346             h->left_cbp = 0x1C0;
00347         } else {
00348             h->left_cbp = 0;
00349         }
00350         if (left_type[0]) {
00351             h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
00352         }
00353         if (left_type[1]) {
00354             h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
00355         }
00356     }
00357 
00358 #if 1
          /* Inter / direct macroblocks: load neighbouring motion vectors and
           * reference indices for every reference list. */
00359     if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
00360         int list;
00361         for(list=0; list<h->list_count; list++){
00362             if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
00363                 /*if(!h->mv_cache_clean[list]){
00364                     memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
00365                     memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
00366                     h->mv_cache_clean[list]= 1;
00367                 }*/
00368                 continue;
00369             }
00370             h->mv_cache_clean[list]= 0;
00371 
                  /* top row: 4 motion vectors and 2 reference indices (each index
                   * covers two cache columns) */
00372             if(USES_LIST(top_type, list)){
00373                 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
00374                 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
00375                 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
00376                 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
00377                 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
00378                 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
00379                 h->ref_cache[list][scan8[0] + 0 - 1*8]=
00380                 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
00381                 h->ref_cache[list][scan8[0] + 2 - 1*8]=
00382                 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
00383             }else{
00384                 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
00385                 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
00386                 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
00387                 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                      /* LIST_NOT_USED if the MB exists but does not use this list,
                       * PART_NOT_AVAILABLE if there is no usable top MB; the byte
                       * is replicated into all 4 entries */
00388                 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
00389             }
00390 
                  /* left column, two cache rows per left half */
00391             for(i=0; i<2; i++){
00392                 int cache_idx = scan8[0] - 1 + i*2*8;
00393                 if(USES_LIST(left_type[i], list)){
00394                     const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
00395                     const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
00396                     *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
00397                     *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
00398                     h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
00399                     h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
00400                 }else{
00401                     *(uint32_t*)h->mv_cache [list][cache_idx  ]=
00402                     *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
00403                     h->ref_cache[list][cache_idx  ]=
00404                     h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
00405                 }
00406             }
00407 
                  /* the remaining data is not loaded for deblocking, nor for
                   * non-spatial direct MBs outside MBAFF frames */
00408             if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
00409                 continue;
00410 
00411             if(USES_LIST(topleft_type, list)){
00412                 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
00413                 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
00414                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
00415                 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
00416             }else{
00417                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
00418                 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
00419             }
00420 
00421             if(USES_LIST(topright_type, list)){
00422                 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
00423                 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
00424                 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
00425                 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
00426             }else{
00427                 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
00428                 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
00429             }
00430 
00431             if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
00432                 continue;
00433 
                  /* mark fixed cache positions as not available with zero vectors */
00434             h->ref_cache[list][scan8[5 ]+1] =
00435             h->ref_cache[list][scan8[7 ]+1] =
00436             h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
00437             h->ref_cache[list][scan8[4 ]] =
00438             h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
00439             *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
00440             *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
00441             *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
00442             *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
00443             *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
00444 
00445             if( h->pps.cabac ) {
00446                 /* XXX beurk, Load mvd */
00447                 if(USES_LIST(top_type, list)){
00448                     const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
00449                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
00450                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
00451                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
00452                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
00453                 }else{
00454                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
00455                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
00456                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
00457                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
00458                 }
00459                 if(USES_LIST(left_type[0], list)){
00460                     const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
00461                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
00462                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
00463                 }else{
00464                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
00465                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
00466                 }
00467                 if(USES_LIST(left_type[1], list)){
00468                     const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
00469                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
00470                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
00471                 }else{
00472                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
00473                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
00474                 }
00475                 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
00476                 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
00477                 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
00478                 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
00479                 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
00480 
                      /* B slices: direct flags of the neighbours (1 = direct) */
00481                 if(h->slice_type_nos == FF_B_TYPE){
00482                     fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
00483 
00484                     if(IS_DIRECT(top_type)){
00485                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
00486                     }else if(IS_8X8(top_type)){
00487                         int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
00488                         h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
00489                         h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
00490                     }else{
00491                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
00492                     }
00493 
00494                     if(IS_DIRECT(left_type[0]))
00495                         h->direct_cache[scan8[0] - 1 + 0*8]= 1;
00496                     else if(IS_8X8(left_type[0]))
00497                         h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
00498                     else
00499                         h->direct_cache[scan8[0] - 1 + 0*8]= 0;
00500 
00501                     if(IS_DIRECT(left_type[1]))
00502                         h->direct_cache[scan8[0] - 1 + 2*8]= 1;
00503                     else if(IS_8X8(left_type[1]))
00504                         h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
00505                     else
00506                         h->direct_cache[scan8[0] - 1 + 2*8]= 0;
00507                 }
00508             }
00509 
                  /* MBAFF: convert neighbour data between frame and field form.
                   * MAP_MVS applies MAP_F2F to every neighbour cache position; the
                   * two MAP_F2F variants below rescale the reference index and the
                   * vertical mv/mvd component in opposite directions. */
00510             if(FRAME_MBAFF){
00511 #define MAP_MVS\
00512                     MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
00513                     MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
00514                     MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
00515                     MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
00516                     MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
00517                     MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
00518                     MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
00519                     MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
00520                     MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
00521                     MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
00522                 if(MB_FIELD){
00523 #define MAP_F2F(idx, mb_type)\
00524                     if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
00525                         h->ref_cache[list][idx] <<= 1;\
00526                         h->mv_cache[list][idx][1] /= 2;\
00527                         h->mvd_cache[list][idx][1] /= 2;\
00528                     }
00529                     MAP_MVS
00530 #undef MAP_F2F
00531                 }else{
00532 #define MAP_F2F(idx, mb_type)\
00533                     if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
00534                         h->ref_cache[list][idx] >>= 1;\
00535                         h->mv_cache[list][idx][1] <<= 1;\
00536                         h->mvd_cache[list][idx][1] <<= 1;\
00537                     }
00538                     MAP_MVS
00539 #undef MAP_F2F
00540                 }
00541             }
00542         }
00543     }
00544 #endif
00545 
          /* number (0..2) of top / upper-left neighbours flagged as 8x8 DCT */
00546     h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
00547 }
00548 
00549 static inline void write_back_intra_pred_mode(H264Context *h){
00550     const int mb_xy= h->mb_xy;
00551 
00552     h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
00553     h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
00554     h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
00555     h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
00556     h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
00557     h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
00558     h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
00559 }
00560 
00564 static inline int check_intra4x4_pred_mode(H264Context *h){
00565     MpegEncContext * const s = &h->s;
00566     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
00567     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
00568     int i;
00569 
00570     if(!(h->top_samples_available&0x8000)){
00571         for(i=0; i<4; i++){
00572             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
00573             if(status<0){
00574                 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00575                 return -1;
00576             } else if(status){
00577                 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
00578             }
00579         }
00580     }
00581 
00582     if((h->left_samples_available&0x8888)!=0x8888){
00583         static const int mask[4]={0x8000,0x2000,0x80,0x20};
00584         for(i=0; i<4; i++){
00585             if(!(h->left_samples_available&mask[i])){
00586                 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
00587                 if(status<0){
00588                     av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00589                     return -1;
00590                 } else if(status){
00591                     h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
00592                 }
00593             }
00594         }
00595     }
00596 
00597     return 0;
00598 } //FIXME cleanup like next
00599 
00603 static inline int check_intra_pred_mode(H264Context *h, int mode){
00604     MpegEncContext * const s = &h->s;
00605     static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
00606     static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
00607 
00608     if(mode > 6U) {
00609         av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
00610         return -1;
00611     }
00612 
00613     if(!(h->top_samples_available&0x8000)){
00614         mode= top[ mode ];
00615         if(mode<0){
00616             av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
00617             return -1;
00618         }
00619     }
00620 
00621     if((h->left_samples_available&0x8080) != 0x8080){
00622         mode= left[ mode ];
00623         if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
00624             mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
00625         }
00626         if(mode<0){
00627             av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
00628             return -1;
00629         }
00630     }
00631 
00632     return mode;
00633 }
00634 
00638 static inline int pred_intra_mode(H264Context *h, int n){
00639     const int index8= scan8[n];
00640     const int left= h->intra4x4_pred_mode_cache[index8 - 1];
00641     const int top = h->intra4x4_pred_mode_cache[index8 - 8];
00642     const int min= FFMIN(left, top);
00643 
00644     tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
00645 
00646     if(min<0) return DC_PRED;
00647     else      return min;
00648 }
00649 
00650 static inline void write_back_non_zero_count(H264Context *h){
00651     const int mb_xy= h->mb_xy;
00652 
00653     h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
00654     h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
00655     h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
00656     h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
00657     h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
00658     h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
00659     h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
00660 
00661     h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
00662     h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
00663     h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
00664 
00665     h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
00666     h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
00667     h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
00668 }
00669 
00674 static inline int pred_non_zero_count(H264Context *h, int n){
00675     const int index8= scan8[n];
00676     const int left= h->non_zero_count_cache[index8 - 1];
00677     const int top = h->non_zero_count_cache[index8 - 8];
00678     int i= left + top;
00679 
00680     if(i<64) i= (i+1)>>1;
00681 
00682     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
00683 
00684     return i&31;
00685 }
00686 
/**
 * Fetches the diagonal (top-right, falling back to top-left) neighbour used
 * as candidate C of the motion vector prediction.
 *
 * @param h          decoder context
 * @param C          receives a pointer to the selected motion vector
 * @param i          position of the partition in the ref/mv caches (callers
 *                   pass a scan8[] value)
 * @param list       reference list whose caches are read
 * @param part_width partition width in cache columns, added to i - 8 to find
 *                   the top-right position
 * @return the reference index belonging to *C; this may be LIST_NOT_USED or
 *         PART_NOT_AVAILABLE
 */
00687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
00688     const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
00689     MpegEncContext *s = &h->s;
00690 
00691     /* there is no consistent mapping of mvs to neighboring locations that will
00692      * make mbaff happy, so we can't move all this logic to fill_caches */
00693     if(FRAME_MBAFF){
00694         const uint32_t *mb_types = s->current_picture_ptr->mb_type;
00695         const int16_t *mv;
          /* cache slot scan8[0]-2 serves as scratch storage for a rescaled vector; clear it and
           * point *C at it. If none of the cases below returns, *C is overwritten further down. */
00696         *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
00697         *C = h->mv_cache[list][scan8[0]-2];
00698 
00699         if(!MB_FIELD
00700            && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
00701             int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
00702             if(IS_INTERLACED(mb_types[topright_xy])){
                  /* SET_DIAG_MV always returns from fetch_diagonal_mv: it looks up the macroblock
                   * covering the 4x4 position (X4,Y4) of the current picture, returns LIST_NOT_USED
                   * if that macroblock does not use this list, otherwise copies its vector into the
                   * scratch slot with MV_OP applied to the second (y) component and returns its
                   * reference index with REF_OP applied. */
00703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
00704                 const int x4 = X4, y4 = Y4;\
00705                 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
00706                 if(!USES_LIST(mb_type,list))\
00707                     return LIST_NOT_USED;\
00708                 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
00709                 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
00710                 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
00711                 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
00712 
00713                 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
00714             }
00715         }
          /* top-right missing, partition in cache column 4 and the left neighbour present:
           * take the vector from the left macroblock when its interlacing differs from ours */
00716         if(topright_ref == PART_NOT_AVAILABLE
00717            && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
00718            && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
00719             if(!MB_FIELD
00720                && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
00721                 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
00722             }
00723             if(MB_FIELD
00724                && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
00725                && i >= scan8[0]+8){
00726                 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
00727                 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
00728             }
00729         }
00730 #undef SET_DIAG_MV
00731     }
00732 
      /* regular case: use the top-right cache entry, or the top-left one if it is unavailable */
00733     if(topright_ref != PART_NOT_AVAILABLE){
00734         *C= h->mv_cache[list][ i - 8 + part_width ];
00735         return topright_ref;
00736     }else{
00737         tprintf(s->avctx, "topright MV not available\n");
00738 
00739         *C= h->mv_cache[list][ i - 8 - 1 ];
00740         return h->ref_cache[list][ i - 8 - 1 ];
00741     }
00742 }
00743 
00751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
00752     const int index8= scan8[n];
00753     const int top_ref=      h->ref_cache[list][ index8 - 8 ];
00754     const int left_ref=     h->ref_cache[list][ index8 - 1 ];
00755     const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
00756     const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
00757     const int16_t * C;
00758     int diagonal_ref, match_count;
00759 
00760     assert(part_width==1 || part_width==2 || part_width==4);
00761 
00762 /* mv_cache
00763   B . . A T T T T
00764   U . . L . . , .
00765   U . . L . . . .
00766   U . . L . . , .
00767   . . . L . . . .
00768 */
00769 
00770     diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
00771     match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
00772     tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
00773     if(match_count > 1){ //most common
00774         *mx= mid_pred(A[0], B[0], C[0]);
00775         *my= mid_pred(A[1], B[1], C[1]);
00776     }else if(match_count==1){
00777         if(left_ref==ref){
00778             *mx= A[0];
00779             *my= A[1];
00780         }else if(top_ref==ref){
00781             *mx= B[0];
00782             *my= B[1];
00783         }else{
00784             *mx= C[0];
00785             *my= C[1];
00786         }
00787     }else{
00788         if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
00789             *mx= A[0];
00790             *my= A[1];
00791         }else{
00792             *mx= mid_pred(A[0], B[0], C[0]);
00793             *my= mid_pred(A[1], B[1], C[1]);
00794         }
00795     }
00796 
00797     tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
00798 }
00799 
00806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
00807     if(n==0){
00808         const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
00809         const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
00810 
00811         tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
00812 
00813         if(top_ref == ref){
00814             *mx= B[0];
00815             *my= B[1];
00816             return;
00817         }
00818     }else{
00819         const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
00820         const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
00821 
00822         tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
00823 
00824         if(left_ref == ref){
00825             *mx= A[0];
00826             *my= A[1];
00827             return;
00828         }
00829     }
00830 
00831     //RARE
00832     pred_motion(h, n, 4, list, ref, mx, my);
00833 }
00834 
00841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
00842     if(n==0){
00843         const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
00844         const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
00845 
00846         tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
00847 
00848         if(left_ref == ref){
00849             *mx= A[0];
00850             *my= A[1];
00851             return;
00852         }
00853     }else{
00854         const int16_t * C;
00855         int diagonal_ref;
00856 
00857         diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
00858 
00859         tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
00860 
00861         if(diagonal_ref == ref){
00862             *mx= C[0];
00863             *my= C[1];
00864             return;
00865         }
00866     }
00867 
00868     //RARE
00869     pred_motion(h, n, 2, list, ref, mx, my);
00870 }
00871 
00872 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
00873     const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
00874     const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
00875 
00876     tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
00877 
00878     if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
00879        || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
00880        || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
00881 
00882         *mx = *my = 0;
00883         return;
00884     }
00885 
00886     pred_motion(h, 0, 4, 0, 0, mx, my);
00887 
00888     return;
00889 }
00890 
00891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
00892     int poc0 = h->ref_list[0][i].poc;
00893     int td = av_clip(poc1 - poc0, -128, 127);
00894     if(td == 0 || h->ref_list[0][i].long_ref){
00895         return 256;
00896     }else{
00897         int tb = av_clip(poc - poc0, -128, 127);
00898         int tx = (16384 + (FFABS(td) >> 1)) / td;
00899         return av_clip((tb*tx + 32) >> 6, -1024, 1023);
00900     }
00901 }
00902 
00903 static inline void direct_dist_scale_factor(H264Context * const h){
00904     MpegEncContext * const s = &h->s;
00905     const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
00906     const int poc1 = h->ref_list[1][0].poc;
00907     int i, field;
00908     for(field=0; field<2; field++){
00909         const int poc  = h->s.current_picture_ptr->field_poc[field];
00910         const int poc1 = h->ref_list[1][0].field_poc[field];
00911         for(i=0; i < 2*h->ref_count[0]; i++)
00912             h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
00913     }
00914 
00915     for(i=0; i<h->ref_count[0]; i++){
00916         h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
00917     }
00918 }
00919 
00920 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
00921     MpegEncContext * const s = &h->s;
00922     Picture * const ref1 = &h->ref_list[1][0];
00923     int j, old_ref, rfield;
00924     int start= mbafi ? 16                      : 0;
00925     int end  = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
00926     int interl= mbafi || s->picture_structure != PICT_FRAME;
00927 
00928     /* bogus; fills in for missing frames */
00929     memset(map[list], 0, sizeof(map[list]));
00930 
00931     for(rfield=0; rfield<2; rfield++){
00932         for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
00933             int poc = ref1->ref_poc[colfield][list][old_ref];
00934 
00935             if     (!interl)
00936                 poc |= 3;
00937             else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
00938                 poc= (poc&~3) + rfield + 1;
00939 
00940             for(j=start; j<end; j++){
00941                 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
00942                     int cur_ref= mbafi ? (j-16)^field : j;
00943                     map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
00944                     if(rfield == field)
00945                         map[list][old_ref] = cur_ref;
00946                     break;
00947                 }
00948             }
00949         }
00950     }
00951 }
00952 
00953 static inline void direct_ref_list_init(H264Context * const h){
00954     MpegEncContext * const s = &h->s;
00955     Picture * const ref1 = &h->ref_list[1][0];
00956     Picture * const cur = s->current_picture_ptr;
00957     int list, j, field;
00958     int sidx= (s->picture_structure&1)^1;
00959     int ref1sidx= (ref1->reference&1)^1;
00960 
00961     for(list=0; list<2; list++){
00962         cur->ref_count[sidx][list] = h->ref_count[list];
00963         for(j=0; j<h->ref_count[list]; j++)
00964             cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
00965     }
00966 
00967     if(s->picture_structure == PICT_FRAME){
00968         memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
00969         memcpy(cur->ref_poc  [1], cur->ref_poc  [0], sizeof(cur->ref_poc  [0]));
00970     }
00971 
00972     cur->mbaff= FRAME_MBAFF;
00973 
00974     if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
00975         return;
00976 
00977     for(list=0; list<2; list++){
00978         fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
00979         for(field=0; field<2; field++)
00980             fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
00981     }
00982 }
00983 
00984 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
00985     MpegEncContext * const s = &h->s;
00986     int b8_stride = h->b8_stride;
00987     int b4_stride = h->b_stride;
00988     int mb_xy = h->mb_xy;
00989     int mb_type_col[2];
00990     const int16_t (*l1mv0)[2], (*l1mv1)[2];
00991     const int8_t *l1ref0, *l1ref1;
00992     const int is_b8x8 = IS_8X8(*mb_type);
00993     unsigned int sub_mb_type;
00994     int i8, i4;
00995 
00996 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
00997 
00998     if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
00999         if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
01000             int cur_poc = s->current_picture_ptr->poc;
01001             int *col_poc = h->ref_list[1]->field_poc;
01002             int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
01003             mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
01004             b8_stride = 0;
01005         }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
01006             int fieldoff= 2*(h->ref_list[1][0].reference)-3;
01007             mb_xy += s->mb_stride*fieldoff;
01008         }
01009         goto single_col;
01010     }else{                                               // AFL/AFR/FR/FL -> AFR/FR
01011         if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
01012             mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
01013             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
01014             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
01015             b8_stride *= 3;
01016             b4_stride *= 6;
01017             //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
01018             if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
01019                 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
01020                 && !is_b8x8){
01021                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
01022                 *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
01023             }else{
01024                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
01025                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
01026             }
01027         }else{                                           //     AFR/FR    -> AFR/FR
01028 single_col:
01029             mb_type_col[0] =
01030             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
01031             if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
01032                 /* FIXME save sub mb types from previous frames (or derive from MVs)
01033                 * so we know exactly what block size to use */
01034                 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
01035                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
01036             }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
01037                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
01038                 *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
01039             }else{
01040                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
01041                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
01042             }
01043         }
01044     }
01045 
01046     l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
01047     l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
01048     l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
01049     l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
01050     if(!b8_stride){
01051         if(s->mb_y&1){
01052             l1ref0 += h->b8_stride;
01053             l1ref1 += h->b8_stride;
01054             l1mv0  +=  2*b4_stride;
01055             l1mv1  +=  2*b4_stride;
01056         }
01057     }
01058 
01059     if(h->direct_spatial_mv_pred){
01060         int ref[2];
01061         int mv[2][2];
01062         int list;
01063 
01064         /* FIXME interlacing + spatial direct uses wrong colocated block positions */
01065 
01066         /* ref = min(neighbors) */
01067         for(list=0; list<2; list++){
01068             int refa = h->ref_cache[list][scan8[0] - 1];
01069             int refb = h->ref_cache[list][scan8[0] - 8];
01070             int refc = h->ref_cache[list][scan8[0] - 8 + 4];
01071             if(refc == PART_NOT_AVAILABLE)
01072                 refc = h->ref_cache[list][scan8[0] - 8 - 1];
01073             ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
01074             if(ref[list] < 0)
01075                 ref[list] = -1;
01076         }
01077 
01078         if(ref[0] < 0 && ref[1] < 0){
01079             ref[0] = ref[1] = 0;
01080             mv[0][0] = mv[0][1] =
01081             mv[1][0] = mv[1][1] = 0;
01082         }else{
01083             for(list=0; list<2; list++){
01084                 if(ref[list] >= 0)
01085                     pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
01086                 else
01087                     mv[list][0] = mv[list][1] = 0;
01088             }
01089         }
01090 
01091         if(ref[1] < 0){
01092             if(!is_b8x8)
01093                 *mb_type &= ~MB_TYPE_L1;
01094             sub_mb_type &= ~MB_TYPE_L1;
01095         }else if(ref[0] < 0){
01096             if(!is_b8x8)
01097                 *mb_type &= ~MB_TYPE_L0;
01098             sub_mb_type &= ~MB_TYPE_L0;
01099         }
01100 
01101         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
01102             for(i8=0; i8<4; i8++){
01103                 int x8 = i8&1;
01104                 int y8 = i8>>1;
01105                 int xy8 = x8+y8*b8_stride;
01106                 int xy4 = 3*x8+y8*b4_stride;
01107                 int a=0, b=0;
01108 
01109                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
01110                     continue;
01111                 h->sub_mb_type[i8] = sub_mb_type;
01112 
01113                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
01114                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
01115                 if(!IS_INTRA(mb_type_col[y8])
01116                    && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
01117                        || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
01118                     if(ref[0] > 0)
01119                         a= pack16to32(mv[0][0],mv[0][1]);
01120                     if(ref[1] > 0)
01121                         b= pack16to32(mv[1][0],mv[1][1]);
01122                 }else{
01123                     a= pack16to32(mv[0][0],mv[0][1]);
01124                     b= pack16to32(mv[1][0],mv[1][1]);
01125                 }
01126                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
01127                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
01128             }
01129         }else if(IS_16X16(*mb_type)){
01130             int a=0, b=0;
01131 
01132             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
01133             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
01134             if(!IS_INTRA(mb_type_col[0])
01135                && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
01136                    || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
01137                        && (h->x264_build>33 || !h->x264_build)))){
01138                 if(ref[0] > 0)
01139                     a= pack16to32(mv[0][0],mv[0][1]);
01140                 if(ref[1] > 0)
01141                     b= pack16to32(mv[1][0],mv[1][1]);
01142             }else{
01143                 a= pack16to32(mv[0][0],mv[0][1]);
01144                 b= pack16to32(mv[1][0],mv[1][1]);
01145             }
01146             fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
01147             fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
01148         }else{
01149             for(i8=0; i8<4; i8++){
01150                 const int x8 = i8&1;
01151                 const int y8 = i8>>1;
01152 
01153                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
01154                     continue;
01155                 h->sub_mb_type[i8] = sub_mb_type;
01156 
01157                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
01158                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
01159                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
01160                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
01161 
01162                 /* col_zero_flag */
01163                 if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
01164                                               || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
01165                                                   && (h->x264_build>33 || !h->x264_build)))){
01166                     const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
01167                     if(IS_SUB_8X8(sub_mb_type)){
01168                         const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
01169                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
01170                             if(ref[0] == 0)
01171                                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
01172                             if(ref[1] == 0)
01173                                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
01174                         }
01175                     }else
01176                     for(i4=0; i4<4; i4++){
01177                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
01178                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
01179                             if(ref[0] == 0)
01180                                 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
01181                             if(ref[1] == 0)
01182                                 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
01183                         }
01184                     }
01185                 }
01186             }
01187         }
01188     }else{ /* direct temporal mv pred */
01189         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
01190         const int *dist_scale_factor = h->dist_scale_factor;
01191         int ref_offset= 0;
01192 
01193         if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
01194             map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
01195             map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
01196             dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
01197         }
01198         if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
01199             ref_offset += 16;
01200 
01201         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
01202             /* FIXME assumes direct_8x8_inference == 1 */
01203             int y_shift  = 2*!IS_INTERLACED(*mb_type);
01204 
01205             for(i8=0; i8<4; i8++){
01206                 const int x8 = i8&1;
01207                 const int y8 = i8>>1;
01208                 int ref0, scale;
01209                 const int16_t (*l1mv)[2]= l1mv0;
01210 
01211                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
01212                     continue;
01213                 h->sub_mb_type[i8] = sub_mb_type;
01214 
01215                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
01216                 if(IS_INTRA(mb_type_col[y8])){
01217                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
01218                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
01219                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
01220                     continue;
01221                 }
01222 
01223                 ref0 = l1ref0[x8 + y8*b8_stride];
01224                 if(ref0 >= 0)
01225                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
01226                 else{
01227                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
01228                     l1mv= l1mv1;
01229                 }
01230                 scale = dist_scale_factor[ref0];
01231                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
01232 
01233                 {
01234                     const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
01235                     int my_col = (mv_col[1]<<y_shift)/2;
01236                     int mx = (scale * mv_col[0] + 128) >> 8;
01237                     int my = (scale * my_col + 128) >> 8;
01238                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
01239                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
01240                 }
01241             }
01242             return;
01243         }
01244 
01245         /* one-to-one mv scaling */
01246 
01247         if(IS_16X16(*mb_type)){
01248             int ref, mv0, mv1;
01249 
01250             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
01251             if(IS_INTRA(mb_type_col[0])){
01252                 ref=mv0=mv1=0;
01253             }else{
01254                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
01255                                                 : map_col_to_list0[1][l1ref1[0] + ref_offset];
01256                 const int scale = dist_scale_factor[ref0];
01257                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
01258                 int mv_l0[2];
01259                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
01260                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
01261                 ref= ref0;
01262                 mv0= pack16to32(mv_l0[0],mv_l0[1]);
01263                 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
01264             }
01265             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
01266             fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
01267             fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
01268         }else{
01269             for(i8=0; i8<4; i8++){
01270                 const int x8 = i8&1;
01271                 const int y8 = i8>>1;
01272                 int ref0, scale;
01273                 const int16_t (*l1mv)[2]= l1mv0;
01274 
01275                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
01276                     continue;
01277                 h->sub_mb_type[i8] = sub_mb_type;
01278                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
01279                 if(IS_INTRA(mb_type_col[0])){
01280                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
01281                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
01282                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
01283                     continue;
01284                 }
01285 
01286                 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
01287                 if(ref0 >= 0)
01288                     ref0 = map_col_to_list0[0][ref0];
01289                 else{
01290                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
01291                     l1mv= l1mv1;
01292                 }
01293                 scale = dist_scale_factor[ref0];
01294 
01295                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
01296                 if(IS_SUB_8X8(sub_mb_type)){
01297                     const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
01298                     int mx = (scale * mv_col[0] + 128) >> 8;
01299                     int my = (scale * mv_col[1] + 128) >> 8;
01300                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
01301                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
01302                 }else
01303                 for(i4=0; i4<4; i4++){
01304                     const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
01305                     int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
01306                     mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
01307                     mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
01308                     *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
01309                         pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
01310                 }
01311             }
01312         }
01313     }
01314 }
01315 
01316 static inline void write_back_motion(H264Context *h, int mb_type){
01317     MpegEncContext * const s = &h->s;
01318     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
01319     const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
01320     int list;
01321 
01322     if(!USES_LIST(mb_type, 0))
01323         fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
01324 
01325     for(list=0; list<h->list_count; list++){
01326         int y;
01327         if(!USES_LIST(mb_type, list))
01328             continue;
01329 
01330         for(y=0; y<4; y++){
01331             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
01332             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
01333         }
01334         if( h->pps.cabac ) {
01335             if(IS_SKIP(mb_type))
01336                 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
01337             else
01338             for(y=0; y<4; y++){
01339                 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
01340                 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
01341             }
01342         }
01343 
01344         {
01345             int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
01346             ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
01347             ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
01348             ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
01349             ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
01350         }
01351     }
01352 
01353     if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
01354         if(IS_8X8(mb_type)){
01355             uint8_t *direct_table = &h->direct_table[b8_xy];
01356             direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
01357             direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
01358             direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
01359         }
01360     }
01361 }
01362 
01363 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
01364     int i, si, di;
01365     uint8_t *dst;
01366     int bufidx;
01367 
01368 //    src[0]&0x80;                //forbidden bit
01369     h->nal_ref_idc= src[0]>>5;
01370     h->nal_unit_type= src[0]&0x1F;
01371 
01372     src++; length--;
01373 #if 0
01374     for(i=0; i<length; i++)
01375         printf("%2X ", src[i]);
01376 #endif
01377 
01378 #if HAVE_FAST_UNALIGNED
01379 # if HAVE_FAST_64BIT
01380 #   define RS 7
01381     for(i=0; i+1<length; i+=9){
01382         if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
01383 # else
01384 #   define RS 3
01385     for(i=0; i+1<length; i+=5){
01386         if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
01387 # endif
01388             continue;
01389         if(i>0 && !src[i]) i--;
01390         while(src[i]) i++;
01391 #else
01392 #   define RS 0
01393     for(i=0; i+1<length; i+=2){
01394         if(src[i]) continue;
01395         if(i>0 && src[i-1]==0) i--;
01396 #endif
01397         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
01398             if(src[i+2]!=3){
01399                 /* startcode, so we must be past the end */
01400                 length=i;
01401             }
01402             break;
01403         }
01404         i-= RS;
01405     }
01406 
01407     if(i>=length-1){ //no escaped 0
01408         *dst_length= length;
01409         *consumed= length+1; //+1 for the header
01410         return src;
01411     }
01412 
01413     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
01414     h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
01415     dst= h->rbsp_buffer[bufidx];
01416 
01417     if (dst == NULL){
01418         return NULL;
01419     }
01420 
01421 //printf("decoding esc\n");
01422     memcpy(dst, src, i);
01423     si=di=i;
01424     while(si+2<length){
01425         //remove escapes (very rare 1:2^22)
01426         if(src[si+2]>3){
01427             dst[di++]= src[si++];
01428             dst[di++]= src[si++];
01429         }else if(src[si]==0 && src[si+1]==0){
01430             if(src[si+2]==3){ //escape
01431                 dst[di++]= 0;
01432                 dst[di++]= 0;
01433                 si+=3;
01434                 continue;
01435             }else //next start code
01436                 goto nsc;
01437         }
01438 
01439         dst[di++]= src[si++];
01440     }
01441     while(si<length)
01442         dst[di++]= src[si++];
01443 nsc:
01444 
01445     memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
01446 
01447     *dst_length= di;
01448     *consumed= si + 1;//+1 for the header
01449 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
01450     return dst;
01451 }
01452 
01453 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
01454     int v= *src;
01455     int r;
01456 
01457     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
01458 
01459     for(r=1; r<9; r++){
01460         if(v&1) return r;
01461         v>>=1;
01462     }
01463     return 0;
01464 }
01465 
01470 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
01471 #define stride 16
01472     int i;
01473     int temp[16]; //FIXME check if this is a good idea
01474     static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
01475     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
01476 
01477 //memset(block, 64, 2*256);
01478 //return;
01479     for(i=0; i<4; i++){
01480         const int offset= y_offset[i];
01481         const int z0= block[offset+stride*0] + block[offset+stride*4];
01482         const int z1= block[offset+stride*0] - block[offset+stride*4];
01483         const int z2= block[offset+stride*1] - block[offset+stride*5];
01484         const int z3= block[offset+stride*1] + block[offset+stride*5];
01485 
01486         temp[4*i+0]= z0+z3;
01487         temp[4*i+1]= z1+z2;
01488         temp[4*i+2]= z1-z2;
01489         temp[4*i+3]= z0-z3;
01490     }
01491 
01492     for(i=0; i<4; i++){
01493         const int offset= x_offset[i];
01494         const int z0= temp[4*0+i] + temp[4*2+i];
01495         const int z1= temp[4*0+i] - temp[4*2+i];
01496         const int z2= temp[4*1+i] - temp[4*3+i];
01497         const int z3= temp[4*1+i] + temp[4*3+i];
01498 
01499         block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
01500         block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
01501         block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
01502         block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
01503     }
01504 }
01505 
01506 #if 0
/*
 * Disabled code (never compiled).
 *
 * h264_luma_dc_dct_c() runs the same two butterfly passes as
 * h264_luma_dc_dequant_idct_c() over the 16 luma DC values in block, but
 * stores each result halved (>>1) instead of scaling it by a multiplier.
 * It has no local definition of stride, so it depends on the "stride"
 * macro still being defined at this point.
 */
01507 
01511 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
01512 //    const int qmul= dequant_coeff[qp][0];
01513     int i;
01514     int temp[16]; //FIXME check if this is a good idea
01515     static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
01516     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
01517 
/* first pass: butterflies per input group, results go to temp */
01518     for(i=0; i<4; i++){
01519         const int offset= y_offset[i];
01520         const int z0= block[offset+stride*0] + block[offset+stride*4];
01521         const int z1= block[offset+stride*0] - block[offset+stride*4];
01522         const int z2= block[offset+stride*1] - block[offset+stride*5];
01523         const int z3= block[offset+stride*1] + block[offset+stride*5];
01524 
01525         temp[4*i+0]= z0+z3;
01526         temp[4*i+1]= z1+z2;
01527         temp[4*i+2]= z1-z2;
01528         temp[4*i+3]= z0-z3;
01529     }
01530 
/* second pass: butterflies across temp, halved results written to block */
01531     for(i=0; i<4; i++){
01532         const int offset= x_offset[i];
01533         const int z0= temp[4*0+i] + temp[4*2+i];
01534         const int z1= temp[4*0+i] - temp[4*2+i];
01535         const int z2= temp[4*1+i] - temp[4*3+i];
01536         const int z3= temp[4*1+i] + temp[4*3+i];
01537 
01538         block[stride*0 +offset]= (z0 + z3)>>1;
01539         block[stride*2 +offset]= (z1 + z2)>>1;
01540         block[stride*8 +offset]= (z1 - z2)>>1;
01541         block[stride*10+offset]= (z0 - z3)>>1;
01542     }
01543 }
01544 #endif
01545 
01546 #undef xStride
01547 #undef stride
01548 
01549 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
01550     const int stride= 16*2;
01551     const int xStride= 16;
01552     int a,b,c,d,e;
01553 
01554     a= block[stride*0 + xStride*0];
01555     b= block[stride*0 + xStride*1];
01556     c= block[stride*1 + xStride*0];
01557     d= block[stride*1 + xStride*1];
01558 
01559     e= a-b;
01560     a= a+b;
01561     b= c-d;
01562     c= c+d;
01563 
01564     block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
01565     block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
01566     block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
01567     block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
01568 }
01569 
01570 #if 0
/*
 * Disabled code (never compiled).
 *
 * chroma_dc_dct_c() applies the same 2x2 butterfly as
 * chroma_dc_dequant_idct_c() to the four chroma DC values found at offsets
 * 0, 16, 32 and 48 of block, but stores the raw sums and differences
 * without any scaling.
 */
01571 static void chroma_dc_dct_c(DCTELEM *block){
01572     const int stride= 16*2;
01573     const int xStride= 16;
01574     int a,b,c,d,e;
01575 
01576     a= block[stride*0 + xStride*0];
01577     b= block[stride*0 + xStride*1];
01578     c= block[stride*1 + xStride*0];
01579     d= block[stride*1 + xStride*1];
01580 
/* a,b,c are reused below: a/e = sum/difference of the first pair,
 * c/b = sum/difference of the second pair */
01581     e= a-b;
01582     a= a+b;
01583     b= c-d;
01584     c= c+d;
01585 
01586     block[stride*0 + xStride*0]= (a+c);
01587     block[stride*0 + xStride*1]= (e+b);
01588     block[stride*1 + xStride*0]= (a-c);
01589     block[stride*1 + xStride*1]= (e-b);
01590 }
01591 #endif
01592 
01596 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
01597     return h->pps.chroma_qp_table[t][qscale];
01598 }
01599 
01600 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
01601                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01602                            int src_x_offset, int src_y_offset,
01603                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
01604     MpegEncContext * const s = &h->s;
01605     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
01606     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
01607     const int luma_xy= (mx&3) + ((my&3)<<2);
01608     uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
01609     uint8_t * src_cb, * src_cr;
01610     int extra_width= h->emu_edge_width;
01611     int extra_height= h->emu_edge_height;
01612     int emu=0;
01613     const int full_mx= mx>>2;
01614     const int full_my= my>>2;
01615     const int pic_width  = 16*s->mb_width;
01616     const int pic_height = 16*s->mb_height >> MB_FIELD;
01617 
01618     if(mx&7) extra_width -= 3;
01619     if(my&7) extra_height -= 3;
01620 
01621     if(   full_mx < 0-extra_width
01622        || full_my < 0-extra_height
01623        || full_mx + 16/*FIXME*/ > pic_width + extra_width
01624        || full_my + 16/*FIXME*/ > pic_height + extra_height){
01625         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
01626             src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
01627         emu=1;
01628     }
01629 
01630     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
01631     if(!square){
01632         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
01633     }
01634 
01635     if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
01636 
01637     if(MB_FIELD){
01638         // chroma offset when predicting from a field of opposite parity
01639         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
01640         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
01641     }
01642     src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
01643     src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
01644 
01645     if(emu){
01646         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
01647             src_cb= s->edge_emu_buffer;
01648     }
01649     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
01650 
01651     if(emu){
01652         ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
01653             src_cr= s->edge_emu_buffer;
01654     }
01655     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
01656 }
01657 
01658 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
01659                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01660                            int x_offset, int y_offset,
01661                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
01662                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
01663                            int list0, int list1){
01664     MpegEncContext * const s = &h->s;
01665     qpel_mc_func *qpix_op=  qpix_put;
01666     h264_chroma_mc_func chroma_op= chroma_put;
01667 
01668     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
01669     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
01670     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
01671     x_offset += 8*s->mb_x;
01672     y_offset += 8*(s->mb_y >> MB_FIELD);
01673 
01674     if(list0){
01675         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
01676         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
01677                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
01678                            qpix_op, chroma_op);
01679 
01680         qpix_op=  qpix_avg;
01681         chroma_op= chroma_avg;
01682     }
01683 
01684     if(list1){
01685         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
01686         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
01687                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
01688                            qpix_op, chroma_op);
01689     }
01690 }
01691 
01692 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
01693                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01694                            int x_offset, int y_offset,
01695                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
01696                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
01697                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
01698                            int list0, int list1){
01699     MpegEncContext * const s = &h->s;
01700 
01701     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
01702     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
01703     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
01704     x_offset += 8*s->mb_x;
01705     y_offset += 8*(s->mb_y >> MB_FIELD);
01706 
01707     if(list0 && list1){
01708         /* don't optimize for luma-only case, since B-frames usually
01709          * use implicit weights => chroma too. */
01710         uint8_t *tmp_cb = s->obmc_scratchpad;
01711         uint8_t *tmp_cr = s->obmc_scratchpad + 8;
01712         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
01713         int refn0 = h->ref_cache[0][ scan8[n] ];
01714         int refn1 = h->ref_cache[1][ scan8[n] ];
01715 
01716         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
01717                     dest_y, dest_cb, dest_cr,
01718                     x_offset, y_offset, qpix_put, chroma_put);
01719         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
01720                     tmp_y, tmp_cb, tmp_cr,
01721                     x_offset, y_offset, qpix_put, chroma_put);
01722 
01723         if(h->use_weight == 2){
01724             int weight0 = h->implicit_weight[refn0][refn1];
01725             int weight1 = 64 - weight0;
01726             luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
01727             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
01728             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
01729         }else{
01730             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
01731                             h->luma_weight[0][refn0], h->luma_weight[1][refn1],
01732                             h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
01733             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
01734                             h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
01735                             h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
01736             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
01737                             h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
01738                             h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
01739         }
01740     }else{
01741         int list = list1 ? 1 : 0;
01742         int refn = h->ref_cache[list][ scan8[n] ];
01743         Picture *ref= &h->ref_list[list][refn];
01744         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
01745                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
01746                     qpix_put, chroma_put);
01747 
01748         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
01749                        h->luma_weight[list][refn], h->luma_offset[list][refn]);
01750         if(h->use_weight_chroma){
01751             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
01752                              h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
01753             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
01754                              h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
01755         }
01756     }
01757 }
01758 
01759 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
01760                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01761                            int x_offset, int y_offset,
01762                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
01763                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
01764                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
01765                            int list0, int list1){
01766     if((h->use_weight==2 && list0 && list1
01767         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
01768        || h->use_weight==1)
01769         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
01770                          x_offset, y_offset, qpix_put, chroma_put,
01771                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
01772     else
01773         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
01774                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
01775 }
01776 
01777 static inline void prefetch_motion(H264Context *h, int list){
01778     /* fetch pixels for estimated mv 4 macroblocks ahead
01779      * optimized for 64byte cache lines */
01780     MpegEncContext * const s = &h->s;
01781     const int refn = h->ref_cache[list][scan8[0]];
01782     if(refn >= 0){
01783         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
01784         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
01785         uint8_t **src= h->ref_list[list][refn].data;
01786         int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
01787         s->dsp.prefetch(src[0]+off, s->linesize, 4);
01788         off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
01789         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
01790     }
01791 }
01792 
/**
 * Performs inter prediction for the current macroblock.
 *
 * The macroblock type (and for 8x8 macroblocks each sub macroblock type)
 * decides how the macroblock is split; mc_part() is called once per
 * partition with the matching function tables. Offsets passed to mc_part()
 * are in chroma samples (the luma destination uses twice the offset).
 *
 * @param h          decoder context
 * @param dest_y     luma destination of the macroblock
 * @param dest_cb    cb destination of the macroblock
 * @param dest_cr    cr destination of the macroblock
 * @param qpix_put   luma "put" function tables, indexed by block size class
 * @param chroma_put chroma "put" functions, indexed by block size class
 * @param qpix_avg   luma "avg" function tables, indexed by block size class
 * @param chroma_avg chroma "avg" functions, indexed by block size class
 * @param weight_op  weighting functions, indexed by partition shape
 * @param weight_avg bi-prediction weighting functions, indexed by partition shape
 */
01793 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01794                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
01795                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
01796                       h264_weight_func *weight_op, h264_biweight_func *weight_avg){
01797     MpegEncContext * const s = &h->s;
01798     const int mb_xy= h->mb_xy;
01799     const int mb_type= s->current_picture.mb_type[mb_xy];
01800 
01801     assert(IS_INTER(mb_type));
01802 
/* list 0 is prefetched before, list 1 after the prediction */
01803     prefetch_motion(h, 0);
01804 
01805     if(IS_16X16(mb_type)){
/* one square partition covering the whole macroblock */
01806         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
01807                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
01808                 &weight_op[0], &weight_avg[0],
01809                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
01810     }else if(IS_16X8(mb_type)){
/* two partitions stacked vertically: blocks 0 and 8, second at y_offset 4;
 * each is done as two luma calls 8 samples apart (delta = 8) */
01811         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
01812                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
01813                 &weight_op[1], &weight_avg[1],
01814                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
01815         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
01816                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
01817                 &weight_op[1], &weight_avg[1],
01818                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
01819     }else if(IS_8X16(mb_type)){
/* two partitions side by side: blocks 0 and 4, second at x_offset 4;
 * the second luma call is 8 lines further down (delta = 8*mb_linesize) */
01820         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
01821                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
01822                 &weight_op[2], &weight_avg[2],
01823                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
01824         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
01825                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
01826                 &weight_op[2], &weight_avg[2],
01827                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
01828     }else{
01829         int i;
01830 
01831         assert(IS_8X8(mb_type));
01832 
/* four 8x8 sub macroblocks, each with its own partitioning */
01833         for(i=0; i<4; i++){
01834             const int sub_mb_type= h->sub_mb_type[i];
01835             const int n= 4*i;
/* sub macroblock origin: 0 or 4 in each direction */
01836             int x_offset= (i&1)<<2;
01837             int y_offset= (i&2)<<1;
01838 
01839             if(IS_SUB_8X8(sub_mb_type)){
01840                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
01841                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
01842                     &weight_op[3], &weight_avg[3],
01843                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01844             }else if(IS_SUB_8X4(sub_mb_type)){
/* upper and lower half: blocks n and n+2, second at y_offset+2 */
01845                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
01846                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
01847                     &weight_op[4], &weight_avg[4],
01848                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01849                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
01850                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
01851                     &weight_op[4], &weight_avg[4],
01852                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01853             }else if(IS_SUB_4X8(sub_mb_type)){
/* left and right half: blocks n and n+1, second at x_offset+2 */
01854                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
01855                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
01856                     &weight_op[5], &weight_avg[5],
01857                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01858                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
01859                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
01860                     &weight_op[5], &weight_avg[5],
01861                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01862             }else{
01863                 int j;
01864                 assert(IS_SUB_4X4(sub_mb_type));
/* four 4x4 blocks, each offset by 0 or 2 inside the sub macroblock */
01865                 for(j=0; j<4; j++){
01866                     int sub_x_offset= x_offset + 2*(j&1);
01867                     int sub_y_offset= y_offset +   (j&2);
01868                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
01869                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
01870                         &weight_op[6], &weight_avg[6],
01871                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01872                 }
01873             }
01874         }
01875     }
01876 
01877     prefetch_motion(h, 1);
01878 }
01879 
01880 static av_cold void init_cavlc_level_tab(void){
01881     int suffix_length, mask;
01882     unsigned int i;
01883 
01884     for(suffix_length=0; suffix_length<7; suffix_length++){
01885         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
01886             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
01887             int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
01888 
01889             mask= -(level_code&1);
01890             level_code= (((2+level_code)>>1) ^ mask) - mask;
01891             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
01892                 cavlc_level_tab[suffix_length][i][0]= level_code;
01893                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
01894             }else if(prefix + 1 <= LEVEL_TAB_BITS){
01895                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
01896                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
01897             }else{
01898                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
01899                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
01900             }
01901         }
01902     }
01903 }
01904 
01905 static av_cold void decode_init_vlc(void){
01906     static int done = 0;
01907 
01908     if (!done) {
01909         int i;
01910         int offset;
01911         done = 1;
01912 
01913         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
01914         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
01915         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
01916                  &chroma_dc_coeff_token_len [0], 1, 1,
01917                  &chroma_dc_coeff_token_bits[0], 1, 1,
01918                  INIT_VLC_USE_NEW_STATIC);
01919 
01920         offset = 0;
01921         for(i=0; i<4; i++){
01922             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
01923             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
01924             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
01925                      &coeff_token_len [i][0], 1, 1,
01926                      &coeff_token_bits[i][0], 1, 1,
01927                      INIT_VLC_USE_NEW_STATIC);
01928             offset += coeff_token_vlc_tables_size[i];
01929         }
01930         /*
01931          * This is a one time safety check to make sure that
01932          * the packed static coeff_token_vlc table sizes
01933          * were initialized correctly.
01934          */
01935         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
01936 
01937         for(i=0; i<3; i++){
01938             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
01939             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
01940             init_vlc(&chroma_dc_total_zeros_vlc[i],
01941                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
01942                      &chroma_dc_total_zeros_len [i][0], 1, 1,
01943                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
01944                      INIT_VLC_USE_NEW_STATIC);
01945         }
01946         for(i=0; i<15; i++){
01947             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
01948             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
01949             init_vlc(&total_zeros_vlc[i],
01950                      TOTAL_ZEROS_VLC_BITS, 16,
01951                      &total_zeros_len [i][0], 1, 1,
01952                      &total_zeros_bits[i][0], 1, 1,
01953                      INIT_VLC_USE_NEW_STATIC);
01954         }
01955 
01956         for(i=0; i<6; i++){
01957             run_vlc[i].table = run_vlc_tables[i];
01958             run_vlc[i].table_allocated = run_vlc_tables_size;
01959             init_vlc(&run_vlc[i],
01960                      RUN_VLC_BITS, 7,
01961                      &run_len [i][0], 1, 1,
01962                      &run_bits[i][0], 1, 1,
01963                      INIT_VLC_USE_NEW_STATIC);
01964         }
01965         run7_vlc.table = run7_vlc_table,
01966         run7_vlc.table_allocated = run7_vlc_table_size;
01967         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
01968                  &run_len [6][0], 1, 1,
01969                  &run_bits[6][0], 1, 1,
01970                  INIT_VLC_USE_NEW_STATIC);
01971 
01972         init_cavlc_level_tab();
01973     }
01974 }
01975 
01976 static void free_tables(H264Context *h){
01977     int i;
01978     H264Context *hx;
01979     av_freep(&h->intra4x4_pred_mode);
01980     av_freep(&h->chroma_pred_mode_table);
01981     av_freep(&h->cbp_table);
01982     av_freep(&h->mvd_table[0]);
01983     av_freep(&h->mvd_table[1]);
01984     av_freep(&h->direct_table);
01985     av_freep(&h->non_zero_count);
01986     av_freep(&h->slice_table_base);
01987     h->slice_table= NULL;
01988 
01989     av_freep(&h->mb2b_xy);
01990     av_freep(&h->mb2b8_xy);
01991 
01992     for(i = 0; i < h->s.avctx->thread_count; i++) {
01993         hx = h->thread_context[i];
01994         if(!hx) continue;
01995         av_freep(&hx->top_borders[1]);
01996         av_freep(&hx->top_borders[0]);
01997         av_freep(&hx->s.obmc_scratchpad);
01998     }
01999 }
02000 
02001 static void init_dequant8_coeff_table(H264Context *h){
02002     int i,q,x;
02003     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
02004     h->dequant8_coeff[0] = h->dequant8_buffer[0];
02005     h->dequant8_coeff[1] = h->dequant8_buffer[1];
02006 
02007     for(i=0; i<2; i++ ){
02008         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
02009             h->dequant8_coeff[1] = h->dequant8_buffer[0];
02010             break;
02011         }
02012 
02013         for(q=0; q<52; q++){
02014             int shift = div6[q];
02015             int idx = rem6[q];
02016             for(x=0; x<64; x++)
02017                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
02018                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
02019                     h->pps.scaling_matrix8[i][x]) << shift;
02020         }
02021     }
02022 }
02023 
02024 static void init_dequant4_coeff_table(H264Context *h){
02025     int i,j,q,x;
02026     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
02027     for(i=0; i<6; i++ ){
02028         h->dequant4_coeff[i] = h->dequant4_buffer[i];
02029         for(j=0; j<i; j++){
02030             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
02031                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
02032                 break;
02033             }
02034         }
02035         if(j<i)
02036             continue;
02037 
02038         for(q=0; q<52; q++){
02039             int shift = div6[q] + 2;
02040             int idx = rem6[q];
02041             for(x=0; x<16; x++)
02042                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
02043                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
02044                     h->pps.scaling_matrix4[i][x]) << shift;
02045         }
02046     }
02047 }
02048 
02049 static void init_dequant_tables(H264Context *h){
02050     int i,x;
02051     init_dequant4_coeff_table(h);
02052     if(h->pps.transform_8x8_mode)
02053         init_dequant8_coeff_table(h);
02054     if(h->sps.transform_bypass){
02055         for(i=0; i<6; i++)
02056             for(x=0; x<16; x++)
02057                 h->dequant4_coeff[i][0][x] = 1<<6;
02058         if(h->pps.transform_8x8_mode)
02059             for(i=0; i<2; i++)
02060                 for(x=0; x<64; x++)
02061                     h->dequant8_coeff[i][0][x] = 1<<6;
02062     }
02063 }
02064 
02065 
02070 static int alloc_tables(H264Context *h){
02071     MpegEncContext * const s = &h->s;
02072     const int big_mb_num= s->mb_stride * (s->mb_height+1);
02073     int x,y;
02074 
02075     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
02076 
02077     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
02078     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
02079     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
02080 
02081     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
02082     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
02083     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
02084     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
02085 
02086     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
02087     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
02088 
02089     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
02090     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
02091     for(y=0; y<s->mb_height; y++){
02092         for(x=0; x<s->mb_width; x++){
02093             const int mb_xy= x + y*s->mb_stride;
02094             const int b_xy = 4*x + 4*y*h->b_stride;
02095             const int b8_xy= 2*x + 2*y*h->b8_stride;
02096 
02097             h->mb2b_xy [mb_xy]= b_xy;
02098             h->mb2b8_xy[mb_xy]= b8_xy;
02099         }
02100     }
02101 
02102     s->obmc_scratchpad = NULL;
02103 
02104     if(!h->dequant4_coeff[0])
02105         init_dequant_tables(h);
02106 
02107     return 0;
02108 fail:
02109     free_tables(h);
02110     return -1;
02111 }
02112 
02116 static void clone_tables(H264Context *dst, H264Context *src){
02117     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
02118     dst->non_zero_count           = src->non_zero_count;
02119     dst->slice_table              = src->slice_table;
02120     dst->cbp_table                = src->cbp_table;
02121     dst->mb2b_xy                  = src->mb2b_xy;
02122     dst->mb2b8_xy                 = src->mb2b8_xy;
02123     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
02124     dst->mvd_table[0]             = src->mvd_table[0];
02125     dst->mvd_table[1]             = src->mvd_table[1];
02126     dst->direct_table             = src->direct_table;
02127 
02128     dst->s.obmc_scratchpad = NULL;
02129     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
02130 }
02131 
02136 static int context_init(H264Context *h){
02137     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
02138     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
02139 
02140     return 0;
02141 fail:
02142     return -1; // free_tables will clean up for us
02143 }
02144 
02145 static av_cold void common_init(H264Context *h){
02146     MpegEncContext * const s = &h->s;
02147 
02148     s->width = s->avctx->width;
02149     s->height = s->avctx->height;
02150     s->codec_id= s->avctx->codec->id;
02151 
02152     ff_h264_pred_init(&h->hpc, s->codec_id);
02153 
02154     h->dequant_coeff_pps= -1;
02155     s->unrestricted_mv=1;
02156     s->decode=1; //FIXME
02157 
02158     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
02159 
02160     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
02161     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
02162 }
02163 
02169 static void reset_sei(H264Context *h) {
02170     h->sei_recovery_frame_cnt       = -1;
02171     h->sei_dpb_output_delay         =  0;
02172     h->sei_cpb_removal_delay        = -1;
02173     h->sei_buffering_period_present =  0;
02174 }
02175 
02176 static av_cold int decode_init(AVCodecContext *avctx){
02177     H264Context *h= avctx->priv_data;
02178     MpegEncContext * const s = &h->s;
02179 
02180     MPV_decode_defaults(s);
02181 
02182     s->avctx = avctx;
02183     common_init(h);
02184 
02185     s->out_format = FMT_H264;
02186     s->workaround_bugs= avctx->workaround_bugs;
02187 
02188     // set defaults
02189 //    s->decode_mb= ff_h263_decode_mb;
02190     s->quarter_sample = 1;
02191     if(!avctx->has_b_frames)
02192     s->low_delay= 1;
02193 
02194     if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
02195         avctx->pix_fmt= PIX_FMT_VDPAU_H264;
02196     else
02197         avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
02198     avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
02199 
02200     decode_init_vlc();
02201 
02202     if(avctx->extradata_size > 0 && avctx->extradata &&
02203        *(char *)avctx->extradata == 1){
02204         h->is_avc = 1;
02205         h->got_avcC = 0;
02206     } else {
02207         h->is_avc = 0;
02208     }
02209 
02210     h->thread_context[0] = h;
02211     h->outputed_poc = INT_MIN;
02212     h->prev_poc_msb= 1<<16;
02213     reset_sei(h);
02214     if(avctx->codec_id == CODEC_ID_H264){
02215         if(avctx->ticks_per_frame == 1){
02216             s->avctx->time_base.den *=2;
02217         }
02218         avctx->ticks_per_frame = 2;
02219     }
02220     return 0;
02221 }
02222 
02223 static int frame_start(H264Context *h){
02224     MpegEncContext * const s = &h->s;
02225     int i;
02226 
02227     if(MPV_frame_start(s, s->avctx) < 0)
02228         return -1;
02229     ff_er_frame_start(s);
02230     /*
02231      * MPV_frame_start uses pict_type to derive key_frame.
02232      * This is incorrect for H.264; IDR markings must be used.
02233      * Zero here; IDR markings per slice in frame or fields are ORed in later.
02234      * See decode_nal_units().
02235      */
02236     s->current_picture_ptr->key_frame= 0;
02237 
02238     assert(s->linesize && s->uvlinesize);
02239 
02240     for(i=0; i<16; i++){
02241         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
02242         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
02243     }
02244     for(i=0; i<4; i++){
02245         h->block_offset[16+i]=
02246         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
02247         h->block_offset[24+16+i]=
02248         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
02249     }
02250 
02251     /* can't be in alloc_tables because linesize isn't known there.
02252      * FIXME: redo bipred weight to not require extra buffer? */
02253     for(i = 0; i < s->avctx->thread_count; i++)
02254         if(!h->thread_context[i]->s.obmc_scratchpad)
02255             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
02256 
02257     /* some macroblocks will be accessed before they're available */
02258     if(FRAME_MBAFF || s->avctx->thread_count > 1)
02259         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
02260 
02261 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
02262 
02263     // We mark the current picture as non-reference after allocating it, so
02264     // that if we break out due to an error it can be released automatically
02265     // in the next MPV_frame_start().
02266     // SVQ3 as well as most other codecs have only last/next/current and thus
02267     // get released even with set reference, besides SVQ3 and others do not
02268     // mark frames as reference later "naturally".
02269     if(s->codec_id != CODEC_ID_SVQ3)
02270         s->current_picture_ptr->reference= 0;
02271 
02272     s->current_picture_ptr->field_poc[0]=
02273     s->current_picture_ptr->field_poc[1]= INT_MAX;
02274     assert(s->current_picture_ptr->long_ref==0);
02275 
02276     return 0;
02277 }
02278 
02279 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
02280     MpegEncContext * const s = &h->s;
02281     int i;
02282     int step    = 1;
02283     int offset  = 1;
02284     int uvoffset= 1;
02285     int top_idx = 1;
02286     int skiplast= 0;
02287 
02288     src_y  -=   linesize;
02289     src_cb -= uvlinesize;
02290     src_cr -= uvlinesize;
02291 
02292     if(!simple && FRAME_MBAFF){
02293         if(s->mb_y&1){
02294             offset  = MB_MBAFF ? 1 : 17;
02295             uvoffset= MB_MBAFF ? 1 : 9;
02296             if(!MB_MBAFF){
02297                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
02298                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
02299                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02300                     *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
02301                     *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
02302                 }
02303             }
02304         }else{
02305             if(!MB_MBAFF){
02306                 h->left_border[0]= h->top_borders[0][s->mb_x][15];
02307                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02308                     h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
02309                     h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
02310                 }
02311                 skiplast= 1;
02312             }
02313             offset  =
02314             uvoffset=
02315             top_idx = MB_MBAFF ? 0 : 1;
02316         }
02317         step= MB_MBAFF ? 2 : 1;
02318     }
02319 
02320     // There are two lines saved, the line above the the top macroblock of a pair,
02321     // and the line above the bottom macroblock
02322     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
02323     for(i=1; i<17 - skiplast; i++){
02324         h->left_border[offset+i*step]= src_y[15+i*  linesize];
02325     }
02326 
02327     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
02328     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
02329 
02330     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02331         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
02332         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
02333         for(i=1; i<9 - skiplast; i++){
02334             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
02335             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
02336         }
02337         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
02338         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
02339     }
02340 }
02341 
02342 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
02343     MpegEncContext * const s = &h->s;
02344     int temp8, i;
02345     uint64_t temp64;
02346     int deblock_left;
02347     int deblock_top;
02348     int mb_xy;
02349     int step    = 1;
02350     int offset  = 1;
02351     int uvoffset= 1;
02352     int top_idx = 1;
02353 
02354     if(!simple && FRAME_MBAFF){
02355         if(s->mb_y&1){
02356             offset  = MB_MBAFF ? 1 : 17;
02357             uvoffset= MB_MBAFF ? 1 : 9;
02358         }else{
02359             offset  =
02360             uvoffset=
02361             top_idx = MB_MBAFF ? 0 : 1;
02362         }
02363         step= MB_MBAFF ? 2 : 1;
02364     }
02365 
02366     if(h->deblocking_filter == 2) {
02367         mb_xy = h->mb_xy;
02368         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
02369         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
02370     } else {
02371         deblock_left = (s->mb_x > 0);
02372         deblock_top =  (s->mb_y > !!MB_FIELD);
02373     }
02374 
02375     src_y  -=   linesize + 1;
02376     src_cb -= uvlinesize + 1;
02377     src_cr -= uvlinesize + 1;
02378 
02379 #define XCHG(a,b,t,xchg)\
02380 t= a;\
02381 if(xchg)\
02382     a= b;\
02383 b= t;
02384 
02385     if(deblock_left){
02386         for(i = !deblock_top; i<16; i++){
02387             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
02388         }
02389         XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
02390     }
02391 
02392     if(deblock_top){
02393         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
02394         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
02395         if(s->mb_x+1 < s->mb_width){
02396             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
02397         }
02398     }
02399 
02400     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02401         if(deblock_left){
02402             for(i = !deblock_top; i<8; i++){
02403                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
02404                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
02405             }
02406             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
02407             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
02408         }
02409         if(deblock_top){
02410             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
02411             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
02412         }
02413     }
02414 }
02415 
02416 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
02417     MpegEncContext * const s = &h->s;
02418     const int mb_x= s->mb_x;
02419     const int mb_y= s->mb_y;
02420     const int mb_xy= h->mb_xy;
02421     const int mb_type= s->current_picture.mb_type[mb_xy];
02422     uint8_t  *dest_y, *dest_cb, *dest_cr;
02423     int linesize, uvlinesize /*dct_offset*/;
02424     int i;
02425     int *block_offset = &h->block_offset[0];
02426     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
02427     /* is_h264 should always be true if SVQ3 is disabled. */
02428     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
02429     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
02430     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
02431 
02432     dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
02433     dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
02434     dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
02435 
02436     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
02437     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
02438 
02439     if (!simple && MB_FIELD) {
02440         linesize   = h->mb_linesize   = s->linesize * 2;
02441         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
02442         block_offset = &h->block_offset[24];
02443         if(mb_y&1){ //FIXME move out of this function?
02444             dest_y -= s->linesize*15;
02445             dest_cb-= s->uvlinesize*7;
02446             dest_cr-= s->uvlinesize*7;
02447         }
02448         if(FRAME_MBAFF) {
02449             int list;
02450             for(list=0; list<h->list_count; list++){
02451                 if(!USES_LIST(mb_type, list))
02452                     continue;
02453                 if(IS_16X16(mb_type)){
02454                     int8_t *ref = &h->ref_cache[list][scan8[0]];
02455                     fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
02456                 }else{
02457                     for(i=0; i<16; i+=4){
02458                         int ref = h->ref_cache[list][scan8[i]];
02459                         if(ref >= 0)
02460                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
02461                     }
02462                 }
02463             }
02464         }
02465     } else {
02466         linesize   = h->mb_linesize   = s->linesize;
02467         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
02468 //        dct_offset = s->linesize * 16;
02469     }
02470 
02471     if (!simple && IS_INTRA_PCM(mb_type)) {
02472         for (i=0; i<16; i++) {
02473             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
02474         }
02475         for (i=0; i<8; i++) {
02476             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
02477             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
02478         }
02479     } else {
02480         if(IS_INTRA(mb_type)){
02481             if(h->deblocking_filter)
02482                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
02483 
02484             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02485                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
02486                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
02487             }
02488 
02489             if(IS_INTRA4x4(mb_type)){
02490                 if(simple || !s->encoding){
02491                     if(IS_8x8DCT(mb_type)){
02492                         if(transform_bypass){
02493                             idct_dc_add =
02494                             idct_add    = s->dsp.add_pixels8;
02495                         }else{
02496                             idct_dc_add = s->dsp.h264_idct8_dc_add;
02497                             idct_add    = s->dsp.h264_idct8_add;
02498                         }
02499                         for(i=0; i<16; i+=4){
02500                             uint8_t * const ptr= dest_y + block_offset[i];
02501                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
02502                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
02503                                 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
02504                             }else{
02505                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
02506                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
02507                                                             (h->topright_samples_available<<i)&0x4000, linesize);
02508                                 if(nnz){
02509                                     if(nnz == 1 && h->mb[i*16])
02510                                         idct_dc_add(ptr, h->mb + i*16, linesize);
02511                                     else
02512                                         idct_add   (ptr, h->mb + i*16, linesize);
02513                                 }
02514                             }
02515                         }
02516                     }else{
02517                         if(transform_bypass){
02518                             idct_dc_add =
02519                             idct_add    = s->dsp.add_pixels4;
02520                         }else{
02521                             idct_dc_add = s->dsp.h264_idct_dc_add;
02522                             idct_add    = s->dsp.h264_idct_add;
02523                         }
02524                         for(i=0; i<16; i++){
02525                             uint8_t * const ptr= dest_y + block_offset[i];
02526                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
02527 
02528                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
02529                                 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
02530                             }else{
02531                                 uint8_t *topright;
02532                                 int nnz, tr;
02533                                 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
02534                                     const int topright_avail= (h->topright_samples_available<<i)&0x8000;
02535                                     assert(mb_y || linesize <= block_offset[i]);
02536                                     if(!topright_avail){
02537                                         tr= ptr[3 - linesize]*0x01010101;
02538                                         topright= (uint8_t*) &tr;
02539                                     }else
02540                                         topright= ptr + 4 - linesize;
02541                                 }else
02542                                     topright= NULL;
02543 
02544                                 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
02545                                 nnz = h->non_zero_count_cache[ scan8[i] ];
02546                                 if(nnz){
02547                                     if(is_h264){
02548                                         if(nnz == 1 && h->mb[i*16])
02549                                             idct_dc_add(ptr, h->mb + i*16, linesize);
02550                                         else
02551                                             idct_add   (ptr, h->mb + i*16, linesize);
02552                                     }else
02553                                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
02554                                 }
02555                             }
02556                         }
02557                     }
02558                 }
02559             }else{
02560                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
02561                 if(is_h264){
02562                     if(!transform_bypass)
02563                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
02564                 }else
02565                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
02566             }
02567             if(h->deblocking_filter)
02568                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
02569         }else if(is_h264){
02570             hl_motion(h, dest_y, dest_cb, dest_cr,
02571                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
02572                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
02573                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
02574         }
02575 
02576 
02577         if(!IS_INTRA4x4(mb_type)){
02578             if(is_h264){
02579                 if(IS_INTRA16x16(mb_type)){
02580                     if(transform_bypass){
02581                         if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
02582                             h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
02583                         }else{
02584                             for(i=0; i<16; i++){
02585                                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
02586                                     s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
02587                             }
02588                         }
02589                     }else{
02590                          s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
02591                     }
02592                 }else if(h->cbp&15){
02593                     if(transform_bypass){
02594                         const int di = IS_8x8DCT(mb_type) ? 4 : 1;
02595                         idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
02596                         for(i=0; i<16; i+=di){
02597                             if(h->non_zero_count_cache[ scan8[i] ]){
02598                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
02599                             }
02600                         }
02601                     }else{
02602                         if(IS_8x8DCT(mb_type)){
02603                             s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
02604                         }else{
02605                             s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
02606                         }
02607                     }
02608                 }
02609             }else{
02610                 for(i=0; i<16; i++){
02611                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
02612                         uint8_t * const ptr= dest_y + block_offset[i];
02613                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
02614                     }
02615                 }
02616             }
02617         }
02618 
02619         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
02620             uint8_t *dest[2] = {dest_cb, dest_cr};
02621             if(transform_bypass){
02622                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
02623                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
02624                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
02625                 }else{
02626                     idct_add = s->dsp.add_pixels4;
02627                     for(i=16; i<16+8; i++){
02628                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
02629                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
02630                     }
02631                 }
02632             }else{
02633                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
02634                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
02635                 if(is_h264){
02636                     idct_add = s->dsp.h264_idct_add;
02637                     idct_dc_add = s->dsp.h264_idct_dc_add;
02638                     for(i=16; i<16+8; i++){
02639                         if(h->non_zero_count_cache[ scan8[i] ])
02640                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
02641                         else if(h->mb[i*16])
02642                             idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
02643                     }
02644                 }else{
02645                     for(i=16; i<16+8; i++){
02646                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
02647                             uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
02648                             svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
02649                         }
02650                     }
02651                 }
02652             }
02653         }
02654     }
02655     if(h->cbp || IS_INTRA(mb_type))
02656         s->dsp.clear_blocks(h->mb);
02657 
02658     if(h->deblocking_filter) {
02659         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
02660         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
02661         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
02662         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
02663         if (!simple && FRAME_MBAFF) {
02664             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
02665         } else {
02666             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
02667         }
02668     }
02669 }
02670 
02674 static void hl_decode_mb_simple(H264Context *h){
02675     hl_decode_mb_internal(h, 1);
02676 }
02677 
02681 static void av_noinline hl_decode_mb_complex(H264Context *h){
02682     hl_decode_mb_internal(h, 0);
02683 }
02684 
02685 static void hl_decode_mb(H264Context *h){
02686     MpegEncContext * const s = &h->s;
02687     const int mb_xy= h->mb_xy;
02688     const int mb_type= s->current_picture.mb_type[mb_xy];
02689     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
02690 
02691     if (is_complex)
02692         hl_decode_mb_complex(h);
02693     else hl_decode_mb_simple(h);
02694 }
02695 
02696 static void pic_as_field(Picture *pic, const int parity){
02697     int i;
02698     for (i = 0; i < 4; ++i) {
02699         if (parity == PICT_BOTTOM_FIELD)
02700             pic->data[i] += pic->linesize[i];
02701         pic->reference = parity;
02702         pic->linesize[i] *= 2;
02703     }
02704     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
02705 }
02706 
02707 static int split_field_copy(Picture *dest, Picture *src,
02708                             int parity, int id_add){
02709     int match = !!(src->reference & parity);
02710 
02711     if (match) {
02712         *dest = *src;
02713         if(parity != PICT_FRAME){
02714             pic_as_field(dest, parity);
02715             dest->pic_id *= 2;
02716             dest->pic_id += id_add;
02717         }
02718     }
02719 
02720     return match;
02721 }
02722 
02723 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
02724     int i[2]={0};
02725     int index=0;
02726 
02727     while(i[0]<len || i[1]<len){
02728         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
02729             i[0]++;
02730         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
02731             i[1]++;
02732         if(i[0] < len){
02733             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
02734             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
02735         }
02736         if(i[1] < len){
02737             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
02738             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
02739         }
02740     }
02741 
02742     return index;
02743 }
02744 
02745 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
02746     int i, best_poc;
02747     int out_i= 0;
02748 
02749     for(;;){
02750         best_poc= dir ? INT_MIN : INT_MAX;
02751 
02752         for(i=0; i<len; i++){
02753             const int poc= src[i]->poc;
02754             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
02755                 best_poc= poc;
02756                 sorted[out_i]= src[i];
02757             }
02758         }
02759         if(best_poc == (dir ? INT_MIN : INT_MAX))
02760             break;
02761         limit= sorted[out_i++]->poc - dir;
02762     }
02763     return out_i;
02764 }
02765 
02769 static int fill_default_ref_list(H264Context *h){
02770     MpegEncContext * const s = &h->s;
02771     int i, len;
02772 
02773     if(h->slice_type_nos==FF_B_TYPE){
02774         Picture *sorted[32];
02775         int cur_poc, list;
02776         int lens[2];
02777 
02778         if(FIELD_PICTURE)
02779             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
02780         else
02781             cur_poc= s->current_picture_ptr->poc;
02782 
02783         for(list= 0; list<2; list++){
02784             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
02785             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
02786             assert(len<=32);
02787             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
02788             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
02789             assert(len<=32);
02790 
02791             if(len < h->ref_count[list])
02792                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
02793             lens[list]= len;
02794         }
02795 
02796         if(lens[0] == lens[1] && lens[1] > 1){
02797             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
02798             if(i == lens[0])
02799                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
02800         }
02801     }else{
02802         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
02803         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
02804         assert(len <= 32);
02805         if(len < h->ref_count[0])
02806             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
02807     }
02808 #ifdef TRACE
02809     for (i=0; i<h->ref_count[0]; i++) {
02810         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
02811     }
02812     if(h->slice_type_nos==FF_B_TYPE){
02813         for (i=0; i<h->ref_count[1]; i++) {
02814             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
02815         }
02816     }
02817 #endif
02818     return 0;
02819 }
02820 
02821 static void print_short_term(H264Context *h);
02822 static void print_long_term(H264Context *h);
02823 
02834 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
02835     MpegEncContext * const s = &h->s;
02836 
02837     *structure = s->picture_structure;
02838     if(FIELD_PICTURE){
02839         if (!(pic_num & 1))
02840             /* opposite field */
02841             *structure ^= PICT_FRAME;
02842         pic_num >>= 1;
02843     }
02844 
02845     return pic_num;
02846 }
02847 
/**
 * Build h->ref_list[] for the current slice.
 *
 * Each list starts as a copy of h->default_ref_list[]. If the bitstream
 * (s->gb) signals reordering for that list, the reordering commands are
 * read and applied one index at a time until the terminating idc 3.
 * A command whose picture cannot be found is logged and its slot zeroed.
 * Afterwards any entry whose data[0] is still NULL is logged and replaced
 * by the current picture.
 *
 * @return 0 on success, -1 if the command stream is invalid (more commands
 *         than h->ref_count[list], abs_diff_pic_num above h->max_pic_num,
 *         long_term_pic_idx above 31, or an idc above 3)
 */
02848 static int decode_ref_pic_list_reordering(H264Context *h){
02849     MpegEncContext * const s = &h->s;
02850     int list, index, pic_structure;
02851 
02852     print_short_term(h);
02853     print_long_term(h);
02854 
02855     for(list=0; list<h->list_count; list++){
02856         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
02857 
          /* one flag bit per list: are reordering commands present? */
02858         if(get_bits1(&s->gb)){
              /* running picture number prediction, updated by every idc 0/1 command */
02859             int pred= h->curr_pic_num;
02860 
02861             for(index=0; ; index++){
02862                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
02863                 unsigned int pic_id;
02864                 int i;
02865                 Picture *ref = NULL;
02866 
                  /* idc 3 ends the command list for this reference list */
02867                 if(reordering_of_pic_nums_idc==3)
02868                     break;
02869 
02870                 if(index >= h->ref_count[list]){
02871                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
02872                     return -1;
02873                 }
02874 
02875                 if(reordering_of_pic_nums_idc<3){
                      /* idc 0/1: short-term picture addressed by a pic num difference */
02876                     if(reordering_of_pic_nums_idc<2){
02877                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
02878                         int frame_num;
02879 
02880                         if(abs_diff_pic_num > h->max_pic_num){
02881                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
02882                             return -1;
02883                         }
02884 
                          /* idc 0 subtracts, idc 1 adds; the mask wraps the result, assuming max_pic_num is a power of two -- TODO confirm */
02885                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
02886                         else                                pred+= abs_diff_pic_num;
02887                         pred &= h->max_pic_num - 1;
02888 
02889                         frame_num = pic_num_extract(h, pred, &pic_structure);
02890 
                          /* search short_ref from the end; i stays >= 0 only if a match was found */
02891                         for(i= h->short_ref_count-1; i>=0; i--){
02892                             ref = h->short_ref[i];
02893                             assert(ref->reference);
02894                             assert(!ref->long_ref);
02895                             if(
02896                                    ref->frame_num == frame_num &&
02897                                    (ref->reference & pic_structure)
02898                               )
02899                                 break;
02900                         }
02901                         if(i>=0)
02902                             ref->pic_id= pred;
02903                     }else{
                          /* idc 2: long-term picture addressed by its index */
02904                         int long_idx;
02905                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
02906 
02907                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
02908 
02909                         if(long_idx>31){
02910                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
02911                             return -1;
02912                         }
02913                         ref = h->long_ref[long_idx];
02914                         assert(!(ref && !ref->reference));
02915                         if(ref && (ref->reference & pic_structure)){
02916                             ref->pic_id= pic_id;
02917                             assert(ref->long_ref);
02918                             i=0;
02919                         }else{
                              /* i < 0 marks "picture not found" for the code below */
02920                             i=-1;
02921                         }
02922                     }
02923 
02924                     if (i < 0) {
02925                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
02926                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
02927                     } else {
                          /* locate the picture's current slot (or stop at the last entry),
                           * shift the entries in between down by one and insert it at index */
02928                         for(i=index; i+1<h->ref_count[list]; i++){
02929                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
02930                                 break;
02931                         }
02932                         for(; i > index; i--){
02933                             h->ref_list[list][i]= h->ref_list[list][i-1];
02934                         }
02935                         h->ref_list[list][index]= *ref;
02936                         if (FIELD_PICTURE){
02937                             pic_as_field(&h->ref_list[list][index], pic_structure);
02938                         }
02939                     }
02940                 }else{
02941                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
02942                     return -1;
02943                 }
02944             }
02945         }
02946     }
      /* replace every entry that has no picture data by the current picture */
02947     for(list=0; list<h->list_count; list++){
02948         for(index= 0; index < h->ref_count[list]; index++){
02949             if(!h->ref_list[list][index].data[0]){
02950                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
02951                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
02952             }
02953         }
02954     }
02955 
02956     return 0;
02957 }
02958 
02959 static void fill_mbaff_ref_list(H264Context *h){
02960     int list, i, j;
02961     for(list=0; list<2; list++){ //FIXME try list_count
02962         for(i=0; i<h->ref_count[list]; i++){
02963             Picture *frame = &h->ref_list[list][i];
02964             Picture *field = &h->ref_list[list][16+2*i];
02965             field[0] = *frame;
02966             for(j=0; j<3; j++)
02967                 field[0].linesize[j] <<= 1;
02968             field[0].reference = PICT_TOP_FIELD;
02969             field[0].poc= field[0].field_poc[0];
02970             field[1] = field[0];
02971             for(j=0; j<3; j++)
02972                 field[1].data[j] += frame->linesize[j];
02973             field[1].reference = PICT_BOTTOM_FIELD;
02974             field[1].poc= field[1].field_poc[1];
02975 
02976             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
02977             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
02978             for(j=0; j<2; j++){
02979                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
02980                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
02981             }
02982         }
02983     }
02984     for(j=0; j<h->ref_count[1]; j++){
02985         for(i=0; i<h->ref_count[0]; i++)
02986             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
02987         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
02988         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
02989     }
02990 }
02991 
02992 static int pred_weight_table(H264Context *h){
02993     MpegEncContext * const s = &h->s;
02994     int list, i;
02995     int luma_def, chroma_def;
02996 
02997     h->use_weight= 0;
02998     h->use_weight_chroma= 0;
02999     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
03000     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
03001     luma_def = 1<<h->luma_log2_weight_denom;
03002     chroma_def = 1<<h->chroma_log2_weight_denom;
03003 
03004     for(list=0; list<2; list++){
03005         h->luma_weight_flag[list]   = 0;
03006         h->chroma_weight_flag[list] = 0;
03007         for(i=0; i<h->ref_count[list]; i++){
03008             int luma_weight_flag, chroma_weight_flag;
03009 
03010             luma_weight_flag= get_bits1(&s->gb);
03011             if(luma_weight_flag){
03012                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
03013                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
03014                 if(   h->luma_weight[list][i] != luma_def
03015                    || h->luma_offset[list][i] != 0) {
03016                     h->use_weight= 1;
03017                     h->luma_weight_flag[list]= 1;
03018                 }
03019             }else{
03020                 h->luma_weight[list][i]= luma_def;
03021                 h->luma_offset[list][i]= 0;
03022             }
03023 
03024             if(CHROMA){
03025                 chroma_weight_flag= get_bits1(&s->gb);
03026                 if(chroma_weight_flag){
03027                     int j;
03028                     for(j=0; j<2; j++){
03029                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
03030                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
03031                         if(   h->chroma_weight[list][i][j] != chroma_def
03032                            || h->chroma_offset[list][i][j] != 0) {
03033                             h->use_weight_chroma= 1;
03034                             h->chroma_weight_flag[list]= 1;
03035                         }
03036                     }
03037                 }else{
03038                     int j;
03039                     for(j=0; j<2; j++){
03040                         h->chroma_weight[list][i][j]= chroma_def;
03041                         h->chroma_offset[list][i][j]= 0;
03042                     }
03043                 }
03044             }
03045         }
03046         if(h->slice_type_nos != FF_B_TYPE) break;
03047     }
03048     h->use_weight= h->use_weight || h->use_weight_chroma;
03049     return 0;
03050 }
03051 
03052 static void implicit_weight_table(H264Context *h){
03053     MpegEncContext * const s = &h->s;
03054     int ref0, ref1, i;
03055     int cur_poc = s->current_picture_ptr->poc;
03056 
03057     for (i = 0; i < 2; i++) {
03058         h->luma_weight_flag[i]   = 0;
03059         h->chroma_weight_flag[i] = 0;
03060     }
03061 
03062     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
03063        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
03064         h->use_weight= 0;
03065         h->use_weight_chroma= 0;
03066         return;
03067     }
03068 
03069     h->use_weight= 2;
03070     h->use_weight_chroma= 2;
03071     h->luma_log2_weight_denom= 5;
03072     h->chroma_log2_weight_denom= 5;
03073 
03074     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
03075         int poc0 = h->ref_list[0][ref0].poc;
03076         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
03077             int poc1 = h->ref_list[1][ref1].poc;
03078             int td = av_clip(poc1 - poc0, -128, 127);
03079             if(td){
03080                 int tb = av_clip(cur_poc - poc0, -128, 127);
03081                 int tx = (16384 + (FFABS(td) >> 1)) / td;
03082                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
03083                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
03084                     h->implicit_weight[ref0][ref1] = 32;
03085                 else
03086                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
03087             }else
03088                 h->implicit_weight[ref0][ref1] = 32;
03089         }
03090     }
03091 }
03092 
03104 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
03105     int i;
03106     if (pic->reference &= refmask) {
03107         return 0;
03108     } else {
03109         for(i = 0; h->delayed_pic[i]; i++)
03110             if(pic == h->delayed_pic[i]){
03111                 pic->reference=DELAYED_PIC_REF;
03112                 break;
03113             }
03114         return 1;
03115     }
03116 }
03117 
03121 static void idr(H264Context *h){
03122     int i;
03123 
03124     for(i=0; i<16; i++){
03125         remove_long(h, i, 0);
03126     }
03127     assert(h->long_ref_count==0);
03128 
03129     for(i=0; i<h->short_ref_count; i++){
03130         unreference_pic(h, h->short_ref[i], 0);
03131         h->short_ref[i]= NULL;
03132     }
03133     h->short_ref_count=0;
03134     h->prev_frame_num= 0;
03135     h->prev_frame_num_offset= 0;
03136     h->prev_poc_msb=
03137     h->prev_poc_lsb= 0;
03138 }
03139 
03140 /* forget old pics after a seek */
03141 static void flush_dpb(AVCodecContext *avctx){
03142     H264Context *h= avctx->priv_data;
03143     int i;
03144     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
03145         if(h->delayed_pic[i])
03146             h->delayed_pic[i]->reference= 0;
03147         h->delayed_pic[i]= NULL;
03148     }
03149     h->outputed_poc= INT_MIN;
03150     idr(h);
03151     if(h->s.current_picture_ptr)
03152         h->s.current_picture_ptr->reference= 0;
03153     h->s.first_field= 0;
03154     reset_sei(h);
03155     ff_mpeg_flush(avctx);
03156 }
03157 
03166 static Picture * find_short(H264Context *h, int frame_num, int *idx){
03167     MpegEncContext * const s = &h->s;
03168     int i;
03169 
03170     for(i=0; i<h->short_ref_count; i++){
03171         Picture *pic= h->short_ref[i];
03172         if(s->avctx->debug&FF_DEBUG_MMCO)
03173             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
03174         if(pic->frame_num == frame_num) {
03175             *idx = i;
03176             return pic;
03177         }
03178     }
03179     return NULL;
03180 }
03181 
03188 static void remove_short_at_index(H264Context *h, int i){
03189     assert(i >= 0 && i < h->short_ref_count);
03190     h->short_ref[i]= NULL;
03191     if (--h->short_ref_count)
03192         memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
03193 }
03194 
03199 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
03200     MpegEncContext * const s = &h->s;
03201     Picture *pic;
03202     int i;
03203 
03204     if(s->avctx->debug&FF_DEBUG_MMCO)
03205         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
03206 
03207     pic = find_short(h, frame_num, &i);
03208     if (pic){
03209         if(unreference_pic(h, pic, ref_mask))
03210         remove_short_at_index(h, i);
03211     }
03212 
03213     return pic;
03214 }
03215 
03221 static Picture * remove_long(H264Context *h, int i, int ref_mask){
03222     Picture *pic;
03223 
03224     pic= h->long_ref[i];
03225     if (pic){
03226         if(unreference_pic(h, pic, ref_mask)){
03227             assert(h->long_ref[i]->long_ref == 1);
03228             h->long_ref[i]->long_ref= 0;
03229             h->long_ref[i]= NULL;
03230             h->long_ref_count--;
03231         }
03232     }
03233 
03234     return pic;
03235 }
03236 
03240 static void print_short_term(H264Context *h) {
03241     uint32_t i;
03242     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
03243         av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
03244         for(i=0; i<h->short_ref_count; i++){
03245             Picture *pic= h->short_ref[i];
03246             av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
03247         }
03248     }
03249 }
03250 
03254 static void print_long_term(H264Context *h) {
03255     uint32_t i;
03256     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
03257         av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
03258         for(i = 0; i < 16; i++){
03259             Picture *pic= h->long_ref[i];
03260             if (pic) {
03261                 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
03262             }
03263         }
03264     }
03265 }
03266 
/**
 * Apply memory management control operations to the reference lists.
 *
 * Executes the mmco_count operations in mmco[] on h->short_ref and
 * h->long_ref. Unless an MMCO_LONG operation already marked the current
 * picture, it is then added to (or completed in) the short-term list.
 * Finally, if the total number of references exceeds
 * h->sps.ref_frame_count, one reference is discarded.
 *
 * @param mmco       operations to execute
 * @param mmco_count number of entries in mmco
 * @return always 0
 */
03270 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
03271     MpegEncContext * const s = &h->s;
03272     int i, j;
03273     int current_ref_assigned=0;
03274     Picture *av_uninit(pic);
03275 
03276     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
03277         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
03278 
03279     for(i=0; i<mmco_count; i++){
03280         int structure, av_uninit(frame_num);
          /* NOTE(review): the debug output reads h->mmco[i], not the mmco parameter */
03281         if(s->avctx->debug&FF_DEBUG_MMCO)
03282             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
03283 
          /* both short-term opcodes need the target picture (pic) and its index (j) */
03284         if(   mmco[i].opcode == MMCO_SHORT2UNUSED
03285            || mmco[i].opcode == MMCO_SHORT2LONG){
03286             frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
03287             pic = find_short(h, frame_num, &j);
03288             if(!pic){
                  /* stay silent when SHORT2LONG targets a frame that already sits in the requested long-term slot */
03289                 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
03290                    || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
03291                 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
03292                 continue;
03293             }
03294         }
03295 
03296         switch(mmco[i].opcode){
03297         case MMCO_SHORT2UNUSED:
03298             if(s->avctx->debug&FF_DEBUG_MMCO)
03299                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
              /* keep only the reference bits not covered by structure */
03300             remove_short(h, frame_num, structure ^ PICT_FRAME);
03301             break;
03302         case MMCO_SHORT2LONG:
                  /* empty the target slot unless it already holds this picture */
03303                 if (h->long_ref[mmco[i].long_arg] != pic)
03304                     remove_long(h, mmco[i].long_arg, 0);
03305 
03306                 remove_short_at_index(h, j);
03307                 h->long_ref[ mmco[i].long_arg ]= pic;
03308                 if (h->long_ref[ mmco[i].long_arg ]){
03309                     h->long_ref[ mmco[i].long_arg ]->long_ref=1;
03310                     h->long_ref_count++;
03311                 }
03312             break;
03313         case MMCO_LONG2UNUSED:
03314             j = pic_num_extract(h, mmco[i].long_arg, &structure);
03315             pic = h->long_ref[j];
03316             if (pic) {
03317                 remove_long(h, j, structure ^ PICT_FRAME);
03318             } else if(s->avctx->debug&FF_DEBUG_MMCO)
03319                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
03320             break;
03321         case MMCO_LONG:
03322                     // Comment below left from previous code as it is an interesting note.
03323                     /* First field in pair is in short term list or
03324                      * at a different long term index.
03325                      * This is not allowed; see 7.4.3.3, notes 2 and 3.
03326                      * Report the problem and keep the pair where it is,
03327                      * and mark this field valid.
03328                      */
03329 
03330             if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
03331                 remove_long(h, mmco[i].long_arg, 0);
03332 
03333                 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
03334                 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
03335                 h->long_ref_count++;
03336             }
03337 
03338             s->current_picture_ptr->reference |= s->picture_structure;
03339             current_ref_assigned=1;
03340             break;
03341         case MMCO_SET_MAX_LONG:
03342             assert(mmco[i].long_arg <= 16);
03343             // just remove the long term which index is greater than new max
03344             for(j = mmco[i].long_arg; j<16; j++){
03345                 remove_long(h, j, 0);
03346             }
03347             break;
03348         case MMCO_RESET:
              /* drop every short- and long-term reference, then zero the poc/frame_num state */
03349             while(h->short_ref_count){
03350                 remove_short(h, h->short_ref[0]->frame_num, 0);
03351             }
03352             for(j = 0; j < 16; j++) {
03353                 remove_long(h, j, 0);
03354             }
03355             s->current_picture_ptr->poc=
03356             s->current_picture_ptr->field_poc[0]=
03357             s->current_picture_ptr->field_poc[1]=
03358             h->poc_lsb=
03359             h->poc_msb=
03360             h->frame_num=
03361             s->current_picture_ptr->frame_num= 0;
03362             break;
03363         default: assert(0);
03364         }
03365     }
03366 
03367     if (!current_ref_assigned) {
03368         /* Second field of complementary field pair; the first field of
03369          * which is already referenced. If short referenced, it
03370          * should be first entry in short_ref. If not, it must exist
03371          * in long_ref; trying to put it on the short list here is an
03372          * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
03373          */
03374         if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
03375             /* Just mark the second field valid */
03376             s->current_picture_ptr->reference = PICT_FRAME;
03377         } else if (s->current_picture_ptr->long_ref) {
03378             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
03379                                              "assignment for second field "
03380                                              "in complementary field pair "
03381                                              "(first field is long term)\n");
03382         } else {
              /* a short-term entry with the same frame_num must not exist yet; remove it if it does */
03383             pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
03384             if(pic){
03385                 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
03386             }
03387 
              /* insert the current picture at the front of the short-term list */
03388             if(h->short_ref_count)
03389                 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
03390 
03391             h->short_ref[0]= s->current_picture_ptr;
03392             h->short_ref_count++;
03393             s->current_picture_ptr->reference |= s->picture_structure;
03394         }
03395     }
03396 
03397     if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
03398 
03399         /* We have too many reference frames, probably due to corrupted
03400          * stream. Need to discard one frame. Prevents overrun of the
03401          * short_ref and long_ref buffers.
03402          */
03403         av_log(h->s.avctx, AV_LOG_ERROR,
03404                "number of reference frames exceeds max (probably "
03405                "corrupt input), discarding one\n");
03406 
          /* only long-term references left: drop the first occupied slot;
           * otherwise drop the last short-term entry */
03407         if (h->long_ref_count && !h->short_ref_count) {
03408             for (i = 0; i < 16; ++i)
03409                 if (h->long_ref[i])
03410                     break;
03411 
03412             assert(i < 16);
03413             remove_long(h, i, 0);
03414         } else {
03415             pic = h->short_ref[h->short_ref_count - 1];
03416             remove_short(h, pic->frame_num, 0);
03417         }
03418     }
03419 
03420     print_short_term(h);
03421     print_long_term(h);
03422     return 0;
03423 }
03424 
/**
 * Parse the reference picture marking syntax into h->mmco[] and
 * h->mmco_index.
 *
 * - IDR slices: reads two flag bits; the first is stored (minus 1) in
 *   s->broken_link, the second, if set, queues a single MMCO_LONG
 *   operation with long_arg 0.
 * - Other slices with the adaptive flag set: reads up to MAX_MMCO_COUNT
 *   operations, stopping at MMCO_END.
 * - Other slices without the flag: when the reference lists are full,
 *   queues MMCO_SHORT2UNUSED for the last short-term entry (two
 *   operations, one per field, for field pictures).
 *
 * @param gb bitstream reader to parse from
 * @return 0 on success, -1 on an illegal opcode or long reference argument
 */
03425 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
03426     MpegEncContext * const s = &h->s;
03427     int i;
03428 
03429     h->mmco_index= 0;
03430     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
03431         s->broken_link= get_bits1(gb) -1;
03432         if(get_bits1(gb)){
03433             h->mmco[0].opcode= MMCO_LONG;
03434             h->mmco[0].long_arg= 0;
03435             h->mmco_index= 1;
03436         }
03437     }else{
03438         if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
03439             for(i= 0; i<MAX_MMCO_COUNT; i++) {
03440                 MMCOOpcode opcode= get_ue_golomb_31(gb);
03441 
03442                 h->mmco[i].opcode= opcode;
03443                 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
                      /* the mask wraps the result, assuming max_pic_num is a power of two -- TODO confirm */
03444                     h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
03445 /*                    if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
03446                         av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
03447                         return -1;
03448                     }*/
03449                 }
03450                 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
03451                     unsigned int long_arg= get_ue_golomb_31(gb);
                      /* values 16..31 are accepted only for MMCO_LONG2UNUSED in field pictures */
03452                     if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
03453                         av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
03454                         return -1;
03455                     }
03456                     h->mmco[i].long_arg= long_arg;
03457                 }
03458 
03459                 if(opcode > (unsigned)MMCO_LONG){
03460                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
03461                     return -1;
03462                 }
03463                 if(opcode == MMCO_END)
03464                     break;
03465             }
              /* number of stored operations; a terminating MMCO_END is not counted */
03466             h->mmco_index= i;
03467         }else{
03468             assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
03469 
              /* lists full: queue removal of the last short-term entry, except
               * for the second field of an already referenced picture */
03470             if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
03471                     !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
03472                 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
03473                 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
03474                 h->mmco_index= 1;
03475                 if (FIELD_PICTURE) {
                      /* field pictures get two operations with pic nums 2*frame_num and 2*frame_num+1 */
03476                     h->mmco[0].short_pic_num *= 2;
03477                     h->mmco[1].opcode= MMCO_SHORT2UNUSED;
03478                     h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
03479                     h->mmco_index= 2;
03480                 }
03481             }
03482         }
03483     }
03484 
03485     return 0;
03486 }
03487 
03488 static int init_poc(H264Context *h){
03489     MpegEncContext * const s = &h->s;
03490     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
03491     int field_poc[2];
03492     Picture *cur = s->current_picture_ptr;
03493 
03494     h->frame_num_offset= h->prev_frame_num_offset;
03495     if(h->frame_num < h->prev_frame_num)
03496         h->frame_num_offset += max_frame_num;
03497 
03498     if(h->sps.poc_type==0){
03499         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
03500 
03501         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
03502             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
03503         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
03504             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
03505         else
03506             h->poc_msb = h->prev_poc_msb;
03507 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
03508         field_poc[0] =
03509         field_poc[1] = h->poc_msb + h->poc_lsb;
03510         if(s->picture_structure == PICT_FRAME)
03511             field_poc[1] += h->delta_poc_bottom;
03512     }else if(h->sps.poc_type==1){
03513         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
03514         int i;
03515 
03516         if(h->sps.poc_cycle_length != 0)
03517             abs_frame_num = h->frame_num_offset + h->frame_num;
03518         else
03519             abs_frame_num = 0;
03520 
03521         if(h->nal_ref_idc==0 && abs_frame_num > 0)
03522             abs_frame_num--;
03523 
03524         expected_delta_per_poc_cycle = 0;
03525         for(i=0; i < h->sps.poc_cycle_length; i++)
03526             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
03527 
03528         if(abs_frame_num > 0){
03529             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
03530             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
03531 
03532             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
03533             for(i = 0; i <= frame_num_in_poc_cycle; i++)
03534                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
03535         } else
03536             expectedpoc = 0;
03537 
03538         if(h->nal_ref_idc == 0)
03539             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
03540 
03541         field_poc[0] = expectedpoc + h->delta_poc[0];
03542         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
03543 
03544         if(s->picture_structure == PICT_FRAME)
03545             field_poc[1] += h->delta_poc[1];
03546     }else{
03547         int poc= 2*(h->frame_num_offset + h->frame_num);
03548 
03549         if(!h->nal_ref_idc)
03550             poc--;
03551 
03552         field_poc[0]= poc;
03553         field_poc[1]= poc;
03554     }
03555 
03556     if(s->picture_structure != PICT_BOTTOM_FIELD)
03557         s->current_picture_ptr->field_poc[0]= field_poc[0];
03558     if(s->picture_structure != PICT_TOP_FIELD)
03559         s->current_picture_ptr->field_poc[1]= field_poc[1];
03560     cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
03561 
03562     return 0;
03563 }
03564 
03565 
03569 static void init_scan_tables(H264Context *h){
03570     MpegEncContext * const s = &h->s;
03571     int i;
03572     if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
03573         memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
03574         memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
03575     }else{
03576         for(i=0; i<16; i++){
03577 #define T(x) (x>>2) | ((x<<2) & 0xF)
03578             h->zigzag_scan[i] = T(zigzag_scan[i]);
03579             h-> field_scan[i] = T( field_scan[i]);
03580 #undef T
03581         }
03582     }
03583     if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
03584         memcpy(h->zigzag_scan8x8,       ff_zigzag_direct,     64*sizeof(uint8_t));
03585         memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
03586         memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
03587         memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
03588     }else{
03589         for(i=0; i<64; i++){
03590 #define T(x) (x>>3) | ((x&7)<<3)
03591             h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
03592             h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
03593             h->field_scan8x8[i]        = T(field_scan8x8[i]);
03594             h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
03595 #undef T
03596         }
03597     }
03598     if(h->sps.transform_bypass){ //FIXME same ugly
03599         h->zigzag_scan_q0          = zigzag_scan;
03600         h->zigzag_scan8x8_q0       = ff_zigzag_direct;
03601         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
03602         h->field_scan_q0           = field_scan;
03603         h->field_scan8x8_q0        = field_scan8x8;
03604         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
03605     }else{
03606         h->zigzag_scan_q0          = h->zigzag_scan;
03607         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
03608         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
03609         h->field_scan_q0           = h->field_scan;
03610         h->field_scan8x8_q0        = h->field_scan8x8;
03611         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
03612     }
03613 }
03614 
03618 static void clone_slice(H264Context *dst, H264Context *src)
03619 {
03620     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
03621     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
03622     dst->s.current_picture      = src->s.current_picture;
03623     dst->s.linesize             = src->s.linesize;
03624     dst->s.uvlinesize           = src->s.uvlinesize;
03625     dst->s.first_field          = src->s.first_field;
03626 
03627     dst->prev_poc_msb           = src->prev_poc_msb;
03628     dst->prev_poc_lsb           = src->prev_poc_lsb;
03629     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
03630     dst->prev_frame_num         = src->prev_frame_num;
03631     dst->short_ref_count        = src->short_ref_count;
03632 
03633     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
03634     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
03635     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
03636     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
03637 
03638     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
03639     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
03640 }
03641 
03651 static int decode_slice_header(H264Context *h, H264Context *h0){
03652     MpegEncContext * const s = &h->s;
03653     MpegEncContext * const s0 = &h0->s;
03654     unsigned int first_mb_in_slice;
03655     unsigned int pps_id;
03656     int num_ref_idx_active_override_flag;
03657     unsigned int slice_type, tmp, i, j;
03658     int default_ref_list_done = 0;
03659     int last_pic_structure;
03660 
03661     s->dropable= h->nal_ref_idc == 0;
03662 
03663     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
03664         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
03665         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
03666     }else{
03667         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
03668         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
03669     }
03670 
03671     first_mb_in_slice= get_ue_golomb(&s->gb);
03672 
03673     if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
03674         h0->current_slice = 0;
03675         if (!s0->first_field)
03676             s->current_picture_ptr= NULL;
03677     }
03678 
03679     slice_type= get_ue_golomb_31(&s->gb);
03680     if(slice_type > 9){
03681         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
03682         return -1;
03683     }
03684     if(slice_type > 4){
03685         slice_type -= 5;
03686         h->slice_type_fixed=1;
03687     }else
03688         h->slice_type_fixed=0;
03689 
03690     slice_type= golomb_to_pict_type[ slice_type ];
03691     if (slice_type == FF_I_TYPE
03692         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
03693         default_ref_list_done = 1;
03694     }
03695     h->slice_type= slice_type;
03696     h->slice_type_nos= slice_type & 3;
03697 
03698     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
03699     if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
03700         av_log(h->s.avctx, AV_LOG_ERROR,
03701                "B picture before any references, skipping\n");
03702         return -1;
03703     }
03704 
03705     pps_id= get_ue_golomb(&s->gb);
03706     if(pps_id>=MAX_PPS_COUNT){
03707         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
03708         return -1;
03709     }
03710     if(!h0->pps_buffers[pps_id]) {
03711         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
03712         return -1;
03713     }
03714     h->pps= *h0->pps_buffers[pps_id];
03715 
03716     if(!h0->sps_buffers[h->pps.sps_id]) {
03717         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
03718         return -1;
03719     }
03720     h->sps = *h0->sps_buffers[h->pps.sps_id];
03721 
03722     if(h == h0 && h->dequant_coeff_pps != pps_id){
03723         h->dequant_coeff_pps = pps_id;
03724         init_dequant_tables(h);
03725     }
03726 
03727     s->mb_width= h->sps.mb_width;
03728     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
03729 
03730     h->b_stride=  s->mb_width*4;
03731     h->b8_stride= s->mb_width*2;
03732 
03733     s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
03734     if(h->sps.frame_mbs_only_flag)
03735         s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
03736     else
03737         s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
03738 
03739     if (s->context_initialized
03740         && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
03741         if(h != h0)
03742             return -1;   // width / height changed during parallelized decoding
03743         free_tables(h);
03744         flush_dpb(s->avctx);
03745         MPV_common_end(s);
03746     }
03747     if (!s->context_initialized) {
03748         if(h != h0)
03749             return -1;  // we cant (re-)initialize context during parallel decoding
03750         if (MPV_common_init(s) < 0)
03751             return -1;
03752         s->first_field = 0;
03753 
03754         init_scan_tables(h);
03755         alloc_tables(h);
03756 
03757         for(i = 1; i < s->avctx->thread_count; i++) {
03758             H264Context *c;
03759             c = h->thread_context[i] = av_malloc(sizeof(H264Context));
03760             memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
03761             memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
03762             c->sps = h->sps;
03763             c->pps = h->pps;
03764             init_scan_tables(c);
03765             clone_tables(c, h);
03766         }
03767 
03768         for(i = 0; i < s->avctx->thread_count; i++)
03769             if(context_init(h->thread_context[i]) < 0)
03770                 return -1;
03771 
03772         s->avctx->width = s->width;
03773         s->avctx->height = s->height;
03774         s->avctx->sample_aspect_ratio= h->sps.sar;
03775         if(!s->avctx->sample_aspect_ratio.den)
03776             s->avctx->sample_aspect_ratio.den = 1;
03777 
03778         if(h->sps.timing_info_present_flag){
03779             s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
03780             if(h->x264_build > 0 && h->x264_build < 44)
03781                 s->avctx->time_base.den *= 2;
03782             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
03783                       s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
03784         }
03785     }
03786 
03787     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
03788 
03789     h->mb_mbaff = 0;
03790     h->mb_aff_frame = 0;
03791     last_pic_structure = s0->picture_structure;
03792     if(h->sps.frame_mbs_only_flag){
03793         s->picture_structure= PICT_FRAME;
03794     }else{
03795         if(get_bits1(&s->gb)) { //field_pic_flag
03796             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
03797         } else {
03798             s->picture_structure= PICT_FRAME;
03799             h->mb_aff_frame = h->sps.mb_aff;
03800         }
03801     }
03802     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
03803 
03804     if(h0->current_slice == 0){
03805         while(h->frame_num !=  h->prev_frame_num &&
03806               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
03807             av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
03808             if (frame_start(h) < 0)
03809                 return -1;
03810             h->prev_frame_num++;
03811             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
03812             s->current_picture_ptr->frame_num= h->prev_frame_num;
03813             execute_ref_pic_marking(h, NULL, 0);
03814         }
03815 
03816         /* See if we have a decoded first field looking for a pair... */
03817         if (s0->first_field) {
03818             assert(s0->current_picture_ptr);
03819             assert(s0->current_picture_ptr->data[0]);
03820             assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
03821 
03822             /* figure out if we have a complementary field pair */
03823             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
03824                 /*
03825                  * Previous field is unmatched. Don't display it, but let it
03826                  * remain for reference if marked as such.
03827                  */
03828                 s0->current_picture_ptr = NULL;
03829                 s0->first_field = FIELD_PICTURE;
03830 
03831             } else {
03832                 if (h->nal_ref_idc &&
03833                         s0->current_picture_ptr->reference &&
03834                         s0->current_picture_ptr->frame_num != h->frame_num) {
03835                     /*
03836                      * This and previous field were reference, but had
03837                      * different frame_nums. Consider this field first in
03838                      * pair. Throw away previous field except for reference
03839                      * purposes.
03840                      */
03841                     s0->first_field = 1;
03842                     s0->current_picture_ptr = NULL;
03843 
03844                 } else {
03845                     /* Second field in complementary pair */
03846                     s0->first_field = 0;
03847                 }
03848             }
03849 
03850         } else {
03851             /* Frame or first field in a potentially complementary pair */
03852             assert(!s0->current_picture_ptr);
03853             s0->first_field = FIELD_PICTURE;
03854         }
03855 
03856         if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
03857             s0->first_field = 0;
03858             return -1;
03859         }
03860     }
03861     if(h != h0)
03862         clone_slice(h, h0);
03863 
03864     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
03865 
03866     assert(s->mb_num == s->mb_width * s->mb_height);
03867     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
03868        first_mb_in_slice                    >= s->mb_num){
03869         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
03870         return -1;
03871     }
03872     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
03873     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
03874     if (s->picture_structure == PICT_BOTTOM_FIELD)
03875         s->resync_mb_y = s->mb_y = s->mb_y + 1;
03876     assert(s->mb_y < s->mb_height);
03877 
03878     if(s->picture_structure==PICT_FRAME){
03879         h->curr_pic_num=   h->frame_num;
03880         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
03881     }else{
03882         h->curr_pic_num= 2*h->frame_num + 1;
03883         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
03884     }
03885 
03886     if(h->nal_unit_type == NAL_IDR_SLICE){
03887         get_ue_golomb(&s->gb); /* idr_pic_id */
03888     }
03889 
03890     if(h->sps.poc_type==0){
03891         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
03892 
03893         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
03894             h->delta_poc_bottom= get_se_golomb(&s->gb);
03895         }
03896     }
03897 
03898     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
03899         h->delta_poc[0]= get_se_golomb(&s->gb);
03900 
03901         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
03902             h->delta_poc[1]= get_se_golomb(&s->gb);
03903     }
03904 
03905     init_poc(h);
03906 
03907     if(h->pps.redundant_pic_cnt_present){
03908         h->redundant_pic_count= get_ue_golomb(&s->gb);
03909     }
03910 
03911     //set defaults, might be overridden a few lines later
03912     h->ref_count[0]= h->pps.ref_count[0];
03913     h->ref_count[1]= h->pps.ref_count[1];
03914 
03915     if(h->slice_type_nos != FF_I_TYPE){
03916         if(h->slice_type_nos == FF_B_TYPE){
03917             h->direct_spatial_mv_pred= get_bits1(&s->gb);
03918         }
03919         num_ref_idx_active_override_flag= get_bits1(&s->gb);
03920 
03921         if(num_ref_idx_active_override_flag){
03922             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
03923             if(h->slice_type_nos==FF_B_TYPE)
03924                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
03925 
03926             if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
03927                 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
03928                 h->ref_count[0]= h->ref_count[1]= 1;
03929                 return -1;
03930             }
03931         }
03932         if(h->slice_type_nos == FF_B_TYPE)
03933             h->list_count= 2;
03934         else
03935             h->list_count= 1;
03936     }else
03937         h->list_count= 0;
03938 
03939     if(!default_ref_list_done){
03940         fill_default_ref_list(h);
03941     }
03942 
03943     if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
03944         return -1;
03945 
03946     if(h->slice_type_nos!=FF_I_TYPE){
03947         s->last_picture_ptr= &h->ref_list[0][0];
03948         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
03949     }
03950     if(h->slice_type_nos==FF_B_TYPE){
03951         s->next_picture_ptr= &h->ref_list[1][0];
03952         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
03953     }
03954 
03955     if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
03956        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
03957         pred_weight_table(h);
03958     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
03959         implicit_weight_table(h);
03960     else {
03961         h->use_weight = 0;
03962         for (i = 0; i < 2; i++) {
03963             h->luma_weight_flag[i]   = 0;
03964             h->chroma_weight_flag[i] = 0;
03965         }
03966     }
03967 
03968     if(h->nal_ref_idc)
03969         decode_ref_pic_marking(h0, &s->gb);
03970 
03971     if(FRAME_MBAFF)
03972         fill_mbaff_ref_list(h);
03973 
03974     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
03975         direct_dist_scale_factor(h);
03976     direct_ref_list_init(h);
03977 
03978     if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
03979         tmp = get_ue_golomb_31(&s->gb);
03980         if(tmp > 2){
03981             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
03982             return -1;
03983         }
03984         h->cabac_init_idc= tmp;
03985     }
03986 
03987     h->last_qscale_diff = 0;
03988     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
03989     if(tmp>51){
03990         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
03991         return -1;
03992     }
03993     s->qscale= tmp;
03994     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
03995     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
03996     //FIXME qscale / qp ... stuff
03997     if(h->slice_type == FF_SP_TYPE){
03998         get_bits1(&s->gb); /* sp_for_switch_flag */
03999     }
04000     if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
04001         get_se_golomb(&s->gb); /* slice_qs_delta */
04002     }
04003 
04004     h->deblocking_filter = 1;
04005     h->slice_alpha_c0_offset = 0;
04006     h->slice_beta_offset = 0;
04007     if( h->pps.deblocking_filter_parameters_present ) {
04008         tmp= get_ue_golomb_31(&s->gb);
04009         if(tmp > 2){
04010             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
04011             return -1;
04012         }
04013         h->deblocking_filter= tmp;
04014         if(h->deblocking_filter < 2)
04015             h->deblocking_filter^= 1; // 1<->0
04016 
04017         if( h->deblocking_filter ) {
04018             h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
04019             h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
04020         }
04021     }
04022 
04023     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
04024        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
04025        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
04026        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
04027         h->deblocking_filter= 0;
04028 
04029     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
04030         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
04031             /* Cheat slightly for speed:
04032                Do not bother to deblock across slices. */
04033             h->deblocking_filter = 2;
04034         } else {
04035             h0->max_contexts = 1;
04036             if(!h0->single_decode_warning) {
04037                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
04038                 h0->single_decode_warning = 1;
04039             }
04040             if(h != h0)
04041                 return 1; // deblocking switched inside frame
04042         }
04043     }
04044 
04045 #if 0 //FMO
04046     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
04047         slice_group_change_cycle= get_bits(&s->gb, ?);
04048 #endif
04049 
04050     h0->last_slice_type = slice_type;
04051     h->slice_num = ++h0->current_slice;
04052     if(h->slice_num >= MAX_SLICES){
04053         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
04054     }
04055 
04056     for(j=0; j<2; j++){
04057         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
04058         ref2frm[0]=
04059         ref2frm[1]= -1;
04060         for(i=0; i<16; i++)
04061             ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
04062                           +(h->ref_list[j][i].reference&3);
04063         ref2frm[18+0]=
04064         ref2frm[18+1]= -1;
04065         for(i=16; i<48; i++)
04066             ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
04067                           +(h->ref_list[j][i].reference&3);
04068     }
04069 
04070     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
04071     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
04072 
04073     s->avctx->refs= h->sps.ref_frame_count;
04074 
04075     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
04076         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
04077                h->slice_num,
04078                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
04079                first_mb_in_slice,
04080                av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
04081                pps_id, h->frame_num,
04082                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
04083                h->ref_count[0], h->ref_count[1],
04084                s->qscale,
04085                h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
04086                h->use_weight,
04087                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
04088                h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
04089                );
04090     }
04091 
04092     return 0;
04093 }
04094 
04098 static inline int get_level_prefix(GetBitContext *gb){
04099     unsigned int buf;
04100     int log;
04101 
04102     OPEN_READER(re, gb);
04103     UPDATE_CACHE(re, gb);
04104     buf=GET_CACHE(re, gb);
04105 
04106     log= 32 - av_log2(buf);
04107 #ifdef TRACE
04108     print_bin(buf>>(32-log), log);
04109     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
04110 #endif
04111 
04112     LAST_SKIP_BITS(re, gb, log);
04113     CLOSE_READER(re, gb);
04114 
04115     return log-1;
04116 }
04117 
04118 static inline int get_dct8x8_allowed(H264Context *h){
04119     if(h->sps.direct_8x8_inference_flag)
04120         return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
04121     else
04122         return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
04123 }
04124 
04132 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
04133     MpegEncContext * const s = &h->s;
04134     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
04135     int level[16];
04136     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
04137 
04138     //FIXME put trailing_onex into the context
04139 
04140     if(n == CHROMA_DC_BLOCK_INDEX){
04141         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
04142         total_coeff= coeff_token>>2;
04143     }else{
04144         if(n == LUMA_DC_BLOCK_INDEX){
04145             total_coeff= pred_non_zero_count(h, 0);
04146             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
04147             total_coeff= coeff_token>>2;
04148         }else{
04149             total_coeff= pred_non_zero_count(h, n);
04150             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
04151             total_coeff= coeff_token>>2;
04152             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
04153         }
04154     }
04155 
04156     //FIXME set last_non_zero?
04157 
04158     if(total_coeff==0)
04159         return 0;
04160     if(total_coeff > (unsigned)max_coeff) {
04161         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
04162         return -1;
04163     }
04164 
04165     trailing_ones= coeff_token&3;
04166     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
04167     assert(total_coeff<=16);
04168 
04169     i = show_bits(gb, 3);
04170     skip_bits(gb, trailing_ones);
04171     level[0] = 1-((i&4)>>1);
04172     level[1] = 1-((i&2)   );
04173     level[2] = 1-((i&1)<<1);
04174 
04175     if(trailing_ones<total_coeff) {
04176         int mask, prefix;
04177         int suffix_length = total_coeff > 10 && trailing_ones < 3;
04178         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
04179         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
04180 
04181         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
04182         if(level_code >= 100){
04183             prefix= level_code - 100;
04184             if(prefix == LEVEL_TAB_BITS)
04185                 prefix += get_level_prefix(gb);
04186 
04187             //first coefficient has suffix_length equal to 0 or 1
04188             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
04189                 if(suffix_length)
04190                     level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
04191                 else
04192                     level_code= (prefix<<suffix_length); //part
04193             }else if(prefix==14){
04194                 if(suffix_length)
04195                     level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
04196                 else
04197                     level_code= prefix + get_bits(gb, 4); //part
04198             }else{
04199                 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
04200                 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
04201                 if(prefix>=16)
04202                     level_code += (1<<(prefix-3))-4096;
04203             }
04204 
04205             if(trailing_ones < 3) level_code += 2;
04206 
04207             suffix_length = 2;
04208             mask= -(level_code&1);
04209             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
04210         }else{
04211             if(trailing_ones < 3) level_code += (level_code>>31)|1;
04212 
04213             suffix_length = 1;
04214             if(level_code + 3U > 6U)
04215                 suffix_length++;
04216             level[trailing_ones]= level_code;
04217         }
04218 
04219         //remaining coefficients have suffix_length > 0
04220         for(i=trailing_ones+1;i<total_coeff;i++) {
04221             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
04222             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
04223             level_code= cavlc_level_tab[suffix_length][bitsi][0];
04224 
04225             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
04226             if(level_code >= 100){
04227                 prefix= level_code - 100;
04228                 if(prefix == LEVEL_TAB_BITS){
04229                     prefix += get_level_prefix(gb);
04230                 }
04231                 if(prefix<15){
04232                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
04233                 }else{
04234                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
04235                     if(prefix>=16)
04236                         level_code += (1<<(prefix-3))-4096;
04237                 }
04238                 mask= -(level_code&1);
04239                 level_code= (((2+level_code)>>1) ^ mask) - mask;
04240             }
04241             level[i]= level_code;
04242 
04243             if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
04244                 suffix_length++;
04245         }
04246     }
04247 
04248     if(total_coeff == max_coeff)
04249         zeros_left=0;
04250     else{
04251         if(n == CHROMA_DC_BLOCK_INDEX)
04252             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
04253         else
04254             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
04255     }
04256 
04257     coeff_num = zeros_left + total_coeff - 1;
04258     j = scantable[coeff_num];
04259     if(n > 24){
04260         block[j] = level[0];
04261         for(i=1;i<total_coeff;i++) {
04262             if(zeros_left <= 0)
04263                 run_before = 0;
04264             else if(zeros_left < 7){
04265                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
04266             }else{
04267                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
04268             }
04269             zeros_left -= run_before;
04270             coeff_num -= 1 + run_before;
04271             j= scantable[ coeff_num ];
04272 
04273             block[j]= level[i];
04274         }
04275     }else{
04276         block[j] = (level[0] * qmul[j] + 32)>>6;
04277         for(i=1;i<total_coeff;i++) {
04278             if(zeros_left <= 0)
04279                 run_before = 0;
04280             else if(zeros_left < 7){
04281                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
04282             }else{
04283                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
04284             }
04285             zeros_left -= run_before;
04286             coeff_num -= 1 + run_before;
04287             j= scantable[ coeff_num ];
04288 
04289             block[j]= (level[i] * qmul[j] + 32)>>6;
04290         }
04291     }
04292 
04293     if(zeros_left<0){
04294         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
04295         return -1;
04296     }
04297 
04298     return 0;
04299 }
04300 
/**
 * Infers the field decoding flag of the current macroblock from already
 * decoded neighbours; nothing is read from the bitstream here.
 *
 * The mb_type of the left neighbour (mb_xy-1) is used if that macroblock
 * belongs to the current slice; otherwise the mb_type of the macroblock one
 * mb_stride before the current one is used if it belongs to the current
 * slice; otherwise the type is taken as 0.
 * h->mb_mbaff and h->mb_field_decoding_flag are both set to 1 if the chosen
 * type is interlaced and to 0 otherwise.
 */
04301 static void predict_field_decoding_flag(H264Context *h){
04302     MpegEncContext * const s = &h->s;
04303     const int mb_xy= h->mb_xy;
          // left neighbour first, then the one at mb_xy - mb_stride, else 0
04304     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
04305                 ? s->current_picture.mb_type[mb_xy-1]
04306                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
04307                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
04308                 : 0;
04309     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
04310 }
04311 
/**
 * Sets up the current macroblock (h->mb_xy) as a skipped macroblock.
 *
 * No bitstream data is read. The non-zero coefficient counts are cleared,
 * the motion is derived (direct prediction in B slices, P-skip motion
 * prediction otherwise) and written back, and the per-macroblock tables
 * (mb_type, qscale_table, slice_table) are updated.
 * h->prev_mb_skipped is set to 1 on exit.
 */
04315 static void decode_mb_skip(H264Context *h){
04316     MpegEncContext * const s = &h->s;
04317     const int mb_xy= h->mb_xy;
04318     int mb_type=0;
04319 
          // a skipped macroblock carries no residual: clear the coefficient counts
04320     memset(h->non_zero_count[mb_xy], 0, 16);
04321     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
04322 
04323     if(MB_FIELD)
04324         mb_type|= MB_TYPE_INTERLACED;
04325 
04326     if( h->slice_type_nos == FF_B_TYPE )
04327     {
04328         // just for fill_caches. pred_direct_motion will set the real mb_type
04329         mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
04330 
04331         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
04332         pred_direct_motion(h, &mb_type);
              // mb_type is passed by pointer above, so the skip flag is set again afterwards
04333         mb_type|= MB_TYPE_SKIP;
04334     }
04335     else
04336     {
04337         int mx, my;
04338         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
04339 
04340         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
04341         pred_pskip_motion(h, &mx, &my);
              // whole macroblock: list 0 reference index 0 and the predicted motion vector
04342         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
04343         fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
04344     }
04345 
04346     write_back_motion(h, mb_type);
04347     s->current_picture.mb_type[mb_xy]= mb_type;
04348     s->current_picture.qscale_table[mb_xy]= s->qscale;
04349     h->slice_table[ mb_xy ]= h->slice_num;
04350     h->prev_mb_skipped= 1;
04351 }
04352 
/**
 * Decodes one macroblock of a CAVLC coded slice.
 *
 * The macroblock position is taken from s->mb_x / s->mb_y. Syntax elements
 * are read from s->gb; residual blocks are read through h->intra_gb_ptr or
 * h->inter_gb_ptr. The function handles, in this order: the skip run,
 * the MBAFF field decoding flag, mb_type, I_PCM samples, intra prediction
 * modes or inter reference indices and motion vectors, the coded block
 * pattern, the 8x8 transform flag, the quantizer delta and the residual.
 *
 * @return 0 on success (including a skipped macroblock), -1 if an invalid
 *         syntax element value is found or a called helper reports an error
 *
 * NOTE(review): when MB_MBAFF is set, h->ref_count[] is doubled before the
 * prediction info is parsed and only halved again at the normal exit at the
 * end of the function; the intermediate "return -1" paths leave it doubled.
 * Whether callers depend on ref_count after a failure is not visible here.
 */
04357 static int decode_mb_cavlc(H264Context *h){
04358     MpegEncContext * const s = &h->s;
04359     int mb_xy;
04360     int partition_count;
04361     unsigned int mb_type, cbp;
04362     int dct8x8_allowed= h->pps.transform_8x8_mode;
04363 
04364     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
04365 
04366     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
04367     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
04368                 down the code */
          // skip handling, only for slices that are not of type I
04369     if(h->slice_type_nos != FF_I_TYPE){
              // -1 is treated as "no skip run loaded yet": read a new run length
04370         if(s->mb_skip_run==-1)
04371             s->mb_skip_run= get_ue_golomb(&s->gb);
04372 
              // a non-zero run means this macroblock is skipped; the run is decremented either way
04373         if (s->mb_skip_run--) {
                  // MBAFF, macroblock in an even row: the flag is read from the
                  // bitstream if the run has just ended, otherwise it is predicted
04374             if(FRAME_MBAFF && (s->mb_y&1) == 0){
04375                 if(s->mb_skip_run==0)
04376                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
04377                 else
04378                     predict_field_decoding_flag(h);
04379             }
04380             decode_mb_skip(h);
04381             return 0;
04382         }
04383     }
          // non-skipped macroblock in an even row of an MBAFF frame: read the field decoding flag
04384     if(FRAME_MBAFF){
04385         if( (s->mb_y&1) == 0 )
04386             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
04387     }
04388 
04389     h->prev_mb_skipped= 0;
04390 
          // mb_type: values past the inter range of the slice type (23 for B, 5 for P)
          // are intra types; they are rebased and handled at decode_intra_mb
04391     mb_type= get_ue_golomb(&s->gb);
04392     if(h->slice_type_nos == FF_B_TYPE){
04393         if(mb_type < 23){
04394             partition_count= b_mb_type_info[mb_type].partition_count;
04395             mb_type=         b_mb_type_info[mb_type].type;
04396         }else{
04397             mb_type -= 23;
04398             goto decode_intra_mb;
04399         }
04400     }else if(h->slice_type_nos == FF_P_TYPE){
04401         if(mb_type < 5){
04402             partition_count= p_mb_type_info[mb_type].partition_count;
04403             mb_type=         p_mb_type_info[mb_type].type;
04404         }else{
04405             mb_type -= 5;
04406             goto decode_intra_mb;
04407         }
04408     }else{
04409        assert(h->slice_type_nos == FF_I_TYPE);
              // in SI slices a non-zero mb_type is shifted down by one
04410         if(h->slice_type == FF_SI_TYPE && mb_type)
04411             mb_type--;
04412 decode_intra_mb:
              // the check below accepts indices 0..25 for i_mb_type_info
              // NOTE(review): mb_type is unsigned but is printed with %d below
04413         if(mb_type > 25){
04414             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
04415             return -1;
04416         }
04417         partition_count=0;
04418         cbp= i_mb_type_info[mb_type].cbp;
04419         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
04420         mb_type= i_mb_type_info[mb_type].type;
04421     }
04422 
04423     if(MB_FIELD)
04424         mb_type |= MB_TYPE_INTERLACED;
04425 
04426     h->slice_table[ mb_xy ]= h->slice_num;
04427 
          // I_PCM: raw 8 bit samples follow, byte aligned; nothing else is parsed
04428     if(IS_INTRA_PCM(mb_type)){
04429         unsigned int x;
04430 
04431         // We assume these blocks are very rare so we do not optimize it.
04432         align_get_bits(&s->gb);
04433 
04434         // The pixels are stored in the same order as levels in h->mb array.
04435         for(x=0; x < (CHROMA ? 384 : 256); x++){
04436             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
04437         }
04438 
04439         // In deblocking, the quantizer is 0
04440         s->current_picture.qscale_table[mb_xy]= 0;
04441         // All coeffs are present
04442         memset(h->non_zero_count[mb_xy], 16, 16);
04443 
04444         s->current_picture.mb_type[mb_xy]= mb_type;
04445         return 0;
04446     }
04447 
          // MBAFF macroblocks use twice as many reference indices while parsing;
          // this is undone at the end of the function (see NOTE(review) in the header)
04448     if(MB_MBAFF){
04449         h->ref_count[0] <<= 1;
04450         h->ref_count[1] <<= 1;
04451     }
04452 
04453     fill_caches(h, mb_type, 0);
04454 
04455     //mb_pred
04456     if(IS_INTRA(mb_type)){
04457         int pred_mode;
04458 //            init_top_left_availability(h);
04459         if(IS_INTRA4x4(mb_type)){
04460             int i;
04461             int di = 1;
                  // one bit selects the 8x8 transform; then one prediction mode is
                  // read per 8x8 block (i advances by 4) instead of per 4x4 block
04462             if(dct8x8_allowed && get_bits1(&s->gb)){
04463                 mb_type |= MB_TYPE_8x8DCT;
04464                 di = 4;
04465             }
04466 
04467 //                fill_intra4x4_pred_table(h);
04468             for(i=0; i<16; i+=di){
04469                 int mode= pred_intra_mode(h, i);
04470 
                      // flag bit 1: keep the predicted mode; flag bit 0: a 3 bit value
                      // follows that selects one of the other modes (the predicted one is skipped)
04471                 if(!get_bits1(&s->gb)){
04472                     const int rem_mode= get_bits(&s->gb, 3);
04473                     mode = rem_mode + (rem_mode >= mode);
04474                 }
04475 
04476                 if(di==4)
04477                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
04478                 else
04479                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
04480             }
04481             write_back_intra_pred_mode(h);
04482             if( check_intra4x4_pred_mode(h) < 0)
04483                 return -1;
04484         }else{
04485             h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
04486             if(h->intra16x16_pred_mode < 0)
04487                 return -1;
04488         }
04489         if(CHROMA){
04490             pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
04491             if(pred_mode < 0)
04492                 return -1;
04493             h->chroma_pred_mode= pred_mode;
04494         }
04495     }else if(partition_count==4){
              // four 8x8 partitions, each with its own sub macroblock type
04496         int i, j, sub_partition_count[4], list, ref[2][4];
04497 
04498         if(h->slice_type_nos == FF_B_TYPE){
04499             for(i=0; i<4; i++){
04500                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
04501                 if(h->sub_mb_type[i] >=13){
04502                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
04503                     return -1;
04504                 }
04505                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
04506                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
04507             }
04508             if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
04509                || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
04510                 pred_direct_motion(h, &mb_type);
04511                 h->ref_cache[0][scan8[4]] =
04512                 h->ref_cache[1][scan8[4]] =
04513                 h->ref_cache[0][scan8[12]] =
04514                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
04515             }
04516         }else{
04517             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
04518             for(i=0; i<4; i++){
04519                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
04520                 if(h->sub_mb_type[i] >=4){
04521                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
04522                     return -1;
04523                 }
04524                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
04525                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
04526             }
04527         }
04528 
              // reference indices: all of them are read (per list, per 8x8 block)
              // before any motion vector difference
04529         for(list=0; list<h->list_count; list++){
04530             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
04531             for(i=0; i<4; i++){
04532                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
04533                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
04534                     unsigned int tmp;
                          // 1 reference: nothing coded; 2 references: one inverted bit;
                          // otherwise an Exp-Golomb code that must stay below ref_count
04535                     if(ref_count == 1){
04536                         tmp= 0;
04537                     }else if(ref_count == 2){
04538                         tmp= get_bits1(&s->gb)^1;
04539                     }else{
04540                         tmp= get_ue_golomb_31(&s->gb);
04541                         if(tmp>=ref_count){
04542                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
04543                             return -1;
04544                         }
04545                     }
04546                     ref[list][i]= tmp;
04547                 }else{
04548                  //FIXME
04549                     ref[list][i] = -1;
04550                 }
04551             }
04552         }
04553 
04554         if(dct8x8_allowed)
04555             dct8x8_allowed = get_dct8x8_allowed(h);
04556 
              // motion vectors: prediction plus the coded difference for every sub partition
04557         for(list=0; list<h->list_count; list++){
04558             for(i=0; i<4; i++){
04559                 if(IS_DIRECT(h->sub_mb_type[i])) {
04560                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
04561                     continue;
04562                 }
04563                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
04564                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
04565 
04566                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
04567                     const int sub_mb_type= h->sub_mb_type[i];
04568                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
04569                     for(j=0; j<sub_partition_count[i]; j++){
04570                         int mx, my;
04571                         const int index= 4*i + block_width*j;
04572                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
04573                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
04574                         mx += get_se_golomb(&s->gb);
04575                         my += get_se_golomb(&s->gb);
04576                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
04577 
                              // replicate the vector over the cache entries covered by the sub partition
04578                         if(IS_SUB_8X8(sub_mb_type)){
04579                             mv_cache[ 1 ][0]=
04580                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
04581                             mv_cache[ 1 ][1]=
04582                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
04583                         }else if(IS_SUB_8X4(sub_mb_type)){
04584                             mv_cache[ 1 ][0]= mx;
04585                             mv_cache[ 1 ][1]= my;
04586                         }else if(IS_SUB_4X8(sub_mb_type)){
04587                             mv_cache[ 8 ][0]= mx;
04588                             mv_cache[ 8 ][1]= my;
04589                         }
04590                         mv_cache[ 0 ][0]= mx;
04591                         mv_cache[ 0 ][1]= my;
04592                     }
04593                 }else{
                          // list not used by this 8x8 block: zero its four motion vectors
04594                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
04595                     p[0] = p[1]=
04596                     p[8] = p[9]= 0;
04597                 }
04598             }
04599         }
04600     }else if(IS_DIRECT(mb_type)){
04601         pred_direct_motion(h, &mb_type);
04602         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
04603     }else{
              // 16x16, 16x8 or 8x16 partitioning; in each case all reference
              // indices are read first, then the motion vectors
04604         int list, mx, my, i;
04605          //FIXME we should set ref_idx_l? to 0 if we use that later ...
04606         if(IS_16X16(mb_type)){
04607             for(list=0; list<h->list_count; list++){
04608                     unsigned int val;
04609                     if(IS_DIR(mb_type, 0, list)){
04610                         if(h->ref_count[list]==1){
04611                             val= 0;
04612                         }else if(h->ref_count[list]==2){
04613                             val= get_bits1(&s->gb)^1;
04614                         }else{
04615                             val= get_ue_golomb_31(&s->gb);
04616                             if(val >= h->ref_count[list]){
04617                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
04618                                 return -1;
04619                             }
04620                         }
04621                     }else
04622                         val= LIST_NOT_USED&0xFF;
04623                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
04624             }
04625             for(list=0; list<h->list_count; list++){
04626                 unsigned int val;
04627                 if(IS_DIR(mb_type, 0, list)){
04628                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
04629                     mx += get_se_golomb(&s->gb);
04630                     my += get_se_golomb(&s->gb);
04631                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
04632 
04633                     val= pack16to32(mx,my);
04634                 }else
04635                     val=0;
04636                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
04637             }
04638         }
04639         else if(IS_16X8(mb_type)){
04640             for(list=0; list<h->list_count; list++){
04641                     for(i=0; i<2; i++){
04642                         unsigned int val;
04643                         if(IS_DIR(mb_type, i, list)){
04644                             if(h->ref_count[list] == 1){
04645                                 val= 0;
04646                             }else if(h->ref_count[list] == 2){
04647                                 val= get_bits1(&s->gb)^1;
04648                             }else{
04649                                 val= get_ue_golomb_31(&s->gb);
04650                                 if(val >= h->ref_count[list]){
04651                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
04652                                     return -1;
04653                                 }
04654                             }
04655                         }else
04656                             val= LIST_NOT_USED&0xFF;
04657                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
04658                     }
04659             }
04660             for(list=0; list<h->list_count; list++){
04661                 for(i=0; i<2; i++){
04662                     unsigned int val;
04663                     if(IS_DIR(mb_type, i, list)){
04664                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
04665                         mx += get_se_golomb(&s->gb);
04666                         my += get_se_golomb(&s->gb);
04667                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
04668 
04669                         val= pack16to32(mx,my);
04670                     }else
04671                         val=0;
04672                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
04673                 }
04674             }
04675         }else{
04676             assert(IS_8X16(mb_type));
04677             for(list=0; list<h->list_count; list++){
04678                     for(i=0; i<2; i++){
04679                         unsigned int val;
04680                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
04681                             if(h->ref_count[list]==1){
04682                                 val= 0;
04683                             }else if(h->ref_count[list]==2){
04684                                 val= get_bits1(&s->gb)^1;
04685                             }else{
04686                                 val= get_ue_golomb_31(&s->gb);
04687                                 if(val >= h->ref_count[list]){
04688                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
04689                                     return -1;
04690                                 }
04691                             }
04692                         }else
04693                             val= LIST_NOT_USED&0xFF;
04694                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
04695                     }
04696             }
04697             for(list=0; list<h->list_count; list++){
04698                 for(i=0; i<2; i++){
04699                     unsigned int val;
04700                     if(IS_DIR(mb_type, i, list)){
04701                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
04702                         mx += get_se_golomb(&s->gb);
04703                         my += get_se_golomb(&s->gb);
04704                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
04705 
04706                         val= pack16to32(mx,my);
04707                     }else
04708                         val=0;
04709                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
04710                 }
04711             }
04712         }
04713     }
04714 
04715     if(IS_INTER(mb_type))
04716         write_back_motion(h, mb_type);
04717 
          // coded block pattern: for intra 16x16 it was already taken from
          // i_mb_type_info above; otherwise it is coded and mapped through a table
04718     if(!IS_INTRA16x16(mb_type)){
04719         cbp= get_ue_golomb(&s->gb);
04720         if(cbp > 47){
04721             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
04722             return -1;
04723         }
04724 
04725         if(CHROMA){
04726             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
04727             else                     cbp= golomb_to_inter_cbp   [cbp];
04728         }else{
04729             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
04730             else                     cbp= golomb_to_inter_cbp_gray[cbp];
04731         }
04732     }
04733     h->cbp = cbp;
04734 
          // non-intra macroblocks with coded luma read the 8x8 transform flag here
04735     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
04736         if(get_bits1(&s->gb)){
04737             mb_type |= MB_TYPE_8x8DCT;
04738             h->cbp_table[mb_xy]= cbp;
04739         }
04740     }
04741     s->current_picture.mb_type[mb_xy]= mb_type;
04742 
          // residual data is present if any cbp bit is set or the macroblock is intra 16x16
04743     if(cbp || IS_INTRA16x16(mb_type)){
04744         int i8x8, i4x4, chroma_idx;
04745         int dquant;
04746         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
04747         const uint8_t *scan, *scan8x8, *dc_scan;
04748 
04749 //        fill_non_zero_count_cache(h);
04750 
              // scan tables: field or zigzag variant, with separate tables for qscale 0
              // (selected on s->qscale as it is before dquant is applied below)
04751         if(IS_INTERLACED(mb_type)){
04752             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
04753             scan= s->qscale ? h->field_scan : h->field_scan_q0;
04754             dc_scan= luma_dc_field_scan;
04755         }else{
04756             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
04757             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
04758             dc_scan= luma_dc_zigzag_scan;
04759         }
04760 
04761         dquant= get_se_golomb(&s->gb);
04762 
04763         if( dquant > 25 || dquant < -26 ){
04764             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
04765             return -1;
04766         }
04767 
              // apply the delta and wrap the result back into 0..51
04768         s->qscale += dquant;
04769         if(((unsigned)s->qscale) > 51){
04770             if(s->qscale<0) s->qscale+= 52;
04771             else            s->qscale-= 52;
04772         }
04773 
04774         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
04775         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
04776         if(IS_INTRA16x16(mb_type)){
                  // luma DC block first, then (if coded) 15 AC coefficients per 4x4 block
04777             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
04778                 return -1; //FIXME continue if partitioned and other return -1 too
04779             }
04780 
04781             assert((cbp&15) == 0 || (cbp&15) == 15);
04782 
04783             if(cbp&15){
04784                 for(i8x8=0; i8x8<4; i8x8++){
04785                     for(i4x4=0; i4x4<4; i4x4++){
04786                         const int index= i4x4 + 4*i8x8;
04787                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
04788                             return -1;
04789                         }
04790                     }
04791                 }
04792             }else{
04793                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
04794             }
04795         }else{
                  // one cbp bit per 8x8 luma block
04796             for(i8x8=0; i8x8<4; i8x8++){
04797                 if(cbp & (1<<i8x8)){
04798                     if(IS_8x8DCT(mb_type)){
                              // the 8x8 block is read as four 16 coefficient parts into one
                              // 64 entry buffer; their counts are then summed into nnz[0]
04799                         DCTELEM *buf = &h->mb[64*i8x8];
04800                         uint8_t *nnz;
04801                         for(i4x4=0; i4x4<4; i4x4++){
04802                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
04803                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
04804                                 return -1;
04805                         }
04806                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
04807                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
04808                     }else{
04809                         for(i4x4=0; i4x4<4; i4x4++){
04810                             const int index= i4x4 + 4*i8x8;
04811 
04812                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
04813                                 return -1;
04814                             }
04815                         }
04816                     }
04817                 }else{
04818                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
04819                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
04820                 }
04821             }
04822         }
04823 
              // chroma DC blocks are read if either of cbp bits 4 and 5 is set
04824         if(cbp&0x30){
04825             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
04826                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
04827                     return -1;
04828                 }
04829         }
04830 
              // chroma AC blocks are read only if cbp bit 5 is set
04831         if(cbp&0x20){
04832             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
04833                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
04834                 for(i4x4=0; i4x4<4; i4x4++){
04835                     const int index= 16 + 4*chroma_idx + i4x4;
04836                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
04837                         return -1;
04838                     }
04839                 }
04840             }
04841         }else{
04842             uint8_t * const nnz= &h->non_zero_count_cache[0];
04843             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
04844             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
04845         }
04846     }else{
              // no residual at all: clear every luma and chroma coefficient count
04847         uint8_t * const nnz= &h->non_zero_count_cache[0];
04848         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
04849         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
04850         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
04851     }
04852     s->current_picture.qscale_table[mb_xy]= s->qscale;
04853     write_back_non_zero_count(h);
04854 
          // undo the doubling applied before the prediction info was parsed
04855     if(MB_MBAFF){
04856         h->ref_count[0] >>= 1;
04857         h->ref_count[1] >>= 1;
04858     }
04859 
04860     return 0;
04861 }
04862 
/**
 * Decodes the field decoding flag of the current macroblock pair with CABAC.
 *
 * The context index is the number of neighbours (the macroblock to the left
 * in the even row of the current pair, and the macroblock two rows above that
 * row) which belong to the current slice and are interlaced.
 *
 * @return the bin returned by get_cabac_noinline() for state 70 + ctx
 */
04863 static int decode_cabac_field_decoding_flag(H264Context *h) {
04864     MpegEncContext * const s = &h->s;
04865     const int mb_x = s->mb_x;
          // round down to the even row, i.e. the first row of the current pair
04866     const int mb_y = s->mb_y & ~1;
04867     const int mba_xy = mb_x - 1 +  mb_y   *s->mb_stride;
04868     const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
04869 
04870     unsigned int ctx = 0;
04871 
04872     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
04873         ctx += 1;
04874     }
04875     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
04876         ctx += 1;
04877     }
04878 
04879     return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
04880 }
04881 
/**
 * Decodes an intra macroblock type with CABAC.
 *
 * @param ctx_base    index of the first CABAC state used for this element
 * @param intra_slice non-zero if the first bin uses a context derived from the
 *                    left/top neighbours (one more for each neighbour in the
 *                    current slice that is not intra 4x4); the value is also
 *                    used as an offset into the state array, so it is
 *                    presumably 0 or 1 -- confirm against the callers
 * @return 0 for I4x4, 25 for PCM, otherwise 1..24 for the I16x16 variants
 */
04882 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
04883     uint8_t *state= &h->cabac_state[ctx_base];
04884     int mb_type;
04885 
04886     if(intra_slice){
04887         MpegEncContext * const s = &h->s;
04888         const int mba_xy = h->left_mb_xy[0];
04889         const int mbb_xy = h->top_mb_xy;
04890         int ctx=0;
04891         if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
04892             ctx++;
04893         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
04894             ctx++;
04895         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
04896             return 0;   /* I4x4 */
              // the first bin occupied states 0..2, so the remaining bins start two states later
04897         state += 2;
04898     }else{
04899         if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
04900             return 0;   /* I4x4 */
04901     }
04902 
04903     if( get_cabac_terminate( &h->cabac ) )
04904         return 25;  /* PCM */
04905 
          // the remaining bins are accumulated into the I16x16 type index
04906     mb_type = 1; /* I16x16 */
04907     mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
04908     if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
04909         mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
04910     mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
04911     mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
04912     return mb_type;
04913 }
04914 
/**
 * Decodes the macroblock type of a B slice macroblock with CABAC.
 *
 * The context of the first bin is the number of neighbours (left, top) that
 * belong to the current slice and are not direct.
 *
 * @return 0 for B_Direct_16x16, 1..22 for the other inter types, or
 *         23 + the value of decode_cabac_intra_mb_type() for intra types
 */
04915 static int decode_cabac_mb_type_b( H264Context *h ) {
04916     MpegEncContext * const s = &h->s;
04917 
04918         const int mba_xy = h->left_mb_xy[0];
04919         const int mbb_xy = h->top_mb_xy;
04920         int ctx = 0;
04921         int bits;
04922         assert(h->slice_type_nos == FF_B_TYPE);
04923 
04924         if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
04925             ctx++;
04926         if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
04927             ctx++;
04928 
04929         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
04930             return 0; /* B_Direct_16x16 */
04931 
04932         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
04933             return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
04934         }
04935 
              // four more bins, most significant first
04936         bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
04937         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
04938         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
04939         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
04940         if( bits < 8 )
04941             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
04942         else if( bits == 13 ) {
                  // intra macroblock inside a B slice; intra types start at 23
04943             return decode_cabac_intra_mb_type(h, 32, 0) + 23;
04944         } else if( bits == 14 )
04945             return 11; /* B_L1_L0_8x16 */
04946         else if( bits == 15 )
04947             return 22; /* B_8x8 */
04948 
              // bits is 8..12 here: one more bin gives 16..25, which maps to 12..21
04949         bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
04950         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
04951 }
04952 
/**
 * Decodes the skip flag of the macroblock at (mb_x, mb_y) with CABAC.
 *
 * The context is the number of neighbours (left, top) that belong to the
 * current slice and are not skipped; B slices use the states 13 entries
 * further on. In MBAFF frames the neighbour positions depend on MB_FIELD,
 * on whether mb_y is odd, and on whether the neighbouring macroblocks are
 * interlaced.
 *
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return the bin returned by get_cabac_noinline()
 */
04953 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
04954     MpegEncContext * const s = &h->s;
04955     int mba_xy, mbb_xy;
04956     int ctx = 0;
04957 
04958     if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
              // mb_xy addresses the macroblock in the even row of the current pair
04959         int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
04960         mba_xy = mb_xy - 1;
              // odd row: use the left macroblock of the odd row instead if the left pair
              // is in this slice and its interlaced state equals MB_FIELD
04961         if( (mb_y&1)
04962             && h->slice_table[mba_xy] == h->slice_num
04963             && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
04964             mba_xy += s->mb_stride;
04965         if( MB_FIELD ){
04966             mbb_xy = mb_xy - s->mb_stride;
                  // even row with an interlaced pair above in this slice: go up one more row
04967             if( !(mb_y&1)
04968                 && h->slice_table[mbb_xy] == h->slice_num
04969                 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
04970                 mbb_xy -= s->mb_stride;
04971         }else
04972             mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
04973     }else{
04974         int mb_xy = h->mb_xy;
04975         mba_xy = mb_xy - 1;
              // the distance to the top neighbour is mb_stride shifted left by FIELD_PICTURE
04976         mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
04977     }
04978 
04979     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
04980         ctx++;
04981     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
04982         ctx++;
04983 
04984     if( h->slice_type_nos == FF_B_TYPE )
04985         ctx += 13;
04986     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
04987 }
04988 
/**
 * Decodes an intra 4x4 prediction mode with CABAC.
 *
 * @param pred_mode the predicted mode
 * @return pred_mode if the first bin is set; otherwise the 3 bit value that
 *         follows (least significant bin first), incremented by one if it is
 *         not smaller than pred_mode so that the predicted mode is skipped
 */
04989 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
04990     int mode = 0;
04991 
04992     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
04993         return pred_mode;
04994 
          // three bins with the same state, least significant first
04995     mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
04996     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
04997     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
04998 
04999     if( mode >= pred_mode )
05000         return mode + 1;
05001     else
05002         return mode;
05003 }
05004 
/**
 * Decodes the intra chroma prediction mode with CABAC.
 *
 * The context of the first bin is the number of neighbours (left, top) that
 * belong to the current slice and have a non-zero entry in
 * h->chroma_pred_mode_table. The value is coded as up to three bins that are
 * read until a zero bin is found.
 *
 * @return the decoded mode, 0..3
 */
05005 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
05006     const int mba_xy = h->left_mb_xy[0];
05007     const int mbb_xy = h->top_mb_xy;
05008 
05009     int ctx = 0;
05010 
05011     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
05012     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
05013         ctx++;
05014 
05015     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
05016         ctx++;
05017 
05018     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
05019         return 0;
05020 
          // the second and third bin share one state
05021     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
05022         return 1;
05023     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
05024         return 2;
05025     else
05026         return 3;
05027 }
05028 
05029 static int decode_cabac_mb_cbp_luma( H264Context *h) {
05030     int cbp_b, cbp_a, ctx, cbp = 0;
05031 
05032     cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
05033     cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;
05034 
05035     ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
05036     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
05037     ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
05038     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
05039     ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
05040     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
05041     ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
05042     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
05043     return cbp;
05044 }
05045 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
05046     int ctx;
05047     int cbp_a, cbp_b;
05048 
05049     cbp_a = (h->left_cbp>>4)&0x03;
05050     cbp_b = (h-> top_cbp>>4)&0x03;
05051 
05052     ctx = 0;
05053     if( cbp_a > 0 ) ctx++;
05054     if( cbp_b > 0 ) ctx += 2;
05055     if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
05056         return 0;
05057 
05058     ctx = 4;
05059     if( cbp_a == 2 ) ctx++;
05060     if( cbp_b == 2 ) ctx += 2;
05061     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
05062 }
05063 static int decode_cabac_mb_dqp( H264Context *h) {
05064     int   ctx= h->last_qscale_diff != 0;
05065     int   val = 0;
05066 
05067     while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
05068         ctx= 2+(ctx>>1);
05069         val++;
05070         if(val > 102) //prevent infinite loop
05071             return INT_MIN;
05072     }
05073 
05074     if( val&0x01 )
05075         return   (val + 1)>>1 ;
05076     else
05077         return -((val + 1)>>1);
05078 }
05079 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
05080     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
05081         return 0;   /* 8x8 */
05082     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
05083         return 1;   /* 8x4 */
05084     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
05085         return 2;   /* 4x8 */
05086     return 3;       /* 4x4 */
05087 }
05088 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
05089     int type;
05090     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
05091         return 0;   /* B_Direct_8x8 */
05092     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
05093         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
05094     type = 3;
05095     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
05096         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
05097             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
05098         type += 4;
05099     }
05100     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
05101     type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
05102     return type;
05103 }
05104 
05105 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
05106     return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
05107 }
05108 
05109 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
05110     int refa = h->ref_cache[list][scan8[n] - 1];
05111     int refb = h->ref_cache[list][scan8[n] - 8];
05112     int ref  = 0;
05113     int ctx  = 0;
05114 
05115     if( h->slice_type_nos == FF_B_TYPE) {
05116         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
05117             ctx++;
05118         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
05119             ctx += 2;
05120     } else {
05121         if( refa > 0 )
05122             ctx++;
05123         if( refb > 0 )
05124             ctx += 2;
05125     }
05126 
05127     while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
05128         ref++;
05129         ctx = (ctx>>2)+4;
05130         if(ref >= 32 /*h->ref_list[list]*/){
05131             return -1;
05132         }
05133     }
05134     return ref;
05135 }
05136 
05137 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
05138     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
05139                abs( h->mvd_cache[list][scan8[n] - 8][l] );
05140     int ctxbase = (l == 0) ? 40 : 47;
05141     int mvd;
05142     int ctx = (amvd>2) + (amvd>32);
05143 
05144     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
05145         return 0;
05146 
05147     mvd= 1;
05148     ctx= 3;
05149     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
05150         mvd++;
05151         if( ctx < 6 )
05152             ctx++;
05153     }
05154 
05155     if( mvd >= 9 ) {
05156         int k = 3;
05157         while( get_cabac_bypass( &h->cabac ) ) {
05158             mvd += 1 << k;
05159             k++;
05160             if(k>24){
05161                 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
05162                 return INT_MIN;
05163             }
05164         }
05165         while( k-- ) {
05166             if( get_cabac_bypass( &h->cabac ) )
05167                 mvd += 1 << k;
05168         }
05169     }
05170     return get_cabac_bypass_sign( &h->cabac, -mvd );
05171 }
05172 
05173 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
05174     int nza, nzb;
05175     int ctx = 0;
05176 
05177     if( is_dc ) {
05178         if( cat == 0 ) {
05179             nza = h->left_cbp&0x100;
05180             nzb = h-> top_cbp&0x100;
05181         } else {
05182             nza = (h->left_cbp>>(6+idx))&0x01;
05183             nzb = (h-> top_cbp>>(6+idx))&0x01;
05184         }
05185     } else {
05186         assert(cat == 1 || cat == 2 || cat == 4);
05187         nza = h->non_zero_count_cache[scan8[idx] - 1];
05188         nzb = h->non_zero_count_cache[scan8[idx] - 8];
05189     }
05190 
05191     if( nza > 0 )
05192         ctx++;
05193 
05194     if( nzb > 0 )
05195         ctx += 2;
05196 
05197     return ctx + 4 * cat;
05198 }
05199 
05200 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
05201     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
05202     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
05203     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
05204     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
05205 };
05206 
05207 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
05208     static const int significant_coeff_flag_offset[2][6] = {
05209       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
05210       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
05211     };
05212     static const int last_coeff_flag_offset[2][6] = {
05213       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
05214       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
05215     };
05216     static const int coeff_abs_level_m1_offset[6] = {
05217         227+0, 227+10, 227+20, 227+30, 227+39, 426
05218     };
05219     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
05220       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
05221         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
05222         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
05223        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
05224       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
05225         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
05226         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
05227         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
05228     };
05229     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
05230      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
05231      * map node ctx => cabac ctx for level=1 */
05232     static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
05233     /* map node ctx => cabac ctx for level>1 */
05234     static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
05235     static const uint8_t coeff_abs_level_transition[2][8] = {
05236     /* update node ctx after decoding a level=1 */
05237         { 1, 2, 3, 3, 4, 5, 6, 7 },
05238     /* update node ctx after decoding a level>1 */
05239         { 4, 4, 4, 4, 5, 6, 7, 7 }
05240     };
05241 
05242     int index[64];
05243 
05244     int av_unused last;
05245     int coeff_count = 0;
05246     int node_ctx = 0;
05247 
05248     uint8_t *significant_coeff_ctx_base;
05249     uint8_t *last_coeff_ctx_base;
05250     uint8_t *abs_level_m1_ctx_base;
05251 
05252 #if !ARCH_X86
05253 #define CABAC_ON_STACK
05254 #endif
05255 #ifdef CABAC_ON_STACK
05256 #define CC &cc
05257     CABACContext cc;
05258     cc.range     = h->cabac.range;
05259     cc.low       = h->cabac.low;
05260     cc.bytestream= h->cabac.bytestream;
05261 #else
05262 #define CC &h->cabac
05263 #endif
05264 
05265 
05266     /* cat: 0-> DC 16x16  n = 0
05267      *      1-> AC 16x16  n = luma4x4idx
05268      *      2-> Luma4x4   n = luma4x4idx
05269      *      3-> DC Chroma n = iCbCr
05270      *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
05271      *      5-> Luma8x8   n = 4 * luma8x8idx
05272      */
05273 
05274     /* read coded block flag */
05275     if( is_dc || cat != 5 ) {
05276         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
05277             if( !is_dc )
05278                 h->non_zero_count_cache[scan8[n]] = 0;
05279 
05280 #ifdef CABAC_ON_STACK
05281             h->cabac.range     = cc.range     ;
05282             h->cabac.low       = cc.low       ;
05283             h->cabac.bytestream= cc.bytestream;
05284 #endif
05285             return;
05286         }
05287     }
05288 
05289     significant_coeff_ctx_base = h->cabac_state
05290         + significant_coeff_flag_offset[MB_FIELD][cat];
05291     last_coeff_ctx_base = h->cabac_state
05292         + last_coeff_flag_offset[MB_FIELD][cat];
05293     abs_level_m1_ctx_base = h->cabac_state
05294         + coeff_abs_level_m1_offset[cat];
05295 
05296     if( !is_dc && cat == 5 ) {
05297 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
05298         for(last= 0; last < coefs; last++) { \
05299             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
05300             if( get_cabac( CC, sig_ctx )) { \
05301                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
05302                 index[coeff_count++] = last; \
05303                 if( get_cabac( CC, last_ctx ) ) { \
05304                     last= max_coeff; \
05305                     break; \
05306                 } \
05307             } \
05308         }\
05309         if( last == max_coeff -1 ) {\
05310             index[coeff_count++] = last;\
05311         }
05312         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
05313 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
05314         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
05315     } else {
05316         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
05317 #else
05318         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
05319     } else {
05320         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
05321 #endif
05322     }
05323     assert(coeff_count > 0);
05324 
05325     if( is_dc ) {
05326         if( cat == 0 )
05327             h->cbp_table[h->mb_xy] |= 0x100;
05328         else
05329             h->cbp_table[h->mb_xy] |= 0x40 << n;
05330     } else {
05331         if( cat == 5 )
05332             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
05333         else {
05334             assert( cat == 1 || cat == 2 || cat == 4 );
05335             h->non_zero_count_cache[scan8[n]] = coeff_count;
05336         }
05337     }
05338 
05339     do {
05340         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
05341 
05342         int j= scantable[index[--coeff_count]];
05343 
05344         if( get_cabac( CC, ctx ) == 0 ) {
05345             node_ctx = coeff_abs_level_transition[0][node_ctx];
05346             if( is_dc ) {
05347                 block[j] = get_cabac_bypass_sign( CC, -1);
05348             }else{
05349                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
05350             }
05351         } else {
05352             int coeff_abs = 2;
05353             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
05354             node_ctx = coeff_abs_level_transition[1][node_ctx];
05355 
05356             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
05357                 coeff_abs++;
05358             }
05359 
05360             if( coeff_abs >= 15 ) {
05361                 int j = 0;
05362                 while( get_cabac_bypass( CC ) ) {
05363                     j++;
05364                 }
05365 
05366                 coeff_abs=1;
05367                 while( j-- ) {
05368                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
05369                 }
05370                 coeff_abs+= 14;
05371             }
05372 
05373             if( is_dc ) {
05374                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
05375             }else{
05376                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
05377             }
05378         }
05379     } while( coeff_count );
05380 #ifdef CABAC_ON_STACK
05381             h->cabac.range     = cc.range     ;
05382             h->cabac.low       = cc.low       ;
05383             h->cabac.bytestream= cc.bytestream;
05384 #endif
05385 
05386 }
05387 
05388 #if !CONFIG_SMALL
05389 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
05390     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
05391 }
05392 
05393 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
05394     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
05395 }
05396 #endif
05397 
05398 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
05399 #if CONFIG_SMALL
05400     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
05401 #else
05402     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
05403     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
05404 #endif
05405 }
05406 
05407 static inline void compute_mb_neighbors(H264Context *h)
05408 {
05409     MpegEncContext * const s = &h->s;
05410     const int mb_xy  = h->mb_xy;
05411     h->top_mb_xy     = mb_xy - s->mb_stride;
05412     h->left_mb_xy[0] = mb_xy - 1;
05413     if(FRAME_MBAFF){
05414         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
05415         const int top_pair_xy      = pair_xy     - s->mb_stride;
05416         const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
05417         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
05418         const int curr_mb_field_flag = MB_FIELD;
05419         const int bottom = (s->mb_y & 1);
05420 
05421         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
05422             h->top_mb_xy -= s->mb_stride;
05423         }
05424         if (!left_mb_field_flag == curr_mb_field_flag) {
05425             h->left_mb_xy[0] = pair_xy - 1;
05426         }
05427     } else if (FIELD_PICTURE) {
05428         h->top_mb_xy -= s->mb_stride;
05429     }
05430     return;
05431 }
05432 
05437 static int decode_mb_cabac(H264Context *h) {
05438     MpegEncContext * const s = &h->s;
05439     int mb_xy;
05440     int mb_type, partition_count, cbp = 0;
05441     int dct8x8_allowed= h->pps.transform_8x8_mode;
05442 
05443     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
05444 
05445     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
05446     if( h->slice_type_nos != FF_I_TYPE ) {
05447         int skip;
05448         /* a skipped mb needs the aff flag from the following mb */
05449         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
05450             predict_field_decoding_flag(h);
05451         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
05452             skip = h->next_mb_skipped;
05453         else
05454             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
05455         /* read skip flags */
05456         if( skip ) {
05457             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
05458                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
05459                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
05460                 if(!h->next_mb_skipped)
05461                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
05462             }
05463 
05464             decode_mb_skip(h);
05465 
05466             h->cbp_table[mb_xy] = 0;
05467             h->chroma_pred_mode_table[mb_xy] = 0;
05468             h->last_qscale_diff = 0;
05469 
05470             return 0;
05471 
05472         }
05473     }
05474     if(FRAME_MBAFF){
05475         if( (s->mb_y&1) == 0 )
05476             h->mb_mbaff =
05477             h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
05478     }
05479 
05480     h->prev_mb_skipped = 0;
05481 
05482     compute_mb_neighbors(h);
05483 
05484     if( h->slice_type_nos == FF_B_TYPE ) {
05485         mb_type = decode_cabac_mb_type_b( h );
05486         if( mb_type < 23 ){
05487             partition_count= b_mb_type_info[mb_type].partition_count;
05488             mb_type=         b_mb_type_info[mb_type].type;
05489         }else{
05490             mb_type -= 23;
05491             goto decode_intra_mb;
05492         }
05493     } else if( h->slice_type_nos == FF_P_TYPE ) {
05494         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
05495             /* P-type */
05496             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
05497                 /* P_L0_D16x16, P_8x8 */
05498                 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
05499             } else {
05500                 /* P_L0_D8x16, P_L0_D16x8 */
05501                 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
05502             }
05503             partition_count= p_mb_type_info[mb_type].partition_count;
05504             mb_type=         p_mb_type_info[mb_type].type;
05505         } else {
05506             mb_type= decode_cabac_intra_mb_type(h, 17, 0);
05507             goto decode_intra_mb;
05508         }
05509     } else {
05510         mb_type= decode_cabac_intra_mb_type(h, 3, 1);
05511         if(h->slice_type == FF_SI_TYPE && mb_type)
05512             mb_type--;
05513         assert(h->slice_type_nos == FF_I_TYPE);
05514 decode_intra_mb:
05515         partition_count = 0;
05516         cbp= i_mb_type_info[mb_type].cbp;
05517         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
05518         mb_type= i_mb_type_info[mb_type].type;
05519     }
05520     if(MB_FIELD)
05521         mb_type |= MB_TYPE_INTERLACED;
05522 
05523     h->slice_table[ mb_xy ]= h->slice_num;
05524 
05525     if(IS_INTRA_PCM(mb_type)) {
05526         const uint8_t *ptr;
05527 
05528         // We assume these blocks are very rare so we do not optimize it.
05529         // FIXME The two following lines get the bitstream position in the cabac
05530         // decode, I think it should be done by a function in cabac.h (or cabac.c).
05531         ptr= h->cabac.bytestream;
05532         if(h->cabac.low&0x1) ptr--;
05533         if(CABAC_BITS==16){
05534             if(h->cabac.low&0x1FF) ptr--;
05535         }
05536 
05537         // The pixels are stored in the same order as levels in h->mb array.
05538         memcpy(h->mb, ptr, 256); ptr+=256;
05539         if(CHROMA){
05540             memcpy(h->mb+128, ptr, 128); ptr+=128;
05541         }
05542 
05543         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
05544 
05545         // All blocks are present
05546         h->cbp_table[mb_xy] = 0x1ef;
05547         h->chroma_pred_mode_table[mb_xy] = 0;
05548         // In deblocking, the quantizer is 0
05549         s->current_picture.qscale_table[mb_xy]= 0;
05550         // All coeffs are present
05551         memset(h->non_zero_count[mb_xy], 16, 16);
05552         s->current_picture.mb_type[mb_xy]= mb_type;
05553         h->last_qscale_diff = 0;
05554         return 0;
05555     }
05556 
05557     if(MB_MBAFF){
05558         h->ref_count[0] <<= 1;
05559         h->ref_count[1] <<= 1;
05560     }
05561 
05562     fill_caches(h, mb_type, 0);
05563 
05564     if( IS_INTRA( mb_type ) ) {
05565         int i, pred_mode;
05566         if( IS_INTRA4x4( mb_type ) ) {
05567             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
05568                 mb_type |= MB_TYPE_8x8DCT;
05569                 for( i = 0; i < 16; i+=4 ) {
05570                     int pred = pred_intra_mode( h, i );
05571                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
05572                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
05573                 }
05574             } else {
05575                 for( i = 0; i < 16; i++ ) {
05576                     int pred = pred_intra_mode( h, i );
05577                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
05578 
05579                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
05580                 }
05581             }
05582             write_back_intra_pred_mode(h);
05583             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
05584         } else {
05585             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
05586             if( h->intra16x16_pred_mode < 0 ) return -1;
05587         }
05588         if(CHROMA){
05589             h->chroma_pred_mode_table[mb_xy] =
05590             pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
05591 
05592             pred_mode= check_intra_pred_mode( h, pred_mode );
05593             if( pred_mode < 0 ) return -1;
05594             h->chroma_pred_mode= pred_mode;
05595         }
05596     } else if( partition_count == 4 ) {
05597         int i, j, sub_partition_count[4], list, ref[2][4];
05598 
05599         if( h->slice_type_nos == FF_B_TYPE ) {
05600             for( i = 0; i < 4; i++ ) {
05601                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
05602                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
05603                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
05604             }
05605             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
05606                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
05607                 pred_direct_motion(h, &mb_type);
05608                 h->ref_cache[0][scan8[4]] =
05609                 h->ref_cache[1][scan8[4]] =
05610                 h->ref_cache[0][scan8[12]] =
05611                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
05612                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
05613                     for( i = 0; i < 4; i++ )
05614                         if( IS_DIRECT(h->sub_mb_type[i]) )
05615                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
05616                 }
05617             }
05618         } else {
05619             for( i = 0; i < 4; i++ ) {
05620                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
05621                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
05622                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
05623             }
05624         }
05625 
05626         for( list = 0; list < h->list_count; list++ ) {
05627                 for( i = 0; i < 4; i++ ) {
05628                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
05629                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
05630                         if( h->ref_count[list] > 1 ){
05631                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
05632                             if(ref[list][i] >= (unsigned)h->ref_count[list]){
05633                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
05634                                 return -1;
05635                             }
05636                         }else
05637                             ref[list][i] = 0;
05638                     } else {
05639                         ref[list][i] = -1;
05640                     }
05641                                                        h->ref_cache[list][ scan8[4*i]+1 ]=
05642                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
05643                 }
05644         }
05645 
05646         if(dct8x8_allowed)
05647             dct8x8_allowed = get_dct8x8_allowed(h);
05648 
05649         for(list=0; list<h->list_count; list++){
05650             for(i=0; i<4; i++){
05651                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
05652                 if(IS_DIRECT(h->sub_mb_type[i])){
05653                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
05654                     continue;
05655                 }
05656 
05657                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
05658                     const int sub_mb_type= h->sub_mb_type[i];
05659                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
05660                     for(j=0; j<sub_partition_count[i]; j++){
05661                         int mpx, mpy;
05662                         int mx, my;
05663                         const int index= 4*i + block_width*j;
05664                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
05665                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
05666                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
05667 
05668                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
05669                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
05670                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
05671 
05672                         if(IS_SUB_8X8(sub_mb_type)){
05673                             mv_cache[ 1 ][0]=
05674                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
05675                             mv_cache[ 1 ][1]=
05676                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
05677 
05678                             mvd_cache[ 1 ][0]=
05679                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
05680                             mvd_cache[ 1 ][1]=
05681                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
05682                         }else if(IS_SUB_8X4(sub_mb_type)){
05683                             mv_cache[ 1 ][0]= mx;
05684                             mv_cache[ 1 ][1]= my;
05685 
05686                             mvd_cache[ 1 ][0]= mx - mpx;
05687                             mvd_cache[ 1 ][1]= my - mpy;
05688                         }else if(IS_SUB_4X8(sub_mb_type)){
05689                             mv_cache[ 8 ][0]= mx;
05690                             mv_cache[ 8 ][1]= my;
05691 
05692                             mvd_cache[ 8 ][0]= mx - mpx;
05693                             mvd_cache[ 8 ][1]= my - mpy;
05694                         }
05695                         mv_cache[ 0 ][0]= mx;
05696                         mv_cache[ 0 ][1]= my;
05697 
05698                         mvd_cache[ 0 ][0]= mx - mpx;
05699                         mvd_cache[ 0 ][1]= my - mpy;
05700                     }
05701                 }else{
05702                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
05703                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
05704                     p[0] = p[1] = p[8] = p[9] = 0;
05705                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
05706                 }
05707             }
05708         }
05709     } else if( IS_DIRECT(mb_type) ) {
05710         pred_direct_motion(h, &mb_type);
05711         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
05712         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
05713         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
05714     } else {
05715         int list, mx, my, i, mpx, mpy;
05716         if(IS_16X16(mb_type)){
05717             for(list=0; list<h->list_count; list++){
05718                 if(IS_DIR(mb_type, 0, list)){
05719                     int ref;
05720                     if(h->ref_count[list] > 1){
05721                         ref= decode_cabac_mb_ref(h, list, 0);
05722                         if(ref >= (unsigned)h->ref_count[list]){
05723                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
05724                             return -1;
05725                         }
05726                     }else
05727                         ref=0;
05728                         fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
05729                 }else
05730                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
05731             }
05732             for(list=0; list<h->list_count; list++){
05733                 if(IS_DIR(mb_type, 0, list)){
05734                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
05735 
05736                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
05737                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
05738                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
05739 
05740                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
05741                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
05742                 }else
05743                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
05744             }
05745         }
05746         else if(IS_16X8(mb_type)){
05747             for(list=0; list<h->list_count; list++){
05748                     for(i=0; i<2; i++){
05749                         if(IS_DIR(mb_type, i, list)){
05750                             int ref;
05751                             if(h->ref_count[list] > 1){
05752                                 ref= decode_cabac_mb_ref( h, list, 8*i );
05753                                 if(ref >= (unsigned)h->ref_count[list]){
05754                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
05755                                     return -1;
05756                                 }
05757                             }else
05758                                 ref=0;
05759                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
05760                         }else
05761                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
05762                     }
05763             }
05764             for(list=0; list<h->list_count; list++){
05765                 for(i=0; i<2; i++){
05766                     if(IS_DIR(mb_type, i, list)){
05767                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
05768                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
05769                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
05770                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
05771 
05772                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
05773                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
05774                     }else{
05775                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
05776                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
05777                     }
05778                 }
05779             }
05780         }else{
05781             assert(IS_8X16(mb_type));
05782             for(list=0; list<h->list_count; list++){
05783                     for(i=0; i<2; i++){
05784                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
05785                             int ref;
05786                             if(h->ref_count[list] > 1){
05787                                 ref= decode_cabac_mb_ref( h, list, 4*i );
05788                                 if(ref >= (unsigned)h->ref_count[list]){
05789                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
05790                                     return -1;
05791                                 }
05792                             }else
05793                                 ref=0;
05794                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
05795                         }else
05796                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
05797                     }
05798             }
05799             for(list=0; list<h->list_count; list++){
05800                 for(i=0; i<2; i++){
05801                     if(IS_DIR(mb_type, i, list)){
05802                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
05803                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
05804                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
05805 
05806                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
05807                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
05808                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
05809                     }else{
05810                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
05811                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
05812                     }
05813                 }
05814             }
05815         }
05816     }
05817 
05818    if( IS_INTER( mb_type ) ) {
05819         h->chroma_pred_mode_table[mb_xy] = 0;
05820         write_back_motion( h, mb_type );
05821    }
05822 
05823     if( !IS_INTRA16x16( mb_type ) ) {
05824         cbp  = decode_cabac_mb_cbp_luma( h );
05825         if(CHROMA)
05826             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
05827     }
05828 
05829     h->cbp_table[mb_xy] = h->cbp = cbp;
05830 
05831     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
05832         if( decode_cabac_mb_transform_size( h ) )
05833             mb_type |= MB_TYPE_8x8DCT;
05834     }
05835     s->current_picture.mb_type[mb_xy]= mb_type;
05836 
05837     if( cbp || IS_INTRA16x16( mb_type ) ) {
05838         const uint8_t *scan, *scan8x8, *dc_scan;
05839         const uint32_t *qmul;
05840         int dqp;
05841 
05842         if(IS_INTERLACED(mb_type)){
05843             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
05844             scan= s->qscale ? h->field_scan : h->field_scan_q0;
05845             dc_scan= luma_dc_field_scan;
05846         }else{
05847             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
05848             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
05849             dc_scan= luma_dc_zigzag_scan;
05850         }
05851 
05852         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
05853         if( dqp == INT_MIN ){
05854             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
05855             return -1;
05856         }
05857         s->qscale += dqp;
05858         if(((unsigned)s->qscale) > 51){
05859             if(s->qscale<0) s->qscale+= 52;
05860             else            s->qscale-= 52;
05861         }
05862         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
05863         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
05864 
05865         if( IS_INTRA16x16( mb_type ) ) {
05866             int i;
05867             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
05868             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
05869 
05870             if( cbp&15 ) {
05871                 qmul = h->dequant4_coeff[0][s->qscale];
05872                 for( i = 0; i < 16; i++ ) {
05873                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
05874                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
05875                 }
05876             } else {
05877                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
05878             }
05879         } else {
05880             int i8x8, i4x4;
05881             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
05882                 if( cbp & (1<<i8x8) ) {
05883                     if( IS_8x8DCT(mb_type) ) {
05884                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
05885                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
05886                     } else {
05887                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
05888                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
05889                             const int index = 4*i8x8 + i4x4;
05890                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
05891 //START_TIMER
05892                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
05893 //STOP_TIMER("decode_residual")
05894                         }
05895                     }
05896                 } else {
05897                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
05898                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
05899                 }
05900             }
05901         }
05902 
05903         if( cbp&0x30 ){
05904             int c;
05905             for( c = 0; c < 2; c++ ) {
05906                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
05907                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
05908             }
05909         }
05910 
05911         if( cbp&0x20 ) {
05912             int c, i;
05913             for( c = 0; c < 2; c++ ) {
05914                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
05915                 for( i = 0; i < 4; i++ ) {
05916                     const int index = 16 + 4 * c + i;
05917                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
05918                     decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
05919                 }
05920             }
05921         } else {
05922             uint8_t * const nnz= &h->non_zero_count_cache[0];
05923             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
05924             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
05925         }
05926     } else {
05927         uint8_t * const nnz= &h->non_zero_count_cache[0];
05928         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
05929         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
05930         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
05931         h->last_qscale_diff = 0;
05932     }
05933 
05934     s->current_picture.qscale_table[mb_xy]= s->qscale;
05935     write_back_non_zero_count(h);
05936 
05937     if(MB_MBAFF){
05938         h->ref_count[0] >>= 1;
05939         h->ref_count[1] >>= 1;
05940     }
05941 
05942     return 0;
05943 }
05944 
05945 
05946 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
05947     const int index_a = qp + h->slice_alpha_c0_offset;
05948     const int alpha = (alpha_table+52)[index_a];
05949     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
05950 
05951     if( bS[0] < 4 ) {
05952         int8_t tc[4];
05953         tc[0] = (tc0_table+52)[index_a][bS[0]];
05954         tc[1] = (tc0_table+52)[index_a][bS[1]];
05955         tc[2] = (tc0_table+52)[index_a][bS[2]];
05956         tc[3] = (tc0_table+52)[index_a][bS[3]];
05957         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
05958     } else {
05959         h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
05960     }
05961 }
05962 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
05963     const int index_a = qp + h->slice_alpha_c0_offset;
05964     const int alpha = (alpha_table+52)[index_a];
05965     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
05966 
05967     if( bS[0] < 4 ) {
05968         int8_t tc[4];
05969         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
05970         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
05971         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
05972         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
05973         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
05974     } else {
05975         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
05976     }
05977 }
05978 
05979 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
05980     int i;
05981     for( i = 0; i < 16; i++, pix += stride) {
05982         int index_a;
05983         int alpha;
05984         int beta;
05985 
05986         int qp_index;
05987         int bS_index = (i >> 1);
05988         if (!MB_FIELD) {
05989             bS_index &= ~1;
05990             bS_index |= (i & 1);
05991         }
05992 
05993         if( bS[bS_index] == 0 ) {
05994             continue;
05995         }
05996 
05997         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
05998         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
05999         alpha = (alpha_table+52)[index_a];
06000         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
06001 
06002         if( bS[bS_index] < 4 ) {
06003             const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
06004             const int p0 = pix[-1];
06005             const int p1 = pix[-2];
06006             const int p2 = pix[-3];
06007             const int q0 = pix[0];
06008             const int q1 = pix[1];
06009             const int q2 = pix[2];
06010 
06011             if( FFABS( p0 - q0 ) < alpha &&
06012                 FFABS( p1 - p0 ) < beta &&
06013                 FFABS( q1 - q0 ) < beta ) {
06014                 int tc = tc0;
06015                 int i_delta;
06016 
06017                 if( FFABS( p2 - p0 ) < beta ) {
06018                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
06019                     tc++;
06020                 }
06021                 if( FFABS( q2 - q0 ) < beta ) {
06022                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
06023                     tc++;
06024                 }
06025 
06026                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
06027                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
06028                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
06029                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
06030             }
06031         }else{
06032             const int p0 = pix[-1];
06033             const int p1 = pix[-2];
06034             const int p2 = pix[-3];
06035 
06036             const int q0 = pix[0];
06037             const int q1 = pix[1];
06038             const int q2 = pix[2];
06039 
06040             if( FFABS( p0 - q0 ) < alpha &&
06041                 FFABS( p1 - p0 ) < beta &&
06042                 FFABS( q1 - q0 ) < beta ) {
06043 
06044                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
06045                     if( FFABS( p2 - p0 ) < beta)
06046                     {
06047                         const int p3 = pix[-4];
06048                         /* p0', p1', p2' */
06049                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
06050                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
06051                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
06052                     } else {
06053                         /* p0' */
06054                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
06055                     }
06056                     if( FFABS( q2 - q0 ) < beta)
06057                     {
06058                         const int q3 = pix[3];
06059                         /* q0', q1', q2' */
06060                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
06061                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
06062                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
06063                     } else {
06064                         /* q0' */
06065                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
06066                     }
06067                 }else{
06068                     /* p0', q0' */
06069                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
06070                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
06071                 }
06072                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
06073             }
06074         }
06075     }
06076 }
06077 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
06078     int i;
06079     for( i = 0; i < 8; i++, pix += stride) {
06080         int index_a;
06081         int alpha;
06082         int beta;
06083 
06084         int qp_index;
06085         int bS_index = i;
06086 
06087         if( bS[bS_index] == 0 ) {
06088             continue;
06089         }
06090 
06091         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
06092         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
06093         alpha = (alpha_table+52)[index_a];
06094         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
06095 
06096         if( bS[bS_index] < 4 ) {
06097             const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
06098             const int p0 = pix[-1];
06099             const int p1 = pix[-2];
06100             const int q0 = pix[0];
06101             const int q1 = pix[1];
06102 
06103             if( FFABS( p0 - q0 ) < alpha &&
06104                 FFABS( p1 - p0 ) < beta &&
06105                 FFABS( q1 - q0 ) < beta ) {
06106                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
06107 
06108                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
06109                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
06110                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
06111             }
06112         }else{
06113             const int p0 = pix[-1];
06114             const int p1 = pix[-2];
06115             const int q0 = pix[0];
06116             const int q1 = pix[1];
06117 
06118             if( FFABS( p0 - q0 ) < alpha &&
06119                 FFABS( p1 - p0 ) < beta &&
06120                 FFABS( q1 - q0 ) < beta ) {
06121 
06122                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
06123                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
06124                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
06125             }
06126         }
06127     }
06128 }
06129 
06130 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
06131     const int index_a = qp + h->slice_alpha_c0_offset;
06132     const int alpha = (alpha_table+52)[index_a];
06133     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
06134 
06135     if( bS[0] < 4 ) {
06136         int8_t tc[4];
06137         tc[0] = (tc0_table+52)[index_a][bS[0]];
06138         tc[1] = (tc0_table+52)[index_a][bS[1]];
06139         tc[2] = (tc0_table+52)[index_a][bS[2]];
06140         tc[3] = (tc0_table+52)[index_a][bS[3]];
06141         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
06142     } else {
06143         h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
06144     }
06145 }
06146 
06147 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
06148     const int index_a = qp + h->slice_alpha_c0_offset;
06149     const int alpha = (alpha_table+52)[index_a];
06150     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
06151 
06152     if( bS[0] < 4 ) {
06153         int8_t tc[4];
06154         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
06155         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
06156         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
06157         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
06158         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
06159     } else {
06160         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
06161     }
06162 }
06163 
06164 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
06165     MpegEncContext * const s = &h->s;
06166     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
06167     int mb_xy, mb_type;
06168     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
06169 
06170     mb_xy = h->mb_xy;
06171 
06172     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
06173         !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
06174        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
06175                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
06176         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
06177         return;
06178     }
06179     assert(!FRAME_MBAFF);
06180 
06181     mb_type = s->current_picture.mb_type[mb_xy];
06182     qp = s->current_picture.qscale_table[mb_xy];
06183     qp0 = s->current_picture.qscale_table[mb_xy-1];
06184     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
06185     qpc = get_chroma_qp( h, 0, qp );
06186     qpc0 = get_chroma_qp( h, 0, qp0 );
06187     qpc1 = get_chroma_qp( h, 0, qp1 );
06188     qp0 = (qp + qp0 + 1) >> 1;
06189     qp1 = (qp + qp1 + 1) >> 1;
06190     qpc0 = (qpc + qpc0 + 1) >> 1;
06191     qpc1 = (qpc + qpc1 + 1) >> 1;
06192     qp_thresh = 15 - h->slice_alpha_c0_offset;
06193     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
06194        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
06195         return;
06196 
06197     if( IS_INTRA(mb_type) ) {
06198         int16_t bS4[4] = {4,4,4,4};
06199         int16_t bS3[4] = {3,3,3,3};
06200         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
06201         if( IS_8x8DCT(mb_type) ) {
06202             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
06203             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
06204             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
06205             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
06206         } else {
06207             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
06208             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
06209             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
06210             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
06211             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
06212             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
06213             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
06214             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
06215         }
06216         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
06217         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
06218         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
06219         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
06220         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
06221         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
06222         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
06223         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
06224         return;
06225     } else {
06226         DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
06227         uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
06228         int edges;
06229         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
06230             edges = 4;
06231             bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
06232         } else {
06233             int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
06234                              (mb_type & MB_TYPE_16x8) ? 1 : 0;
06235             int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
06236                              && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
06237                              ? 3 : 0;
06238             int step = IS_8x8DCT(mb_type) ? 2 : 1;
06239             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
06240             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
06241                                               (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
06242         }
06243         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
06244             bSv[0][0] = 0x0004000400040004ULL;
06245         if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
06246             bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
06247 
06248 #define FILTER(hv,dir,edge)\
06249         if(bSv[dir][edge]) {\
06250             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
06251             if(!(edge&1)) {\
06252                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
06253                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
06254             }\
06255         }
06256         if( edges == 1 ) {
06257             FILTER(v,0,0);
06258             FILTER(h,1,0);
06259         } else if( IS_8x8DCT(mb_type) ) {
06260             FILTER(v,0,0);
06261             FILTER(v,0,2);
06262             FILTER(h,1,0);
06263             FILTER(h,1,2);
06264         } else {
06265             FILTER(v,0,0);
06266             FILTER(v,0,1);
06267             FILTER(v,0,2);
06268             FILTER(v,0,3);
06269             FILTER(h,1,0);
06270             FILTER(h,1,1);
06271             FILTER(h,1,2);
06272             FILTER(h,1,3);
06273         }
06274 #undef FILTER
06275     }
06276 }
06277 
06278 
06279 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
06280     MpegEncContext * const s = &h->s;
06281     int edge;
06282     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
06283     const int mbm_type = s->current_picture.mb_type[mbm_xy];
06284     int (*ref2frm) [64] = h->ref2frm[ h->slice_num          &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
06285     int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
06286     int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
06287 
06288     const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
06289                               == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
06290     // how often to recheck mv-based bS when iterating between edges
06291     const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
06292                           (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
06293     // how often to recheck mv-based bS when iterating along each edge
06294     const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
06295 
06296     if (first_vertical_edge_done) {
06297         start = 1;
06298     }
06299 
06300     if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
06301         start = 1;
06302 
06303     if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
06304         && !IS_INTERLACED(mb_type)
06305         && IS_INTERLACED(mbm_type)
06306         ) {
06307         // This is a special case in the norm where the filtering must
06308         // be done twice (one each of the field) even if we are in a
06309         // frame macroblock.
06310         //
06311         static const int nnz_idx[4] = {4,5,6,3};
06312         unsigned int tmp_linesize   = 2 *   linesize;
06313         unsigned int tmp_uvlinesize = 2 * uvlinesize;
06314         int mbn_xy = mb_xy - 2 * s->mb_stride;
06315         int qp;
06316         int i, j;
06317         int16_t bS[4];
06318 
06319         for(j=0; j<2; j++, mbn_xy += s->mb_stride){
06320             if( IS_INTRA(mb_type) ||
06321                 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
06322                 bS[0] = bS[1] = bS[2] = bS[3] = 3;
06323             } else {
06324                 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
06325                 for( i = 0; i < 4; i++ ) {
06326                     if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
06327                         mbn_nnz[nnz_idx[i]] != 0 )
06328                         bS[i] = 2;
06329                     else
06330                         bS[i] = 1;
06331                 }
06332             }
06333             // Do not use s->qscale as luma quantizer because it has not the same
06334             // value in IPCM macroblocks.
06335             qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
06336             tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
06337             { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
06338             filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
06339             filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
06340                               ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
06341             filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
06342                               ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
06343         }
06344 
06345         start = 1;
06346     }
06347 
06348     /* Calculate bS */
06349     for( edge = start; edge < edges; edge++ ) {
06350         /* mbn_xy: neighbor macroblock */
06351         const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
06352         const int mbn_type = s->current_picture.mb_type[mbn_xy];
06353         int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
06354         int16_t bS[4];
06355         int qp;
06356 
06357         if( (edge&1) && IS_8x8DCT(mb_type) )
06358             continue;
06359 
06360         if( IS_INTRA(mb_type) ||
06361             IS_INTRA(mbn_type) ) {
06362             int value;
06363             if (edge == 0) {
06364                 if (   (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
06365                     || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
06366                 ) {
06367                     value = 4;
06368                 } else {
06369                     value = 3;
06370                 }
06371             } else {
06372                 value = 3;
06373             }
06374             bS[0] = bS[1] = bS[2] = bS[3] = value;
06375         } else {
06376             int i, l;
06377             int mv_done;
06378 
06379             if( edge & mask_edge ) {
06380                 bS[0] = bS[1] = bS[2] = bS[3] = 0;
06381                 mv_done = 1;
06382             }
06383             else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
06384                 bS[0] = bS[1] = bS[2] = bS[3] = 1;
06385                 mv_done = 1;
06386             }
06387             else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
06388                 int b_idx= 8 + 4 + edge * (dir ? 8:1);
06389                 int bn_idx= b_idx - (dir ? 8:1);
06390                 int v = 0;
06391 
06392                 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
06393                     v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
06394                          FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
06395                          FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
06396                 }
06397 
06398                 if(h->slice_type_nos == FF_B_TYPE && v){
06399                     v=0;
06400                     for( l = 0; !v && l < 2; l++ ) {
06401                         int ln= 1-l;
06402                         v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
06403                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
06404                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
06405                     }
06406                 }
06407 
06408                 bS[0] = bS[1] = bS[2] = bS[3] = v;
06409                 mv_done = 1;
06410             }
06411             else
06412                 mv_done = 0;
06413 
06414             for( i = 0; i < 4; i++ ) {
06415                 int x = dir == 0 ? edge : i;
06416                 int y = dir == 0 ? i    : edge;
06417                 int b_idx= 8 + 4 + x + 8*y;
06418                 int bn_idx= b_idx - (dir ? 8:1);
06419 
06420                 if( h->non_zero_count_cache[b_idx] |
06421                     h->non_zero_count_cache[bn_idx] ) {
06422                     bS[i] = 2;
06423                 }
06424                 else if(!mv_done)
06425                 {
06426                     bS[i] = 0;
06427                     for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
06428                         if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
06429                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
06430                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
06431                             bS[i] = 1;
06432                             break;
06433                         }
06434                     }
06435 
06436                     if(h->slice_type_nos == FF_B_TYPE && bS[i]){
06437                         bS[i] = 0;
06438                         for( l = 0; l < 2; l++ ) {
06439                             int ln= 1-l;
06440                             if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
06441                                 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
06442                                 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
06443                                 bS[i] = 1;
06444                                 break;
06445                             }
06446                         }
06447                     }
06448                 }
06449             }
06450 
06451             if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
06452                 continue;
06453         }
06454 
06455         /* Filter edge */
06456         // Do not use s->qscale as luma quantizer because it has not the same
06457         // value in IPCM macroblocks.
06458         qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
06459         //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
06460         tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
06461         { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
06462         if( dir == 0 ) {
06463             filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
06464             if( (edge&1) == 0 ) {
06465                 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
06466                                   ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
06467                 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
06468                                   ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
06469             }
06470         } else {
06471             filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
06472             if( (edge&1) == 0 ) {
06473                 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
06474                                   ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
06475                 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
06476                                   ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
06477             }
06478         }
06479     }
06480 }
06481 
/**
 * Run the in-loop deblocking filter on one macroblock.
 *
 * Steps, in order:
 *  1. Outside MBAFF frames, return early when the QP of this macroblock
 *     (and its average with the left/top neighbours) is at or below a
 *     conservative threshold.
 *  2. For CAVLC streams with transform_8x8_mode, overwrite entries of
 *     h->non_zero_count_cache with bits taken from the coded block patterns.
 *  3. In MBAFF frames, when the left neighbour has a different interlaced
 *     type, filter the first vertical edge here with 8 bS values and 2 QPs.
 *  4. Call filter_mb_dir() for dir 0 and then for dir 1.
 *
 * @param h          decoder context
 * @param mb_x       macroblock column, used as mb_x + mb_y*s->mb_stride
 * @param mb_y       macroblock row
 * @param img_y      luma pointer handed to the edge filters
 *                   (presumably the first sample of this macroblock - TODO confirm)
 * @param img_cb     Cb pointer handed to the chroma edge filters
 * @param img_cr     Cr pointer handed to the chroma edge filters
 * @param linesize   stride passed along with img_y
 * @param uvlinesize stride passed along with img_cb / img_cr
 */
06482 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
06483     MpegEncContext * const s = &h->s;
06484     const int mb_xy= mb_x + mb_y*s->mb_stride;
06485     const int mb_type = s->current_picture.mb_type[mb_xy];
    /* forwarded to filter_mb_dir(); smaller for interlaced macroblocks */
06486     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
    /* set to 1 once the MBAFF special case below has filtered the left edge */
06487     int first_vertical_edge_done = 0;
06488     av_unused int dir;
06489 
06490     //for sufficiently low qp, filtering wouldn't do anything
06491     //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
06492     if(!FRAME_MBAFF){
06493         int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
06494         int qp = s->current_picture.qscale_table[mb_xy];
        /* the left / top checks are skipped for macroblocks in column 0 / row 0 */
06495         if(qp <= qp_thresh
06496            && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
06497            && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
06498             return;
06499         }
06500     }
06501 
06502     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
06503     if(!h->pps.cabac && h->pps.transform_8x8_mode){
06504         int top_type, left_type[2];
06505         top_type     = s->current_picture.mb_type[h->top_mb_xy]    ;
06506         left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
06507         left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
06508 
        /* neighbour entries: replaced by cbp bits of the top / left macroblocks */
06509         if(IS_8x8DCT(top_type)){
06510             h->non_zero_count_cache[4+8*0]=
06511             h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
06512             h->non_zero_count_cache[6+8*0]=
06513             h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
06514         }
06515         if(IS_8x8DCT(left_type[0])){
06516             h->non_zero_count_cache[3+8*1]=
06517             h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
06518         }
06519         if(IS_8x8DCT(left_type[1])){
06520             h->non_zero_count_cache[3+8*3]=
06521             h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
06522         }
06523 
        /* current macroblock: each group of four scan8 entries gets one bit of h->cbp */
06524         if(IS_8x8DCT(mb_type)){
06525             h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
06526             h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;
06527 
06528             h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
06529             h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
06530 
06531             h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
06532             h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
06533 
06534             h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
06535             h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
06536         }
06537     }
06538 
06539     if (FRAME_MBAFF
06540             // left mb is in picture
06541             && h->slice_table[mb_xy-1] != 0xFFFF
06542             // and current and left pair do not have the same interlaced type
06543             && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
06544             // and left mb is in the same slice if deblocking_filter == 2
06545             && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
06546         /* First vertical edge is different in MBAFF frames
06547          * There are 8 different bS to compute and 2 different Qp
06548          */
        /* pair_xy addresses the even row of the pair; left_mb_xy[] holds both macroblocks of the left pair */
06549         const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
06550         const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
06551         int16_t bS[8];
06552         int qp[2];
06553         int bqp[2];
06554         int rqp[2];
06555         int mb_qp, mbn0_qp, mbn1_qp;
06556         int i;
06557         first_vertical_edge_done = 1;
06558 
        /* bS: 4 when either side is intra, 2 when either side has coded
         * coefficients, 1 otherwise */
06559         if( IS_INTRA(mb_type) )
06560             bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
06561         else {
06562             for( i = 0; i < 8; i++ ) {
                /* field MB: bS 0-3 use left_mb_xy[0], bS 4-7 use left_mb_xy[1];
                 * frame MB: even i uses left_mb_xy[0], odd i uses left_mb_xy[1] */
06563                 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
06564 
06565                 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
06566                     bS[i] = 4;
                /* for a CAVLC 8x8dct neighbour the test uses a bit of its cbp
                 * instead of its non_zero_count entry */
06567                 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
06568                          ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
06569                             (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
06570                                                                        :
06571                             h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
06572                     bS[i] = 2;
06573                 else
06574                     bS[i] = 1;
06575             }
06576         }
06577 
        /* index 0 / 1: rounded average of this macroblock's QP with that of
         * left_mb_xy[0] / left_mb_xy[1]; bqp and rqp use get_chroma_qp() with
         * index 0 and 1 respectively */
06578         mb_qp = s->current_picture.qscale_table[mb_xy];
06579         mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
06580         mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
06581         qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
06582         bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
06583                    get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
06584         rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
06585                    get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
06586         qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
06587         bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
06588                    get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
06589         rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
06590                    get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
06591 
06592         /* Filter edge */
06593         tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
06594         { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
06595         filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
06596         filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
06597         filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
06598     }
06599 
    /* Both variants call filter_mb_dir() for dir 0 and then dir 1.
     * first_vertical_edge_done is forwarded only for dir 0; dir 1 always gets 0.
     * NOTE(review): presumably a non-zero value makes filter_mb_dir() skip
     * edge 0 - confirm against the start of that function. */
06600 #if CONFIG_SMALL
06601     for( dir = 0; dir < 2; dir++ )
06602         filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
06603 #else
06604     filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
06605     filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
06606 #endif
06607 }
06608 
/**
 * Decode the macroblocks of one slice.
 *
 * Uses the CABAC path when h->pps.cabac is set and the CAVLC path otherwise.
 * Each successfully parsed macroblock is passed to hl_decode_mb(); in
 * FRAME_MBAFF mode a second macroblock is decoded at mb_y+1 right after the
 * first one. The outcome of the slice is reported through ff_er_add_slice().
 *
 * @param avctx codec context (not referenced in the body)
 * @param arg   pointer to an H264Context pointer
 * @return 0 when the slice ended cleanly, -1 on a decoding error or when the
 *         CAVLC bit position does not match the end of the buffer
 */
06609 static int decode_slice(struct AVCodecContext *avctx, void *arg){
06610     H264Context *h = *(void**)arg;
06611     MpegEncContext * const s = &h->s;
    /* mask applied to every status passed to ff_er_add_slice(): only the AC
     * bits are kept when s->partitioned_frame is set */
06612     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
06613 
06614     s->mb_skip_run= -1;
06615 
06616     h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
06617                     (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
06618 
06619     if( h->pps.cabac ) {
06620         int i;
06621 
06622         /* realign */
06623         align_get_bits( &s->gb );
06624 
06625         /* init cabac */
        /* the CABAC decoder is started at the current byte position of s->gb
         * and given the number of bytes that remain */
06626         ff_init_cabac_states( &h->cabac);
06627         ff_init_cabac_decoder( &h->cabac,
06628                                s->gb.buffer + get_bits_count(&s->gb)/8,
06629                                ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
06630         /* calculate pre-state */
        /* for each of the 460 contexts: scale the init table entry by
         * s->qscale, clip to 1..126, then map values <= 63 to even states
         * and values >= 64 to odd states */
06631         for( i= 0; i < 460; i++ ) {
06632             int pre;
06633             if( h->slice_type_nos == FF_I_TYPE )
06634                 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
06635             else
06636                 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
06637 
06638             if( pre <= 63 )
06639                 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
06640             else
06641                 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
06642         }
06643 
06644         for(;;){
06645 //START_TIMER
06646             int ret = decode_mb_cabac(h);
06647             int eos;
06648 //STOP_TIMER("decode_mb_cabac")
06649 
06650             if(ret>=0) hl_decode_mb(h);
06651 
            /* MBAFF: decode the second macroblock of the pair at mb_y+1,
             * then restore mb_y */
06652             if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
06653                 s->mb_y++;
06654 
06655                 ret = decode_mb_cabac(h);
06656 
06657                 if(ret>=0) hl_decode_mb(h);
06658                 s->mb_y--;
06659             }
06660             eos = get_cabac_terminate( &h->cabac );
06661 
            /* a parse error, or a read position more than 2 bytes past the
             * end of the CABAC data, aborts the slice */
06662             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
06663                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
06664                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
06665                 return -1;
06666             }
06667 
            /* end of row: output the finished band and advance mb_y, by two
             * rows when FIELD_OR_MBAFF_PICTURE is set */
06668             if( ++s->mb_x >= s->mb_width ) {
06669                 s->mb_x = 0;
06670                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
06671                 ++s->mb_y;
06672                 if(FIELD_OR_MBAFF_PICTURE) {
06673                     ++s->mb_y;
06674                 }
06675             }
06676 
06677             if( eos || s->mb_y >= s->mb_height ) {
06678                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
06679                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
06680                 return 0;
06681             }
06682         }
06683 
06684     } else {
        /* CAVLC path: same macroblock loop, but the end of the slice is
         * detected from the bit position in s->gb */
06685         for(;;){
06686             int ret = decode_mb_cavlc(h);
06687 
06688             if(ret>=0) hl_decode_mb(h);
06689 
06690             if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
06691                 s->mb_y++;
06692                 ret = decode_mb_cavlc(h);
06693 
06694                 if(ret>=0) hl_decode_mb(h);
06695                 s->mb_y--;
06696             }
06697 
06698             if(ret<0){
06699                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
06700                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
06701 
06702                 return -1;
06703             }
06704 
06705             if(++s->mb_x >= s->mb_width){
06706                 s->mb_x=0;
06707                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
06708                 ++s->mb_y;
06709                 if(FIELD_OR_MBAFF_PICTURE) {
06710                     ++s->mb_y;
06711                 }
                /* last row done: success only if every bit of the buffer was
                 * consumed exactly */
06712                 if(s->mb_y >= s->mb_height){
06713                     tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
06714 
06715                     if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
06716                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
06717 
06718                         return 0;
06719                     }else{
06720                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
06721 
06722                         return -1;
06723                     }
06724                 }
06725             }
06726 
            /* buffer exhausted and no skip run pending: the slice ends here;
             * reading past the end of the buffer is reported as an error */
06727             if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
06728                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
06729                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
06730                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
06731 
06732                     return 0;
06733                 }else{
06734                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
06735 
06736                     return -1;
06737                 }
06738             }
06739         }
06740     }
06741 
    /* The region below is excluded from compilation by the preprocessor;
     * both branches above only leave their loops through return. */
06742 #if 0
06743     for(;s->mb_y < s->mb_height; s->mb_y++){
06744         for(;s->mb_x < s->mb_width; s->mb_x++){
06745             int ret= decode_mb(h);
06746 
06747             hl_decode_mb(h);
06748 
06749             if(ret<0){
06750                 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
06751                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
06752 
06753                 return -1;
06754             }
06755 
06756             if(++s->mb_x >= s->mb_width){
06757                 s->mb_x=0;
06758                 if(++s->mb_y >= s->mb_height){
06759                     if(get_bits_count(s->gb) == s->gb.size_in_bits){
06760                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
06761 
06762                         return 0;
06763                     }else{
06764                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
06765 
06766                         return -1;
06767                     }
06768                 }
06769             }
06770 
06771             if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
06772                 if(get_bits_count(s->gb) == s->gb.size_in_bits){
06773                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
06774 
06775                     return 0;
06776                 }else{
06777                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
06778 
06779                     return -1;
06780                 }
06781             }
06782         }
06783         s->mb_x=0;
06784         ff_draw_horiz_band(s, 16*s->mb_y, 16);
06785     }
06786 #endif
06787     return -1; //not reached
06788 }
06789 
06790 static int decode_picture_timing(H264Context *h){
06791     MpegEncContext * const s = &h->s;
06792     if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
06793         h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
06794         h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
06795     }
06796     if(h->sps.pic_struct_present_flag){
06797         unsigned int i, num_clock_ts;
06798         h->sei_pic_struct = get_bits(&s->gb, 4);
06799 
06800         if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
06801             return -1;
06802 
06803         num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
06804 
06805         for (i = 0 ; i < num_clock_ts ; i++){
06806             if(get_bits(&s->gb, 1)){                  /* clock_timestamp_flag */
06807                 unsigned int full_timestamp_flag;
06808                 skip_bits(&s->gb, 2);                 /* ct_type */
06809                 skip_bits(&s->gb, 1);                 /* nuit_field_based_flag */
06810                 skip_bits(&s->gb, 5);                 /* counting_type */
06811                 full_timestamp_flag = get_bits(&s->gb, 1);
06812                 skip_bits(&s->gb, 1);                 /* discontinuity_flag */
06813                 skip_bits(&s->gb, 1);                 /* cnt_dropped_flag */
06814                 skip_bits(&s->gb, 8);                 /* n_frames */
06815                 if(full_timestamp_flag){
06816                     skip_bits(&s->gb, 6);             /* seconds_value 0..59 */
06817                     skip_bits(&s->gb, 6);             /* minutes_value 0..59 */
06818                     skip_bits(&s->gb, 5);             /* hours_value 0..23 */
06819                 }else{
06820                     if(get_bits(&s->gb, 1)){          /* seconds_flag */
06821                         skip_bits(&s->gb, 6);         /* seconds_value range 0..59 */
06822                         if(get_bits(&s->gb, 1)){      /* minutes_flag */
06823                             skip_bits(&s->gb, 6);     /* minutes_value 0..59 */
06824                             if(get_bits(&s->gb, 1))   /* hours_flag */
06825                                 skip_bits(&s->gb, 5); /* hours_value 0..23 */
06826                         }
06827                     }
06828                 }
06829                 if(h->sps.time_offset_length > 0)
06830                     skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
06831             }
06832         }
06833     }
06834     return 0;
06835 }
06836 
06837 static int decode_unregistered_user_data(H264Context *h, int size){
06838     MpegEncContext * const s = &h->s;
06839     uint8_t user_data[16+256];
06840     int e, build, i;
06841 
06842     if(size<16)
06843         return -1;
06844 
06845     for(i=0; i<sizeof(user_data)-1 && i<size; i++){
06846         user_data[i]= get_bits(&s->gb, 8);
06847     }
06848 
06849     user_data[i]= 0;
06850     e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
06851     if(e==1 && build>=0)
06852         h->x264_build= build;
06853 
06854     if(s->avctx->debug & FF_DEBUG_BUGS)
06855         av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
06856 
06857     for(; i<size; i++)
06858         skip_bits(&s->gb, 8);
06859 
06860     return 0;
06861 }
06862 
06863 static int decode_recovery_point(H264Context *h){
06864     MpegEncContext * const s = &h->s;
06865 
06866     h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
06867     skip_bits(&s->gb, 4);       /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
06868 
06869     return 0;
06870 }
06871 
06872 static int decode_buffering_period(H264Context *h){
06873     MpegEncContext * const s = &h->s;
06874     unsigned int sps_id;
06875     int sched_sel_idx;
06876     SPS *sps;
06877 
06878     sps_id = get_ue_golomb_31(&s->gb);
06879     if(sps_id > 31 || !h->sps_buffers[sps_id]) {
06880         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
06881         return -1;
06882     }
06883     sps = h->sps_buffers[sps_id];
06884 
06885     // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
06886     if (sps->nal_hrd_parameters_present_flag) {
06887         for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
06888             h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
06889             skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
06890         }
06891     }
06892     if (sps->vcl_hrd_parameters_present_flag) {
06893         for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
06894             h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
06895             skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
06896         }
06897     }
06898 
06899     h->sei_buffering_period_present = 1;
06900     return 0;
06901 }
06902 
06903 int ff_h264_decode_sei(H264Context *h){
06904     MpegEncContext * const s = &h->s;
06905 
06906     while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
06907         int size, type;
06908 
06909         type=0;
06910         do{
06911             type+= show_bits(&s->gb, 8);
06912         }while(get_bits(&s->gb, 8) == 255);
06913 
06914         size=0;
06915         do{
06916             size+= show_bits(&s->gb, 8);
06917         }while(get_bits(&s->gb, 8) == 255);
06918 
06919         switch(type){
06920         case SEI_TYPE_PIC_TIMING: // Picture timing SEI
06921             if(decode_picture_timing(h) < 0)
06922                 return -1;
06923             break;
06924         case SEI_TYPE_USER_DATA_UNREGISTERED:
06925             if(decode_unregistered_user_data(h, size) < 0)
06926                 return -1;
06927             break;
06928         case SEI_TYPE_RECOVERY_POINT:
06929             if(decode_recovery_point(h) < 0)
06930                 return -1;
06931             break;
06932         case SEI_BUFFERING_PERIOD:
06933             if(decode_buffering_period(h) < 0)
06934                 return -1;
06935             break;
06936         default:
06937             skip_bits(&s->gb, 8*size);
06938         }
06939 
06940         //FIXME check bits here
06941         align_get_bits(&s->gb);
06942     }
06943 
06944     return 0;
06945 }
06946 
06947 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
06948     MpegEncContext * const s = &h->s;
06949     int cpb_count, i;
06950     cpb_count = get_ue_golomb_31(&s->gb) + 1;
06951 
06952     if(cpb_count > 32U){
06953         av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
06954         return -1;
06955     }
06956 
06957     get_bits(&s->gb, 4); /* bit_rate_scale */
06958     get_bits(&s->gb, 4); /* cpb_size_scale */
06959     for(i=0; i<cpb_count; i++){
06960         get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
06961         get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
06962         get_bits1(&s->gb);     /* cbr_flag */
06963     }
06964     sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
06965     sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
06966     sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
06967     sps->time_offset_length = get_bits(&s->gb, 5);
06968     sps->cpb_cnt = cpb_count;
06969     return 0;
06970 }
06971 
06972 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
06973     MpegEncContext * const s = &h->s;
06974     int aspect_ratio_info_present_flag;
06975     unsigned int aspect_ratio_idc;
06976 
06977     aspect_ratio_info_present_flag= get_bits1(&s->gb);
06978 
06979     if( aspect_ratio_info_present_flag ) {
06980         aspect_ratio_idc= get_bits(&s->gb, 8);
06981         if( aspect_ratio_idc == EXTENDED_SAR ) {
06982             sps->sar.num= get_bits(&s->gb, 16);
06983             sps->sar.den= get_bits(&s->gb, 16);
06984         }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
06985             sps->sar=  pixel_aspect[aspect_ratio_idc];
06986         }else{
06987             av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
06988             return -1;
06989         }
06990     }else{
06991         sps->sar.num=
06992         sps->sar.den= 0;
06993     }
06994 //            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
06995 
06996     if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
06997         get_bits1(&s->gb);      /* overscan_appropriate_flag */
06998     }
06999 
07000     if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
07001         get_bits(&s->gb, 3);    /* video_format */
07002         get_bits1(&s->gb);      /* video_full_range_flag */
07003         if(get_bits1(&s->gb)){  /* colour_description_present_flag */
07004             get_bits(&s->gb, 8); /* colour_primaries */
07005             get_bits(&s->gb, 8); /* transfer_characteristics */
07006             get_bits(&s->gb, 8); /* matrix_coefficients */
07007         }
07008     }
07009 
07010     if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
07011         get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
07012         get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
07013     }
07014 
07015     sps->timing_info_present_flag = get_bits1(&s->gb);
07016     if(sps->timing_info_present_flag){
07017         sps->num_units_in_tick = get_bits_long(&s->gb, 32);
07018         sps->time_scale = get_bits_long(&s->gb, 32);
07019         if(sps->num_units_in_tick-1 > 0x7FFFFFFEU || sps->time_scale-1 > 0x7FFFFFFEU){
07020             av_log(h->s.avctx, AV_LOG_ERROR, "time_scale/num_units_in_tick inavlid or unsupported (%d/%d)\n", sps->time_scale, sps->num_units_in_tick);
07021             return -1;
07022         }
07023         sps->fixed_frame_rate_flag = get_bits1(&s->gb);
07024     }
07025 
07026     sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
07027     if(sps->nal_hrd_parameters_present_flag)
07028         if(decode_hrd_parameters(h, sps) < 0)
07029             return -1;
07030     sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
07031     if(sps->vcl_hrd_parameters_present_flag)
07032         if(decode_hrd_parameters(h, sps) < 0)
07033             return -1;
07034     if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
07035         get_bits1(&s->gb);     /* low_delay_hrd_flag */
07036     sps->pic_struct_present_flag = get_bits1(&s->gb);
07037 
07038     sps->bitstream_restriction_flag = get_bits1(&s->gb);
07039     if(sps->bitstream_restriction_flag){
07040         get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
07041         get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
07042         get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
07043         get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
07044         get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
07045         sps->num_reorder_frames= get_ue_golomb(&s->gb);
07046         get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
07047 
07048         if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
07049             av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
07050             return -1;
07051         }
07052     }
07053 
07054     return 0;
07055 }
07056 
07057 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
07058                                 const uint8_t *jvt_list, const uint8_t *fallback_list){
07059     MpegEncContext * const s = &h->s;
07060     int i, last = 8, next = 8;
07061     const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
07062     if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
07063         memcpy(factors, fallback_list, size*sizeof(uint8_t));
07064     else
07065     for(i=0;i<size;i++){
07066         if(next)
07067             next = (last + get_se_golomb(&s->gb)) & 0xff;
07068         if(!i && !next){ /* matrix not written, we use the preset one */
07069             memcpy(factors, jvt_list, size*sizeof(uint8_t));
07070             break;
07071         }
07072         last = factors[scan[i]] = next ? next : last;
07073     }
07074 }
07075 
07076 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
07077                                    uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
07078     MpegEncContext * const s = &h->s;
07079     int fallback_sps = !is_sps && sps->scaling_matrix_present;
07080     const uint8_t *fallback[4] = {
07081         fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
07082         fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
07083         fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
07084         fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
07085     };
07086     if(get_bits1(&s->gb)){
07087         sps->scaling_matrix_present |= is_sps;
07088         decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
07089         decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
07090         decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
07091         decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
07092         decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
07093         decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
07094         if(is_sps || pps->transform_8x8_mode){
07095             decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
07096             decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
07097         }
07098     }
07099 }
07100 
07101 int ff_h264_decode_seq_parameter_set(H264Context *h){
07102     MpegEncContext * const s = &h->s;
07103     int profile_idc, level_idc;
07104     unsigned int sps_id;
07105     int i;
07106     SPS *sps;
07107 
07108     profile_idc= get_bits(&s->gb, 8);
07109     get_bits1(&s->gb);   //constraint_set0_flag
07110     get_bits1(&s->gb);   //constraint_set1_flag
07111     get_bits1(&s->gb);   //constraint_set2_flag
07112     get_bits1(&s->gb);   //constraint_set3_flag
07113     get_bits(&s->gb, 4); // reserved
07114     level_idc= get_bits(&s->gb, 8);
07115     sps_id= get_ue_golomb_31(&s->gb);
07116 
07117     if(sps_id >= MAX_SPS_COUNT) {
07118         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
07119         return -1;
07120     }
07121     sps= av_mallocz(sizeof(SPS));
07122     if(sps == NULL)
07123         return -1;
07124 
07125     sps->profile_idc= profile_idc;
07126     sps->level_idc= level_idc;
07127 
07128     memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
07129     memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
07130     sps->scaling_matrix_present = 0;
07131 
07132     if(sps->profile_idc >= 100){ //high profile
07133         sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
07134         if(sps->chroma_format_idc > 3) {
07135             av_log(h->s.avctx, AV_LOG_ERROR, "chroma_format_idc (%u) out of range\n", sps->chroma_format_idc);
07136             return -1;
07137         } else if(sps->chroma_format_idc == 3) {
07138             sps->residual_color_transform_flag = get_bits1(&s->gb);
07139         }
07140         sps->bit_depth_luma   = get_ue_golomb(&s->gb) + 8;
07141         sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
07142         sps->transform_bypass = get_bits1(&s->gb);
07143         decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
07144     }else{
07145         sps->chroma_format_idc= 1;
07146     }
07147 
07148     sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
07149     sps->poc_type= get_ue_golomb_31(&s->gb);
07150 
07151     if(sps->poc_type == 0){ //FIXME #define
07152         sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
07153     } else if(sps->poc_type == 1){//FIXME #define
07154         sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
07155         sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
07156         sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
07157         sps->poc_cycle_length                = get_ue_golomb(&s->gb);
07158 
07159         if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
07160             av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
07161             goto fail;
07162         }
07163 
07164         for(i=0; i<sps->poc_cycle_length; i++)
07165             sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
07166     }else if(sps->poc_type != 2){
07167         av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
07168         goto fail;
07169     }
07170 
07171     sps->ref_frame_count= get_ue_golomb_31(&s->gb);
07172     if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
07173         av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
07174         goto fail;
07175     }
07176     sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
07177     sps->mb_width = get_ue_golomb(&s->gb) + 1;
07178     sps->mb_height= get_ue_golomb(&s->gb) + 1;
07179     if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
07180        avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
07181         av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
07182         goto fail;
07183     }
07184 
07185     sps->frame_mbs_only_flag= get_bits1(&s->gb);
07186     if(!sps->frame_mbs_only_flag)
07187         sps->mb_aff= get_bits1(&s->gb);
07188     else
07189         sps->mb_aff= 0;
07190 
07191     sps->direct_8x8_inference_flag= get_bits1(&s->gb);
07192 
07193 #ifndef ALLOW_INTERLACE
07194     if(sps->mb_aff)
07195         av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
07196 #endif
07197     sps->crop= get_bits1(&s->gb);
07198     if(sps->crop){
07199         sps->crop_left  = get_ue_golomb(&s->gb);
07200         sps->crop_right = get_ue_golomb(&s->gb);
07201         sps->crop_top   = get_ue_golomb(&s->gb);
07202         sps->crop_bottom= get_ue_golomb(&s->gb);
07203         if(sps->crop_left || sps->crop_top){
07204             av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
07205         }
07206         if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
07207             av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
07208         }
07209     }else{
07210         sps->crop_left  =
07211         sps->crop_right =
07212         sps->crop_top   =
07213         sps->crop_bottom= 0;
07214     }
07215 
07216     sps->vui_parameters_present_flag= get_bits1(&s->gb);
07217     if( sps->vui_parameters_present_flag )
07218         decode_vui_parameters(h, sps);
07219 
07220     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
07221         av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
07222                sps_id, sps->profile_idc, sps->level_idc,
07223                sps->poc_type,
07224                sps->ref_frame_count,
07225                sps->mb_width, sps->mb_height,
07226                sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
07227                sps->direct_8x8_inference_flag ? "8B8" : "",
07228                sps->crop_left, sps->crop_right,
07229                sps->crop_top, sps->crop_bottom,
07230                sps->vui_parameters_present_flag ? "VUI" : "",
07231                ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
07232                );
07233     }
07234 
07235     av_free(h->sps_buffers[sps_id]);
07236     h->sps_buffers[sps_id]= sps;
07237     h->sps = *sps;
07238     return 0;
07239 fail:
07240     av_free(sps);
07241     return -1;
07242 }
07243 
07244 static void
07245 build_qp_table(PPS *pps, int t, int index)
07246 {
07247     int i;
07248     for(i = 0; i < 52; i++)
07249         pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
07250 }
07251 
07252 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
07253     MpegEncContext * const s = &h->s;
07254     unsigned int pps_id= get_ue_golomb(&s->gb);
07255     PPS *pps;
07256 
07257     if(pps_id >= MAX_PPS_COUNT) {
07258         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
07259         return -1;
07260     }
07261 
07262     pps= av_mallocz(sizeof(PPS));
07263     if(pps == NULL)
07264         return -1;
07265     pps->sps_id= get_ue_golomb_31(&s->gb);
07266     if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
07267         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
07268         goto fail;
07269     }
07270 
07271     pps->cabac= get_bits1(&s->gb);
07272     pps->pic_order_present= get_bits1(&s->gb);
07273     pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
07274     if(pps->slice_group_count > 1 ){
07275         pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
07276         av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
07277         switch(pps->mb_slice_group_map_type){
07278         case 0:
07279 #if 0
07280 |   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |        |
07281 |    run_length[ i ]                                |1  |ue(v)   |
07282 #endif
07283             break;
07284         case 2:
07285 #if 0
07286 |   for( i = 0; i < num_slice_groups_minus1; i++ )  |   |        |
07287 |{                                                  |   |        |
07288 |    top_left_mb[ i ]                               |1  |ue(v)   |
07289 |    bottom_right_mb[ i ]                           |1  |ue(v)   |
07290 |   }                                               |   |        |
07291 #endif
07292             break;
07293         case 3:
07294         case 4:
07295         case 5:
07296 #if 0
07297 |   slice_group_change_direction_flag               |1  |u(1)    |
07298 |   slice_group_change_rate_minus1                  |1  |ue(v)   |
07299 #endif
07300             break;
07301         case 6:
07302 #if 0
07303 |   slice_group_id_cnt_minus1                       |1  |ue(v)   |
07304 |   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |        |
07305 |)                                                  |   |        |
07306 |    slice_group_id[ i ]                            |1  |u(v)    |
07307 #endif
07308             break;
07309         }
07310     }
07311     pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
07312     pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
07313     if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
07314         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
07315         goto fail;
07316     }
07317 
07318     pps->weighted_pred= get_bits1(&s->gb);
07319     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
07320     pps->init_qp= get_se_golomb(&s->gb) + 26;
07321     pps->init_qs= get_se_golomb(&s->gb) + 26;
07322     pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
07323     pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
07324     pps->constrained_intra_pred= get_bits1(&s->gb);
07325     pps->redundant_pic_cnt_present = get_bits1(&s->gb);
07326 
07327     pps->transform_8x8_mode= 0;
07328     h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
07329     memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
07330     memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
07331 
07332     if(get_bits_count(&s->gb) < bit_length){
07333         pps->transform_8x8_mode= get_bits1(&s->gb);
07334         decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
07335         pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
07336     } else {
07337         pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
07338     }
07339 
07340     build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
07341     build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
07342     if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
07343         h->pps.chroma_qp_diff= 1;
07344 
07345     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
07346         av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
07347                pps_id, pps->sps_id,
07348                pps->cabac ? "CABAC" : "CAVLC",
07349                pps->slice_group_count,
07350                pps->ref_count[0], pps->ref_count[1],
07351                pps->weighted_pred ? "weighted" : "",
07352                pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
07353                pps->deblocking_filter_parameters_present ? "LPAR" : "",
07354                pps->constrained_intra_pred ? "CONSTR" : "",
07355                pps->redundant_pic_cnt_present ? "REDU" : "",
07356                pps->transform_8x8_mode ? "8x8DCT" : ""
07357                );
07358     }
07359 
07360     av_free(h->pps_buffers[pps_id]);
07361     h->pps_buffers[pps_id]= pps;
07362     return 0;
07363 fail:
07364     av_free(pps);
07365     return -1;
07366 }
07367 
07374 static void execute_decode_slices(H264Context *h, int context_count){
07375     MpegEncContext * const s = &h->s;
07376     AVCodecContext * const avctx= s->avctx;
07377     H264Context *hx;
07378     int i;
07379 
07380     if (s->avctx->hwaccel)
07381         return;
07382     if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
07383         return;
07384     if(context_count == 1) {
07385         decode_slice(avctx, &h);
07386     } else {
07387         for(i = 1; i < context_count; i++) {
07388             hx = h->thread_context[i];
07389             hx->s.error_recognition = avctx->error_recognition;
07390             hx->s.error_count = 0;
07391         }
07392 
07393         avctx->execute(avctx, (void *)decode_slice,
07394                        (void **)h->thread_context, NULL, context_count, sizeof(void*));
07395 
07396         /* pull back stuff from slices to master context */
07397         hx = h->thread_context[context_count - 1];
07398         s->mb_x = hx->s.mb_x;
07399         s->mb_y = hx->s.mb_y;
07400         s->dropable = hx->s.dropable;
07401         s->picture_structure = hx->s.picture_structure;
07402         for(i = 1; i < context_count; i++)
07403             h->s.error_count += h->thread_context[i]->s.error_count;
07404     }
07405 }
07406 
07407 
07408 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
07409     MpegEncContext * const s = &h->s;
07410     AVCodecContext * const avctx= s->avctx;
07411     int buf_index=0;
07412     H264Context *hx; 
07413     int context_count = 0;
07414 
07415     h->max_contexts = avctx->thread_count;
07416 #if 0
07417     int i;
07418     for(i=0; i<50; i++){
07419         av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
07420     }
07421 #endif
07422     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
07423         h->current_slice = 0;
07424         if (!s->first_field)
07425             s->current_picture_ptr= NULL;
07426         reset_sei(h);
07427     }
07428 
07429     for(;;){
07430         int consumed;
07431         int dst_length;
07432         int bit_length;
07433         const uint8_t *ptr;
07434         int i, nalsize = 0;
07435         int err;
07436 
07437         if(h->is_avc) {
07438             if(buf_index >= buf_size) break;
07439             nalsize = 0;
07440             for(i = 0; i < h->nal_length_size; i++)
07441                 nalsize = (nalsize << 8) | buf[buf_index++];
07442             if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
07443                 if(nalsize == 1){
07444                     buf_index++;
07445                     continue;
07446                 }else{
07447                     av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
07448                     break;
07449                 }
07450             }
07451         } else {
07452             // start code prefix search
07453             for(; buf_index + 3 < buf_size; buf_index++){
07454                 // This should always succeed in the first iteration.
07455                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
07456                     break;
07457             }
07458 
07459 
07460             if (buf_index + 3 >= buf_size) {
07461                 buf_index = buf_size;
07462                 break;
07463             }
07464 
07465             buf_index+=3;
07466         }
07467 
07468         hx = h->thread_context[context_count];
07469 
07470         ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
07471         if (ptr==NULL || dst_length < 0){
07472             return -1;
07473         }
07474         while(ptr[dst_length - 1] == 0 && dst_length > 0)
07475             dst_length--;
07476         bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
07477 
07478         if(s->avctx->debug&FF_DEBUG_STARTCODE){
07479             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
07480         }
07481 
07482         if (h->is_avc && (nalsize != consumed)){
07483             int i, debug_level = AV_LOG_DEBUG;
07484             for (i = consumed; i < nalsize; i++)
07485                 if (buf[buf_index+i])
07486                     debug_level = AV_LOG_ERROR;
07487             av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
07488             consumed= nalsize;
07489         }
07490 
07491         buf_index += consumed;
07492 
07493         if(  (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
07494            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
07495             continue;
07496 
07497       again:
07498         err = 0;
07499         switch(hx->nal_unit_type){
07500         case NAL_IDR_SLICE:
07501             if (h->nal_unit_type != NAL_IDR_SLICE) {
07502                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
07503                 return -1;
07504             }
07505             idr(h); //FIXME ensure we don't loose some frames if there is reordering
07506         case NAL_SLICE:
07507             init_get_bits(&hx->s.gb, ptr, bit_length);
07508             hx->intra_gb_ptr=
07509             hx->inter_gb_ptr= &hx->s.gb;
07510             hx->s.data_partitioning = 0;
07511 
07512             if((err = decode_slice_header(hx, h)))
07513                break;
07514 
07515             if (s->avctx->hwaccel && h->current_slice == 1) {
07516                 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
07517                     return -1;
07518             }
07519 
07520             s->current_picture_ptr->key_frame |=
07521                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
07522                     (h->sei_recovery_frame_cnt >= 0);
07523             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
07524                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
07525                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
07526                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
07527                && avctx->skip_frame < AVDISCARD_ALL){
07528                 if(avctx->hwaccel) {
07529                     if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
07530                         return -1;
07531                 }else
07532                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
07533                     static const uint8_t start_code[] = {0x00, 0x00, 0x01};
07534                     ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
07535                     ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
07536                 }else
07537                     context_count++;
07538             }
07539             break;
07540         case NAL_DPA:
07541             init_get_bits(&hx->s.gb, ptr, bit_length);
07542             hx->intra_gb_ptr=
07543             hx->inter_gb_ptr= NULL;
07544             hx->s.data_partitioning = 1;
07545 
07546             err = decode_slice_header(hx, h);
07547             break;
07548         case NAL_DPB:
07549             init_get_bits(&hx->intra_gb, ptr, bit_length);
07550             hx->intra_gb_ptr= &hx->intra_gb;
07551             break;
07552         case NAL_DPC:
07553             init_get_bits(&hx->inter_gb, ptr, bit_length);
07554             hx->inter_gb_ptr= &hx->inter_gb;
07555 
07556             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
07557                && s->current_picture_ptr
07558                && s->context_initialized
07559                && s->hurry_up < 5
07560                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
07561                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
07562                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
07563                && avctx->skip_frame < AVDISCARD_ALL)
07564                 context_count++;
07565             break;
07566         case NAL_SEI:
07567             init_get_bits(&s->gb, ptr, bit_length);
07568             ff_h264_decode_sei(h);
07569             break;
07570         case NAL_SPS:
07571             init_get_bits(&s->gb, ptr, bit_length);
07572             ff_h264_decode_seq_parameter_set(h);
07573 
07574             if(s->flags& CODEC_FLAG_LOW_DELAY)
07575                 s->low_delay=1;
07576 
07577             if(avctx->has_b_frames < 2)
07578                 avctx->has_b_frames= !s->low_delay;
07579             break;
07580         case NAL_PPS:
07581             init_get_bits(&s->gb, ptr, bit_length);
07582 
07583             ff_h264_decode_picture_parameter_set(h, bit_length);
07584 
07585             break;
07586         case NAL_AUD:
07587         case NAL_END_SEQUENCE:
07588         case NAL_END_STREAM:
07589         case NAL_FILLER_DATA:
07590         case NAL_SPS_EXT:
07591         case NAL_AUXILIARY_SLICE:
07592             break;
07593         default:
07594             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
07595         }
07596 
07597         if(context_count == h->max_contexts) {
07598             execute_decode_slices(h, context_count);
07599             context_count = 0;
07600         }
07601 
07602         if (err < 0)
07603             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
07604         else if(err == 1) {
07605             /* Slice could not be decoded in parallel mode, copy down
07606              * NAL unit stuff to context 0 and restart. Note that
07607              * rbsp_buffer is not transferred, but since we no longer
07608              * run in parallel mode this should not be an issue. */
07609             h->nal_unit_type = hx->nal_unit_type;
07610             h->nal_ref_idc   = hx->nal_ref_idc;
07611             hx = h;
07612             goto again;
07613         }
07614     }
07615     if(context_count)
07616         execute_decode_slices(h, context_count);
07617     return buf_index;
07618 }
07619 
07623 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
07624         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
07625         if(pos+10>buf_size) pos=buf_size; // oops ;)
07626 
07627         return pos;
07628 }
07629 
07630 static int decode_frame(AVCodecContext *avctx,
07631                              void *data, int *data_size,
07632                              const uint8_t *buf, int buf_size)
07633 {
07634     H264Context *h = avctx->priv_data;
07635     MpegEncContext *s = &h->s;
07636     AVFrame *pict = data;
07637     int buf_index;
07638 
07639     s->flags= avctx->flags;
07640     s->flags2= avctx->flags2;
07641 
07642    /* end of stream, output what is still in the buffers */
07643     if (buf_size == 0) {
07644         Picture *out;
07645         int i, out_idx;
07646 
07647 //FIXME factorize this with the output code below
07648         out = h->delayed_pic[0];
07649         out_idx = 0;
07650         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
07651             if(h->delayed_pic[i]->poc < out->poc){
07652                 out = h->delayed_pic[i];
07653                 out_idx = i;
07654             }
07655 
07656         for(i=out_idx; h->delayed_pic[i]; i++)
07657             h->delayed_pic[i] = h->delayed_pic[i+1];
07658 
07659         if(out){
07660             *data_size = sizeof(AVFrame);
07661             *pict= *(AVFrame*)out;
07662         }
07663 
07664         return 0;
07665     }
07666 
07667     if(h->is_avc && !h->got_avcC) {
07668         int i, cnt, nalsize;
07669         unsigned char *p = avctx->extradata;
07670         if(avctx->extradata_size < 7) {
07671             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
07672             return -1;
07673         }
07674         if(*p != 1) {
07675             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
07676             return -1;
07677         }
07678         /* sps and pps in the avcC always have length coded with 2 bytes,
07679            so put a fake nal_length_size = 2 while parsing them */
07680         h->nal_length_size = 2;
07681         // Decode sps from avcC
07682         cnt = *(p+5) & 0x1f; // Number of sps
07683         p += 6;
07684         for (i = 0; i < cnt; i++) {
07685             nalsize = AV_RB16(p) + 2;
07686             if(decode_nal_units(h, p, nalsize) < 0) {
07687                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
07688                 return -1;
07689             }
07690             p += nalsize;
07691         }
07692         // Decode pps from avcC
07693         cnt = *(p++); // Number of pps
07694         for (i = 0; i < cnt; i++) {
07695             nalsize = AV_RB16(p) + 2;
07696             if(decode_nal_units(h, p, nalsize)  != nalsize) {
07697                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
07698                 return -1;
07699             }
07700             p += nalsize;
07701         }
07702         // Now store right nal length size, that will be use to parse all other nals
07703         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
07704         // Do not reparse avcC
07705         h->got_avcC = 1;
07706     }
07707 
07708     if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
07709         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
07710             return -1;
07711         h->got_avcC = 1;
07712     }
07713 
07714     buf_index=decode_nal_units(h, buf, buf_size);
07715     if(buf_index < 0)
07716         return -1;
07717 
07718     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
07719         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
07720         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
07721         return -1;
07722     }
07723 
07724     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
07725         Picture *out = s->current_picture_ptr;
07726         Picture *cur = s->current_picture_ptr;
07727         int i, pics, cross_idr, out_of_order, out_idx;
07728 
07729         s->mb_y= 0;
07730 
07731         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
07732         s->current_picture_ptr->pict_type= s->pict_type;
07733 
07734         if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
07735             ff_vdpau_h264_set_reference_frames(s);
07736 
07737         if(!s->dropable) {
07738             execute_ref_pic_marking(h, h->mmco, h->mmco_index);
07739             h->prev_poc_msb= h->poc_msb;
07740             h->prev_poc_lsb= h->poc_lsb;
07741         }
07742         h->prev_frame_num_offset= h->frame_num_offset;
07743         h->prev_frame_num= h->frame_num;
07744 
07745         if (avctx->hwaccel) {
07746             if (avctx->hwaccel->end_frame(avctx) < 0)
07747                 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
07748         }
07749 
07750         if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
07751             ff_vdpau_h264_picture_complete(s);
07752 
07753         /*
07754          * FIXME: Error handling code does not seem to support interlaced
07755          * when slices span multiple rows
07756          * The ff_er_add_slice calls don't work right for bottom
07757          * fields; they cause massive erroneous error concealing
07758          * Error marking covers both fields (top and bottom).
07759          * This causes a mismatched s->error_count
07760          * and a bad error table. Further, the error count goes to
07761          * INT_MAX when called for bottom field, because mb_y is
07762          * past end by one (callers fault) and resync_mb_y != 0
07763          * causes problems for the first MB line, too.
07764          */
07765         if (!FIELD_PICTURE)
07766             ff_er_frame_end(s);
07767 
07768         MPV_frame_end(s);
07769 
07770         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
07771             /* Wait for second field. */
07772             *data_size = 0;
07773 
07774         } else {
07775             cur->repeat_pict = 0;
07776 
07777             /* Signal interlacing information externally. */
07778             /* Prioritize picture timing SEI information over used decoding process if it exists. */
07779             if(h->sps.pic_struct_present_flag){
07780                 switch (h->sei_pic_struct)
07781                 {
07782                 case SEI_PIC_STRUCT_FRAME:
07783                     cur->interlaced_frame = 0;
07784                     break;
07785                 case SEI_PIC_STRUCT_TOP_FIELD:
07786                 case SEI_PIC_STRUCT_BOTTOM_FIELD:
07787                 case SEI_PIC_STRUCT_TOP_BOTTOM:
07788                 case SEI_PIC_STRUCT_BOTTOM_TOP:
07789                     cur->interlaced_frame = 1;
07790                     break;
07791                 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
07792                 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
07793                     // Signal the possibility of telecined film externally (pic_struct 5,6)
07794                     // From these hints, let the applications decide if they apply deinterlacing.
07795                     cur->repeat_pict = 1;
07796                     cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
07797                     break;
07798                 case SEI_PIC_STRUCT_FRAME_DOUBLING:
07799                     // Force progressive here, as doubling interlaced frame is a bad idea.
07800                     cur->interlaced_frame = 0;
07801                     cur->repeat_pict = 2;
07802                     break;
07803                 case SEI_PIC_STRUCT_FRAME_TRIPLING:
07804                     cur->interlaced_frame = 0;
07805                     cur->repeat_pict = 4;
07806                     break;
07807                 }
07808             }else{
07809                 /* Derive interlacing flag from used decoding process. */
07810                 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
07811             }
07812 
07813             if (cur->field_poc[0] != cur->field_poc[1]){
07814                 /* Derive top_field_first from field pocs. */
07815                 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
07816             }else{
07817                 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
07818                     /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
07819                     if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
07820                       || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
07821                         cur->top_field_first = 1;
07822                     else
07823                         cur->top_field_first = 0;
07824                 }else{
07825                     /* Most likely progressive */
07826                     cur->top_field_first = 0;
07827                 }
07828             }
07829 
07830         //FIXME do something with unavailable reference frames
07831 
07832             /* Sort B-frames into display order */
07833 
07834             if(h->sps.bitstream_restriction_flag
07835                && s->avctx->has_b_frames < h->sps.num_reorder_frames){
07836                 s->avctx->has_b_frames = h->sps.num_reorder_frames;
07837                 s->low_delay = 0;
07838             }
07839 
07840             if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
07841                && !h->sps.bitstream_restriction_flag){
07842                 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
07843                 s->low_delay= 0;
07844             }
07845 
07846             pics = 0;
07847             while(h->delayed_pic[pics]) pics++;
07848 
07849             assert(pics <= MAX_DELAYED_PIC_COUNT);
07850 
07851             h->delayed_pic[pics++] = cur;
07852             if(cur->reference == 0)
07853                 cur->reference = DELAYED_PIC_REF;
07854 
07855             out = h->delayed_pic[0];
07856             out_idx = 0;
07857             for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
07858                 if(h->delayed_pic[i]->poc < out->poc){
07859                     out = h->delayed_pic[i];
07860                     out_idx = i;
07861                 }
07862             cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
07863 
07864             out_of_order = !cross_idr && out->poc < h->outputed_poc;
07865 
07866             if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
07867                 { }
07868             else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
07869                || (s->low_delay &&
07870                 ((!cross_idr && out->poc > h->outputed_poc + 2)
07871                  || cur->pict_type == FF_B_TYPE)))
07872             {
07873                 s->low_delay = 0;
07874                 s->avctx->has_b_frames++;
07875             }
07876 
07877             if(out_of_order || pics > s->avctx->has_b_frames){
07878                 out->reference &= ~DELAYED_PIC_REF;
07879                 for(i=out_idx; h->delayed_pic[i]; i++)
07880                     h->delayed_pic[i] = h->delayed_pic[i+1];
07881             }
07882             if(!out_of_order && pics > s->avctx->has_b_frames){
07883                 *data_size = sizeof(AVFrame);
07884 
07885                 h->outputed_poc = out->poc;
07886                 *pict= *(AVFrame*)out;
07887             }else{
07888                 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
07889             }
07890         }
07891     }
07892 
07893     assert(pict->data[0] || !*data_size);
07894     ff_print_debug_info(s, pict);
07895 //printf("out %d\n", (int)pict->data[0]);
07896 #if 0 //?
07897 
07898     /* Return the Picture timestamp as the frame number */
07899     /* we subtract 1 because it is added on utils.c     */
07900     avctx->frame_number = s->picture_number - 1;
07901 #endif
07902     return get_consumed_bytes(s, buf_index, buf_size);
07903 }
07904 #if 0
07905 static inline void fill_mb_avail(H264Context *h){
07906     MpegEncContext * const s = &h->s;
07907     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
07908 
07909     if(s->mb_y){
07910         h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
07911         h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
07912         h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
07913     }else{
07914         h->mb_avail[0]=
07915         h->mb_avail[1]=
07916         h->mb_avail[2]= 0;
07917     }
07918     h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
07919     h->mb_avail[4]= 1; //FIXME move out
07920     h->mb_avail[5]= 0; //FIXME move out
07921 }
07922 #endif
07923 
07924 #ifdef TEST
07925 #undef printf
07926 #undef random
07927 #define COUNT 8000
07928 #define SIZE (COUNT*40)
07929 int main(void){
07930     int i;
07931     uint8_t temp[SIZE];
07932     PutBitContext pb;
07933     GetBitContext gb;
07934 //    int int_temp[10000];
07935     DSPContext dsp;
07936     AVCodecContext avctx;
07937 
07938     dsputil_init(&dsp, &avctx);
07939 
07940     init_put_bits(&pb, temp, SIZE);
07941     printf("testing unsigned exp golomb\n");
07942     for(i=0; i<COUNT; i++){
07943         START_TIMER
07944         set_ue_golomb(&pb, i);
07945         STOP_TIMER("set_ue_golomb");
07946     }
07947     flush_put_bits(&pb);
07948 
07949     init_get_bits(&gb, temp, 8*SIZE);
07950     for(i=0; i<COUNT; i++){
07951         int j, s;
07952 
07953         s= show_bits(&gb, 24);
07954 
07955         START_TIMER
07956         j= get_ue_golomb(&gb);
07957         if(j != i){
07958             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
07959 //            return -1;
07960         }
07961         STOP_TIMER("get_ue_golomb");
07962     }
07963 
07964 
07965     init_put_bits(&pb, temp, SIZE);
07966     printf("testing signed exp golomb\n");
07967     for(i=0; i<COUNT; i++){
07968         START_TIMER
07969         set_se_golomb(&pb, i - COUNT/2);
07970         STOP_TIMER("set_se_golomb");
07971     }
07972     flush_put_bits(&pb);
07973 
07974     init_get_bits(&gb, temp, 8*SIZE);
07975     for(i=0; i<COUNT; i++){
07976         int j, s;
07977 
07978         s= show_bits(&gb, 24);
07979 
07980         START_TIMER
07981         j= get_se_golomb(&gb);
07982         if(j != i - COUNT/2){
07983             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
07984 //            return -1;
07985         }
07986         STOP_TIMER("get_se_golomb");
07987     }
07988 
07989 #if 0
07990     printf("testing 4x4 (I)DCT\n");
07991 
07992     DCTELEM block[16];
07993     uint8_t src[16], ref[16];
07994     uint64_t error= 0, max_error=0;
07995 
07996     for(i=0; i<COUNT; i++){
07997         int j;
07998 //        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
07999         for(j=0; j<16; j++){
08000             ref[j]= random()%255;
08001             src[j]= random()%255;
08002         }
08003 
08004         h264_diff_dct_c(block, src, ref, 4);
08005 
08006         //normalize
08007         for(j=0; j<16; j++){
08008 //            printf("%d ", block[j]);
08009             block[j]= block[j]*4;
08010             if(j&1) block[j]= (block[j]*4 + 2)/5;
08011             if(j&4) block[j]= (block[j]*4 + 2)/5;
08012         }
08013 //        printf("\n");
08014 
08015         s->dsp.h264_idct_add(ref, block, 4);
08016 /*        for(j=0; j<16; j++){
08017             printf("%d ", ref[j]);
08018         }
08019         printf("\n");*/
08020 
08021         for(j=0; j<16; j++){
08022             int diff= FFABS(src[j] - ref[j]);
08023 
08024             error+= diff*diff;
08025             max_error= FFMAX(max_error, diff);
08026         }
08027     }
08028     printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
08029     printf("testing quantizer\n");
08030     for(qp=0; qp<52; qp++){
08031         for(i=0; i<16; i++)
08032             src1_block[i]= src2_block[i]= random()%255;
08033 
08034     }
08035     printf("Testing NAL layer\n");
08036 
08037     uint8_t bitstream[COUNT];
08038     uint8_t nal[COUNT*2];
08039     H264Context h;
08040     memset(&h, 0, sizeof(H264Context));
08041 
08042     for(i=0; i<COUNT; i++){
08043         int zeros= i;
08044         int nal_length;
08045         int consumed;
08046         int out_length;
08047         uint8_t *out;
08048         int j;
08049 
08050         for(j=0; j<COUNT; j++){
08051             bitstream[j]= (random() % 255) + 1;
08052         }
08053 
08054         for(j=0; j<zeros; j++){
08055             int pos= random() % COUNT;
08056             while(bitstream[pos] == 0){
08057                 pos++;
08058                 pos %= COUNT;
08059             }
08060             bitstream[pos]=0;
08061         }
08062 
08063         START_TIMER
08064 
08065         nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
08066         if(nal_length<0){
08067             printf("encoding failed\n");
08068             return -1;
08069         }
08070 
08071         out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
08072 
08073         STOP_TIMER("NAL")
08074 
08075         if(out_length != COUNT){
08076             printf("incorrect length %d %d\n", out_length, COUNT);
08077             return -1;
08078         }
08079 
08080         if(consumed != nal_length){
08081             printf("incorrect consumed length %d %d\n", nal_length, consumed);
08082             return -1;
08083         }
08084 
08085         if(memcmp(bitstream, out, COUNT)){
08086             printf("mismatch\n");
08087             return -1;
08088         }
08089     }
08090 #endif
08091 
08092     printf("Testing RBSP\n");
08093 
08094 
08095     return 0;
08096 }
08097 #endif /* TEST */
08098 
08099 
08100 static av_cold int decode_end(AVCodecContext *avctx)
08101 {
08102     H264Context *h = avctx->priv_data;
08103     MpegEncContext *s = &h->s;
08104     int i;
08105 
08106     av_freep(&h->rbsp_buffer[0]);
08107     av_freep(&h->rbsp_buffer[1]);
08108     free_tables(h); //FIXME cleanup init stuff perhaps
08109 
08110     for(i = 0; i < MAX_SPS_COUNT; i++)
08111         av_freep(h->sps_buffers + i);
08112 
08113     for(i = 0; i < MAX_PPS_COUNT; i++)
08114         av_freep(h->pps_buffers + i);
08115 
08116     MPV_common_end(s);
08117 
08118 //    memset(h, 0, sizeof(H264Context));
08119 
08120     return 0;
08121 }
08122 
08123 
08124 AVCodec h264_decoder = {
08125     "h264",
08126     CODEC_TYPE_VIDEO,
08127     CODEC_ID_H264,
08128     sizeof(H264Context),
08129     decode_init,
08130     NULL,
08131     decode_end,
08132     decode_frame,
08133     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
08134     .flush= flush_dpb,
08135     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
08136     .pix_fmts= ff_hwaccel_pixfmt_list_420,
08137 };
08138 
08139 #if CONFIG_H264_VDPAU_DECODER
08140 AVCodec h264_vdpau_decoder = {
08141     "h264_vdpau",
08142     CODEC_TYPE_VIDEO,
08143     CODEC_ID_H264,
08144     sizeof(H264Context),
08145     decode_init,
08146     NULL,
08147     decode_end,
08148     decode_frame,
08149     CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
08150     .flush= flush_dpb,
08151     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
08152 };
08153 #endif
08154 
08155 #if CONFIG_SVQ3_DECODER
08156 #include "svq3.c"
08157 #endif

Generated on Sat Feb 16 2013 09:23:12 for ffmpeg by  doxygen 1.7.1