Browse code

Merge remote-tracking branch 'qatar/master'

* qatar/master: (40 commits)
H.264: template left MB handling
H.264: faster fill_decode_caches
H.264: faster write_back_*
H.264: faster fill_filter_caches
H.264: make filter_mb_fast support the case of unavailable top mb
Do not include log.h in avutil.h
Do not include pixfmt.h in avutil.h
Do not include rational.h in avutil.h
Do not include mathematics.h in avutil.h
Do not include intfloat_readwrite.h in avutil.h
Remove return statements following infinite loops without break
RTSP: Doxygen comment cleanup
doxygen: Escape '\' in Doxygen documentation.
md5: cosmetics
md5: use AV_WL32 to write result
md5: add fate test
md5: include correct headers
md5: fix test program
doxygen: Drop array size declarations from Doxygen parameter names.
doxygen: Fix parameter names to match the function prototypes.
...

Conflicts:
libavcodec/x86/dsputil_mmx.c
libavformat/flvenc.c
libavformat/oggenc.c
libavformat/wtv.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2011/07/04 07:34:44
Showing 129 changed files
... ...
@@ -1,4 +1,4 @@
1
-# Doxyfile 1.5.6
1
+# Doxyfile 1.7.1
2 2
 
3 3
 # This file describes the settings to be used by the documentation system
4 4
 # doxygen (www.doxygen.org) for a project
... ...
@@ -54,11 +54,11 @@ CREATE_SUBDIRS         = NO
54 54
 # information to generate all constant output in the proper language.
55 55
 # The default language is English, other supported languages are:
56 56
 # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
57
-# Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek,
58
-# Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages),
59
-# Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish,
60
-# Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish,
61
-# and Ukrainian.
57
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
58
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
59
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
60
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak,
61
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
62 62
 
63 63
 OUTPUT_LANGUAGE        = English
64 64
 
... ...
@@ -155,13 +155,6 @@ QT_AUTOBRIEF           = NO
155 155
 
156 156
 MULTILINE_CPP_IS_BRIEF = NO
157 157
 
158
-# If the DETAILS_AT_TOP tag is set to YES then Doxygen
159
-# will output the detailed description near the top, like JavaDoc.
160
-# If set to NO, the detailed description appears after the member
161
-# documentation.
162
-
163
-DETAILS_AT_TOP         = NO
164
-
165 158
 # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
166 159
 # member inherits the documentation from any documented member that it
167 160
 # re-implements.
... ...
@@ -214,6 +207,18 @@ OPTIMIZE_FOR_FORTRAN   = NO
214 214
 
215 215
 OPTIMIZE_OUTPUT_VHDL   = NO
216 216
 
217
+# Doxygen selects the parser to use depending on the extension of the files it
218
+# parses. With this tag you can assign which parser to use for a given extension.
219
+# Doxygen has a built-in mapping, but you can override or extend it using this
220
+# tag. The format is ext=language, where ext is a file extension, and language
221
+# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C,
222
+# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make
223
+# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
224
+# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions
225
+# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
226
+
227
+EXTENSION_MAPPING      =
228
+
217 229
 # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
218 230
 # to include (a tag file for) the STL sources as input, then you should
219 231
 # set this tag to YES in order to let doxygen match functions declarations and
... ...
@@ -268,6 +273,22 @@ SUBGROUPING            = YES
268 268
 
269 269
 TYPEDEF_HIDES_STRUCT   = NO
270 270
 
271
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
272
+# determine which symbols to keep in memory and which to flush to disk.
273
+# When the cache is full, less often used symbols will be written to disk.
274
+# For small to medium size projects (<1000 input files) the default value is
275
+# probably good enough. For larger projects a too small cache size can cause
276
+# doxygen to be busy swapping symbols to and from disk most of the time
277
+# causing a significant performance penalty.
278
+# If the system has enough physical memory increasing the cache will improve the
279
+# performance by keeping more symbols in memory. Note that the value works on
280
+# a logarithmic scale so increasing the size by one will roughly double the
281
+# memory usage. The cache size is given by this formula:
282
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
283
+# corresponding to a cache size of 2^16 = 65536 symbols
284
+
285
+SYMBOL_CACHE_SIZE      = 0
286
+
271 287
 #---------------------------------------------------------------------------
272 288
 # Build related configuration options
273 289
 #---------------------------------------------------------------------------
... ...
@@ -366,6 +387,12 @@ HIDE_SCOPE_NAMES       = NO
366 366
 
367 367
 SHOW_INCLUDE_FILES     = YES
368 368
 
369
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
370
+# will list include files with double quotes in the documentation
371
+# rather than with sharp brackets.
372
+
373
+FORCE_LOCAL_INCLUDES   = NO
374
+
369 375
 # If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
370 376
 # is inserted in the documentation for inline members.
371 377
 
... ...
@@ -385,6 +412,16 @@ SORT_MEMBER_DOCS       = YES
385 385
 
386 386
 SORT_BRIEF_DOCS        = NO
387 387
 
388
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
389
+# will sort the (brief and detailed) documentation of class members so that
390
+# constructors and destructors are listed first. If set to NO (the default)
391
+# the constructors will appear in the respective orders defined by
392
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
393
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
394
+# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
395
+
396
+SORT_MEMBERS_CTORS_1ST = NO
397
+
388 398
 # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
389 399
 # hierarchy of group names into alphabetical order. If set to NO (the default)
390 400
 # the group names will appear in their defined order.
... ...
@@ -459,7 +496,8 @@ SHOW_DIRECTORIES       = NO
459 459
 SHOW_FILES             = YES
460 460
 
461 461
 # Set the SHOW_NAMESPACES tag to NO to disable the generation of the
462
-# Namespaces page.  This will remove the Namespaces entry from the Quick Index
462
+# Namespaces page.
463
+# This will remove the Namespaces entry from the Quick Index
463 464
 # and from the Folder Tree View (if specified). The default is YES.
464 465
 
465 466
 SHOW_NAMESPACES        = YES
... ...
@@ -474,6 +512,15 @@ SHOW_NAMESPACES        = YES
474 474
 
475 475
 FILE_VERSION_FILTER    =
476 476
 
477
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
478
+# by doxygen. The layout file controls the global structure of the generated
479
+# output files in an output format independent way. To create the layout file
480
+# that represents doxygen's defaults, run doxygen with the -l option.
481
+# You can optionally specify a file name after the option, if omitted
482
+# DoxygenLayout.xml will be used as the name of the layout file.
483
+
484
+LAYOUT_FILE            =
485
+
477 486
 #---------------------------------------------------------------------------
478 487
 # configuration options related to warning and progress messages
479 488
 #---------------------------------------------------------------------------
... ...
@@ -577,7 +624,8 @@ EXCLUDE_SYMLINKS       = NO
577 577
 # against the file with absolute path, so to exclude all test directories
578 578
 # for example use the pattern */test/*
579 579
 
580
-EXCLUDE_PATTERNS       = *.git *.d
580
+EXCLUDE_PATTERNS       = *.git \
581
+                         *.d
581 582
 
582 583
 # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
583 584
 # (namespaces, classes, functions, etc.) that should be excluded from the
... ...
@@ -591,7 +639,8 @@ EXCLUDE_SYMBOLS        =
591 591
 # directories that contain example code fragments that are included (see
592 592
 # the \include command).
593 593
 
594
-EXAMPLE_PATH           = libavcodec/ libavformat/
594
+EXAMPLE_PATH           = libavcodec/ \
595
+                         libavformat/
595 596
 
596 597
 # If the value of the EXAMPLE_PATH tag contains directories, you can use the
597 598
 # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
... ...
@@ -618,14 +667,17 @@ IMAGE_PATH             =
618 618
 # by executing (via popen()) the command <filter> <input-file>, where <filter>
619 619
 # is the value of the INPUT_FILTER tag, and <input-file> is the name of an
620 620
 # input file. Doxygen will then use the output that the filter program writes
621
-# to standard output.  If FILTER_PATTERNS is specified, this tag will be
621
+# to standard output.
622
+# If FILTER_PATTERNS is specified, this tag will be
622 623
 # ignored.
623 624
 
624 625
 INPUT_FILTER           =
625 626
 
626 627
 # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
627
-# basis.  Doxygen will compare the file name with each pattern and apply the
628
-# filter if there is a match.  The filters are a list of the form:
628
+# basis.
629
+# Doxygen will compare the file name with each pattern and apply the
630
+# filter if there is a match.
631
+# The filters are a list of the form:
629 632
 # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
630 633
 # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
631 634
 # is applied to all files.
... ...
@@ -675,7 +727,8 @@ REFERENCES_RELATION    = NO
675 675
 # If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
676 676
 # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
677 677
 # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
678
-# link to the source code.  Otherwise they will link to the documentstion.
678
+# link to the source code.
679
+# Otherwise they will link to the documentation.
679 680
 
680 681
 REFERENCES_LINK_SOURCE = YES
681 682
 
... ...
@@ -758,18 +811,50 @@ HTML_FOOTER            =
758 758
 
759 759
 HTML_STYLESHEET        =
760 760
 
761
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
762
+# Doxygen will adjust the colors in the stylesheet and background images
763
+# according to this color. Hue is specified as an angle on a colorwheel,
764
+# see http://en.wikipedia.org/wiki/Hue for more information.
765
+# For instance the value 0 represents red, 60 is yellow, 120 is green,
766
+# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
767
+# The allowed range is 0 to 359.
768
+
769
+HTML_COLORSTYLE_HUE    = 220
770
+
771
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
772
+# the colors in the HTML output. For a value of 0 the output will use
773
+# grayscales only. A value of 255 will produce the most vivid colors.
774
+
775
+HTML_COLORSTYLE_SAT    = 100
776
+
777
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
778
+# the luminance component of the colors in the HTML output. Values below
779
+# 100 gradually make the output lighter, whereas values above 100 make
780
+# the output darker. The value divided by 100 is the actual gamma applied,
781
+# so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2,
782
+# and 100 does not change the gamma.
783
+
784
+HTML_COLORSTYLE_GAMMA  = 80
785
+
786
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
787
+# page will contain the date and time when the page was generated. Setting
788
+# this to NO can help when comparing the output of multiple runs.
789
+
790
+HTML_TIMESTAMP         = YES
791
+
761 792
 # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
762 793
 # files or namespaces will be aligned in HTML using tables. If set to
763 794
 # NO a bullet list will be used.
764 795
 
765 796
 HTML_ALIGN_MEMBERS     = YES
766 797
 
767
-# If the GENERATE_HTMLHELP tag is set to YES, additional index files
768
-# will be generated that can be used as input for tools like the
769
-# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
770
-# of the generated HTML documentation.
798
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
799
+# documentation will contain sections that can be hidden and shown after the
800
+# page has loaded. For this to work a browser that supports
801
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
802
+# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
771 803
 
772
-GENERATE_HTMLHELP      = NO
804
+HTML_DYNAMIC_SECTIONS  = NO
773 805
 
774 806
 # If the GENERATE_DOCSET tag is set to YES, additional index files
775 807
 # will be generated that can be used as input for Apple's Xcode 3
... ...
@@ -779,6 +864,8 @@ GENERATE_HTMLHELP      = NO
779 779
 # directory and running "make install" will install the docset in
780 780
 # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
781 781
 # it at startup.
782
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
783
+# for more information.
782 784
 
783 785
 GENERATE_DOCSET        = NO
784 786
 
... ...
@@ -796,13 +883,22 @@ DOCSET_FEEDNAME        = "Doxygen generated docs"
796 796
 
797 797
 DOCSET_BUNDLE_ID       = org.doxygen.Project
798 798
 
799
-# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
800
-# documentation will contain sections that can be hidden and shown after the
801
-# page has loaded. For this to work a browser that supports
802
-# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox
803
-# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
799
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
800
+# the documentation publisher. This should be a reverse domain-name style
801
+# string, e.g. com.mycompany.MyDocSet.documentation.
804 802
 
805
-HTML_DYNAMIC_SECTIONS  = NO
803
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
804
+
805
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
806
+
807
+DOCSET_PUBLISHER_NAME  = Publisher
808
+
809
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
810
+# will be generated that can be used as input for tools like the
811
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
812
+# of the generated HTML documentation.
813
+
814
+GENERATE_HTMLHELP      = NO
806 815
 
807 816
 # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
808 817
 # be used to specify the file name of the resulting .chm file. You
... ...
@@ -841,6 +937,76 @@ BINARY_TOC             = NO
841 841
 
842 842
 TOC_EXPAND             = NO
843 843
 
844
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
845
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
846
+# that can be used as input for Qt's qhelpgenerator to generate a
847
+# Qt Compressed Help (.qch) of the generated HTML documentation.
848
+
849
+GENERATE_QHP           = NO
850
+
851
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
852
+# be used to specify the file name of the resulting .qch file.
853
+# The path specified is relative to the HTML output folder.
854
+
855
+QCH_FILE               =
856
+
857
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
858
+# Qt Help Project output. For more information please see
859
+# http://doc.trolltech.com/qthelpproject.html#namespace
860
+
861
+QHP_NAMESPACE          = org.doxygen.Project
862
+
863
+# The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when generating
864
+# Qt Help Project output. For more information please see
865
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
866
+
867
+QHP_VIRTUAL_FOLDER     = doc
868
+
869
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
870
+# add. For more information please see
871
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
872
+
873
+QHP_CUST_FILTER_NAME   =
874
+
875
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
876
+# custom filter to add. For more information please see
877
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
878
+# Qt Help Project / Custom Filters</a>.
879
+
880
+QHP_CUST_FILTER_ATTRS  =
881
+
882
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
883
+# project's
884
+# filter section matches.
885
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
886
+# Qt Help Project / Filter Attributes</a>.
887
+
888
+QHP_SECT_FILTER_ATTRS  =
889
+
890
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
891
+# be used to specify the location of Qt's qhelpgenerator.
892
+# If non-empty doxygen will try to run qhelpgenerator on the generated
893
+# .qhp file.
894
+
895
+QHG_LOCATION           =
896
+
897
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
898
+#  will be generated, which together with the HTML files, form an Eclipse help
899
+# plugin. To install this plugin and make it available under the help contents
900
+# menu in Eclipse, the contents of the directory containing the HTML and XML
901
+# files needs to be copied into the plugins directory of eclipse. The name of
902
+# the directory within the plugins directory should be the same as
903
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
904
+# the help appears.
905
+
906
+GENERATE_ECLIPSEHELP   = NO
907
+
908
+# A unique identifier for the eclipse help plugin. When installing the plugin
909
+# the directory name containing the HTML and XML files should also have
910
+# this name.
911
+
912
+ECLIPSE_DOC_ID         = org.doxygen.Project
913
+
844 914
 # The DISABLE_INDEX tag can be used to turn on/off the condensed index at
845 915
 # top of each HTML page. The value NO (the default) enables the index and
846 916
 # the value YES disables it.
... ...
@@ -854,27 +1020,30 @@ ENUM_VALUES_PER_LINE   = 4
854 854
 
855 855
 # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
856 856
 # structure should be generated to display hierarchical information.
857
-# If the tag value is set to FRAME, a side panel will be generated
857
+# If the tag value is set to YES, a side panel will be generated
858 858
 # containing a tree-like index structure (just like the one that
859 859
 # is generated for HTML Help). For this to work a browser that supports
860
-# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
861
-# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are
862
-# probably better off using the HTML help feature. Other possible values
863
-# for this tag are: HIERARCHIES, which will generate the Groups, Directories,
864
-# and Class Hiererachy pages using a tree view instead of an ordered list;
865
-# ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which
866
-# disables this behavior completely. For backwards compatibility with previous
867
-# releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE
868
-# respectively.
860
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
861
+# Windows users are probably better off using the HTML help feature.
869 862
 
870 863
 GENERATE_TREEVIEW      = NO
871 864
 
865
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
866
+# and Class Hierarchy pages using a tree view instead of an ordered list.
867
+
868
+USE_INLINE_TREES       = NO
869
+
872 870
 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
873 871
 # used to set the initial width (in pixels) of the frame in which the tree
874 872
 # is shown.
875 873
 
876 874
 TREEVIEW_WIDTH         = 250
877 875
 
876
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
877
+# links to external symbols imported via tag files in a separate window.
878
+
879
+EXT_LINKS_IN_WINDOW    = NO
880
+
878 881
 # Use this tag to change the font size of Latex formulas included
879 882
 # as images in the HTML documentation. The default is 10. Note that
880 883
 # when you change the font size after a successful doxygen run you need
... ...
@@ -883,6 +1052,34 @@ TREEVIEW_WIDTH         = 250
883 883
 
884 884
 FORMULA_FONTSIZE       = 10
885 885
 
886
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
887
+# generated for formulas are transparent PNGs. Transparent PNGs are
888
+# not supported properly for IE 6.0, but are supported on all modern browsers.
889
+# Note that when changing this option you need to delete any form_*.png files
890
+# in the HTML output before the changes have effect.
891
+
892
+FORMULA_TRANSPARENT    = YES
893
+
894
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box
895
+# for the HTML output. The underlying search engine uses javascript
896
+# and DHTML and should work on any modern browser. Note that when using
897
+# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
898
+# (GENERATE_DOCSET) there is already a search function so this one should
899
+# typically be disabled. For large projects the javascript based search engine
900
+# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
901
+
902
+SEARCHENGINE           = NO
903
+
904
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
905
+# implemented using a PHP enabled web server instead of at the web client
906
+# using Javascript. Doxygen will generate the search PHP script and index
907
+# file to put on the web server. The advantage of the server
908
+# based approach is that it scales better to large projects and allows
909
+# full text search. The disadvantages are that it is more difficult to set up
910
+# and does not have live searching capabilities.
911
+
912
+SERVER_BASED_SEARCH    = NO
913
+
886 914
 #---------------------------------------------------------------------------
887 915
 # configuration options related to the LaTeX output
888 916
 #---------------------------------------------------------------------------
... ...
@@ -900,6 +1097,9 @@ LATEX_OUTPUT           = latex
900 900
 
901 901
 # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
902 902
 # invoked. If left blank `latex' will be used as the default command name.
903
+# Note that when enabling USE_PDFLATEX this option is only used for
904
+# generating bitmaps for formulas in the HTML output, but not in the
905
+# Makefile that is written to the output directory.
903 906
 
904 907
 LATEX_CMD_NAME         = latex
905 908
 
... ...
@@ -959,6 +1159,13 @@ LATEX_BATCHMODE        = NO
959 959
 
960 960
 LATEX_HIDE_INDICES     = NO
961 961
 
962
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
963
+# source code with syntax highlighting in the LaTeX output.
964
+# Note that which sources are shown also depends on other settings
965
+# such as SOURCE_BROWSER.
966
+
967
+LATEX_SOURCE_CODE      = NO
968
+
962 969
 #---------------------------------------------------------------------------
963 970
 # configuration options related to the RTF output
964 971
 #---------------------------------------------------------------------------
... ...
@@ -1095,8 +1302,10 @@ GENERATE_PERLMOD       = NO
1095 1095
 PERLMOD_LATEX          = NO
1096 1096
 
1097 1097
 # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
1098
-# nicely formatted so it can be parsed by a human reader.  This is useful
1099
-# if you want to understand what is going on.  On the other hand, if this
1098
+# nicely formatted so it can be parsed by a human reader.
1099
+# This is useful
1100
+# if you want to understand what is going on.
1101
+# On the other hand, if this
1100 1102
 # tag is set to NO the size of the Perl module output will be much smaller
1101 1103
 # and Perl will parse it just the same.
1102 1104
 
... ...
@@ -1158,17 +1367,22 @@ INCLUDE_FILE_PATTERNS  =
1158 1158
 # undefined via #undef or recursively expanded use the := operator
1159 1159
 # instead of the = operator.
1160 1160
 
1161
-PREDEFINED             = __attribute__(x)="" "RENAME(x)=x ## _TMPL" "DEF(x)=x ## _TMPL" \
1162
-                         HAVE_AV_CONFIG_H HAVE_MMX HAVE_MMX2 HAVE_AMD3DNOW \
1163
-                         "DECLARE_ALIGNED(a,t,n)=t n" "offsetof(x,y)=0x42" \
1161
+PREDEFINED             = "__attribute__(x)=" \
1162
+                         "RENAME(x)=x ## _TMPL" \
1163
+                         "DEF(x)=x ## _TMPL" \
1164
+                         HAVE_AV_CONFIG_H \
1165
+                         HAVE_MMX \
1166
+                         HAVE_MMX2 \
1167
+                         HAVE_AMD3DNOW \
1168
+                         "DECLARE_ALIGNED(a,t,n)=t n" \
1169
+                         "offsetof(x,y)=0x42"
1164 1170
 
1165 1171
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
1166 1172
 # this tag can be used to specify a list of macro names that should be expanded.
1167 1173
 # The macro definition that is found in the sources will be used.
1168 1174
 # Use the PREDEFINED tag if you want to use a different macro definition.
1169 1175
 
1170
-#EXPAND_AS_DEFINED      = FF_COMMON_FRAME
1171
-EXPAND_AS_DEFINED      = declare_idct(idct, table, idct_row_head, idct_row, idct_row_tail, idct_row_mid)
1176
+EXPAND_AS_DEFINED      = declare_idct
1172 1177
 
1173 1178
 # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
1174 1179
 # doxygen's preprocessor will remove all function-like macros that are alone
... ...
@@ -1186,9 +1400,11 @@ SKIP_FUNCTION_MACROS   = YES
1186 1186
 # Optionally an initial location of the external documentation
1187 1187
 # can be added for each tagfile. The format of a tag file without
1188 1188
 # this location is as follows:
1189
-#   TAGFILES = file1 file2 ...
1189
+#
1190
+# TAGFILES = file1 file2 ...
1190 1191
 # Adding location for the tag files is done as follows:
1191
-#   TAGFILES = file1=loc1 "file2 = loc2" ...
1192
+#
1193
+# TAGFILES = file1=loc1 "file2 = loc2" ...
1192 1194
 # where "loc1" and "loc2" can be relative or absolute paths or
1193 1195
 # URLs. If a location is present for each tag, the installdox tool
1194 1196
 # does not have to be run to correct the links.
... ...
@@ -1256,6 +1472,14 @@ HIDE_UNDOC_RELATIONS   = YES
1256 1256
 
1257 1257
 HAVE_DOT               = NO
1258 1258
 
1259
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
1260
+# allowed to run in parallel. When set to 0 (the default) doxygen will
1261
+# base this on the number of processors available in the system. You can set it
1262
+# explicitly to a value larger than 0 to get control over the balance
1263
+# between CPU load and processing speed.
1264
+
1265
+DOT_NUM_THREADS        = 0
1266
+
1259 1267
 # By default doxygen will write a font called FreeSans.ttf to the output
1260 1268
 # directory and reference it in all dot files that doxygen generates. This
1261 1269
 # font does not include all possible unicode characters however, so when you need
... ...
@@ -1267,6 +1491,11 @@ HAVE_DOT               = NO
1267 1267
 
1268 1268
 DOT_FONTNAME           = FreeSans
1269 1269
 
1270
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
1271
+# The default size is 10pt.
1272
+
1273
+DOT_FONTSIZE           = 10
1274
+
1270 1275
 # By default doxygen will tell dot to use the output directory to look for the
1271 1276
 # FreeSans.ttf font (which doxygen will put there itself). If you specify a
1272 1277
 # different font using DOT_FONTNAME you can set the path where dot
... ...
@@ -1384,10 +1613,10 @@ DOT_GRAPH_MAX_NODES    = 50
1384 1384
 MAX_DOT_GRAPH_DEPTH    = 0
1385 1385
 
1386 1386
 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
1387
-# background. This is enabled by default, which results in a transparent
1388
-# background. Warning: Depending on the platform used, enabling this option
1389
-# may lead to badly anti-aliased labels on the edges of a graph (i.e. they
1390
-# become hard to read).
1387
+# background. This is disabled by default, because dot on Windows does not
1388
+# seem to support this out of the box. Warning: Depending on the platform used,
1389
+# enabling this option may lead to badly anti-aliased labels on the edges of
1390
+# a graph (i.e. they become hard to read).
1391 1391
 
1392 1392
 DOT_TRANSPARENT        = YES
1393 1393
 
... ...
@@ -1409,12 +1638,3 @@ GENERATE_LEGEND        = YES
1409 1409
 # the various graphs.
1410 1410
 
1411 1411
 DOT_CLEANUP            = YES
1412
-
1413
-#---------------------------------------------------------------------------
1414
-# Configuration::additions related to the search engine
1415
-#---------------------------------------------------------------------------
1416
-
1417
-# The SEARCHENGINE tag specifies whether or not a search engine should be
1418
-# used. If set to NO the values of all tags below this one will be ignored.
1419
-
1420
-SEARCHENGINE           = NO
... ...
@@ -2560,8 +2560,6 @@ EOF
2560 2560
     exit 1;
2561 2561
 fi
2562 2562
 
2563
-disabled static && LIBNAME=""
2564
-
2565 2563
 die_license_disabled() {
2566 2564
     enabled $1 || { enabled $2 && die "$2 is $1 and --enable-$1 is not specified."; }
2567 2565
 }
... ...
@@ -34,6 +34,7 @@
34 34
 #include <string.h>
35 35
 #include <math.h>
36 36
 
37
+#include "libavutil/mathematics.h"
37 38
 #include "libavformat/avformat.h"
38 39
 #include "libswscale/swscale.h"
39 40
 
... ...
@@ -40,6 +40,7 @@
40 40
 #include "libavutil/fifo.h"
41 41
 #include "libavutil/intreadwrite.h"
42 42
 #include "libavutil/dict.h"
43
+#include "libavutil/mathematics.h"
43 44
 #include "libavutil/pixdesc.h"
44 45
 #include "libavutil/avstring.h"
45 46
 #include "libavutil/libm.h"
... ...
@@ -25,6 +25,7 @@
25 25
 #include <limits.h>
26 26
 #include "libavutil/avstring.h"
27 27
 #include "libavutil/colorspace.h"
28
+#include "libavutil/mathematics.h"
28 29
 #include "libavutil/pixdesc.h"
29 30
 #include "libavutil/imgutils.h"
30 31
 #include "libavutil/dict.h"
... ...
@@ -37,6 +37,7 @@
37 37
 #include "libavutil/avstring.h"
38 38
 #include "libavutil/lfg.h"
39 39
 #include "libavutil/dict.h"
40
+#include "libavutil/mathematics.h"
40 41
 #include "libavutil/random_seed.h"
41 42
 #include "libavutil/parseutils.h"
42 43
 #include "libavutil/opt.h"
... ...
@@ -77,7 +77,7 @@ static VLC vlc_ps[10];
77 77
  * @param avctx contains the current codec context
78 78
  * @param gb    pointer to the input bitstream
79 79
  * @param ps    pointer to the Parametric Stereo context
80
- * @param par   pointer to the parameter to be read
80
+ * @param PAR   pointer to the parameter to be read
81 81
  * @param e     envelope to decode
82 82
  * @param dt    1: time delta-coded, 0: frequency delta-coded
83 83
  */
... ...
@@ -20,6 +20,7 @@
20 20
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 21
  */
22 22
 
23
+#include "libavutil/mathematics.h"
23 24
 #include "avcodec.h"
24 25
 #include "dsputil.h"
25 26
 #include "acelp_pitch_delay.h"
... ...
@@ -154,7 +154,7 @@ static void draw_char(AVCodecContext *avctx, int c)
154 154
 
155 155
 /**
156 156
  * Execute ANSI escape code
157
- * @param <0 error
157
+ * @return 0 on success, negative on error
158 158
  */
159 159
 static int execute_code(AVCodecContext * avctx, int c)
160 160
 {
... ...
@@ -30,6 +30,9 @@
30 30
 #include "libavutil/samplefmt.h"
31 31
 #include "libavutil/avutil.h"
32 32
 #include "libavutil/cpu.h"
33
+#include "libavutil/log.h"
34
+#include "libavutil/pixfmt.h"
35
+#include "libavutil/rational.h"
33 36
 
34 37
 #include "libavcodec/version.h"
35 38
 
... ...
@@ -335,7 +335,7 @@ static av_cold int cook_decode_close(AVCodecContext *avctx)
335 335
  * Fill the gain array for the timedomain quantization.
336 336
  *
337 337
  * @param gb          pointer to the GetBitContext
338
- * @param gaininfo[9] array of gain indexes
338
+ * @param gaininfo    array[9] of gain indexes
339 339
  */
340 340
 
341 341
 static void decode_gain_info(GetBitContext *gb, int *gaininfo)
... ...
@@ -1156,7 +1156,6 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
1156 1156
             default:
1157 1157
                 av_log_ask_for_sample(avctx, "Unknown Cook version.\n");
1158 1158
                 return -1;
1159
-                break;
1160 1159
         }
1161 1160
 
1162 1161
         if(s > 1 && q->subpacket[s].samples_per_channel != q->samples_per_channel) {
... ...
@@ -42,25 +42,6 @@
42 42
 
43 43
 extern const uint8_t ff_log2_run[41];
44 44
 
45
-static const int8_t quant3[256]={
46
- 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
49
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
50
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
56
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
57
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
58
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
59
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
62
-};
63
-
64 45
 static const int8_t quant5_10bit[256]={
65 46
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
66 47
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
... ...
@@ -98,42 +79,7 @@ static const int8_t quant5[256]={
98 98
 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 99
 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
100 100
 };
101
-static const int8_t quant7[256]={
102
- 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
103
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
104
- 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
105
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
106
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
107
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
111
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
112
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
113
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115
--3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
116
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
117
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
118
-};
119
-static const int8_t quant9[256]={
120
- 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
121
- 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
122
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
123
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
124
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
129
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
130
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
131
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
135
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
136
-};
101
+
137 102
 static const int8_t quant9_10bit[256]={
138 103
  0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
139 104
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
... ...
@@ -171,24 +117,6 @@ static const int8_t quant11[256]={
171 171
 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
172 172
 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
173 173
 };
174
-static const int8_t quant13[256]={
175
- 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
176
- 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
177
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
178
- 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
179
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
180
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
181
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
182
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
183
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
184
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
185
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
186
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
187
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
188
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
189
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
190
--4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
191
-};
192 174
 
193 175
 static const uint8_t ver2_state[256]= {
194 176
    0,  10,  10,  10,  10,  16,  16,  16,  28,  16,  16,  29,  42,  49,  20,  49,
... ...
@@ -112,7 +112,6 @@ static av_cold int flic_decode_init(AVCodecContext *avctx)
112 112
         case 24 : avctx->pix_fmt = PIX_FMT_BGR24; /* Supposedly BGR, but havent any files to test with */
113 113
                   av_log(avctx, AV_LOG_ERROR, "24Bpp FLC/FLX is unsupported due to no test files.\n");
114 114
                   return -1;
115
-                  break;
116 115
         default :
117 116
                   av_log(avctx, AV_LOG_ERROR, "Unknown FLC/FLX depth of %d Bpp is unsupported.\n",depth);
118 117
                   return -1;
... ...
@@ -60,15 +60,6 @@ static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
60 60
     PIX_FMT_NONE
61 61
 };
62 62
 
63
-void ff_h264_write_back_intra_pred_mode(H264Context *h){
64
-    int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
65
-
66
-    AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
67
-    mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
68
-    mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
69
-    mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
70
-}
71
-
72 63
 /**
73 64
  * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
74 65
  */
... ...
@@ -3057,6 +3048,82 @@ int ff_h264_get_slice_type(const H264Context *h)
3057 3057
     }
3058 3058
 }
3059 3059
 
3060
+static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncContext * const s, int mb_type, int top_xy,
3061
+                                                      int left_xy[LEFT_MBS], int top_type, int left_type[LEFT_MBS], int mb_xy, int list)
3062
+{
3063
+    int b_stride = h->b_stride;
3064
+    int16_t (*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
3065
+    int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
3066
+    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
3067
+        if(USES_LIST(top_type, list)){
3068
+            const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride;
3069
+            const int b8_xy= 4*top_xy + 2;
3070
+            int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
3071
+            AV_COPY128(mv_dst - 1*8, s->current_picture.motion_val[list][b_xy + 0]);
3072
+            ref_cache[0 - 1*8]=
3073
+            ref_cache[1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
3074
+            ref_cache[2 - 1*8]=
3075
+            ref_cache[3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
3076
+        }else{
3077
+            AV_ZERO128(mv_dst - 1*8);
3078
+            AV_WN32A(&ref_cache[0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3079
+        }
3080
+
3081
+        if(!IS_INTERLACED(mb_type^left_type[LTOP])){
3082
+            if(USES_LIST(left_type[LTOP], list)){
3083
+                const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3;
3084
+                const int b8_xy= 4*left_xy[LTOP] + 1;
3085
+                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[LTOP]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
3086
+                AV_COPY32(mv_dst - 1 + 0, s->current_picture.motion_val[list][b_xy + b_stride*0]);
3087
+                AV_COPY32(mv_dst - 1 + 8, s->current_picture.motion_val[list][b_xy + b_stride*1]);
3088
+                AV_COPY32(mv_dst - 1 +16, s->current_picture.motion_val[list][b_xy + b_stride*2]);
3089
+                AV_COPY32(mv_dst - 1 +24, s->current_picture.motion_val[list][b_xy + b_stride*3]);
3090
+                ref_cache[-1 +  0]=
3091
+                ref_cache[-1 +  8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
3092
+                ref_cache[-1 + 16]=
3093
+                ref_cache[-1 + 24]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
3094
+            }else{
3095
+                AV_ZERO32(mv_dst - 1 + 0);
3096
+                AV_ZERO32(mv_dst - 1 + 8);
3097
+                AV_ZERO32(mv_dst - 1 +16);
3098
+                AV_ZERO32(mv_dst - 1 +24);
3099
+                ref_cache[-1 +  0]=
3100
+                ref_cache[-1 +  8]=
3101
+                ref_cache[-1 + 16]=
3102
+                ref_cache[-1 + 24]= LIST_NOT_USED;
3103
+            }
3104
+        }
3105
+    }
3106
+
3107
+    if(!USES_LIST(mb_type, list)){
3108
+        fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0,0), 4);
3109
+        AV_WN32A(&ref_cache[0*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3110
+        AV_WN32A(&ref_cache[1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3111
+        AV_WN32A(&ref_cache[2*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3112
+        AV_WN32A(&ref_cache[3*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3113
+        return;
3114
+    }
3115
+
3116
+    {
3117
+        int8_t *ref = &s->current_picture.ref_index[list][4*mb_xy];
3118
+        int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
3119
+        uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
3120
+        uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]],ref2frm[list][ref[3]])&0x00FF00FF)*0x0101;
3121
+        AV_WN32A(&ref_cache[0*8], ref01);
3122
+        AV_WN32A(&ref_cache[1*8], ref01);
3123
+        AV_WN32A(&ref_cache[2*8], ref23);
3124
+        AV_WN32A(&ref_cache[3*8], ref23);
3125
+    }
3126
+
3127
+    {
3128
+        int16_t (*mv_src)[2] = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
3129
+        AV_COPY128(mv_dst + 8*0, mv_src + 0*b_stride);
3130
+        AV_COPY128(mv_dst + 8*1, mv_src + 1*b_stride);
3131
+        AV_COPY128(mv_dst + 8*2, mv_src + 2*b_stride);
3132
+        AV_COPY128(mv_dst + 8*3, mv_src + 3*b_stride);
3133
+    }
3134
+}
3135
+
3060 3136
 /**
3061 3137
  *
3062 3138
  * @return non-zero if the loop filter can be skipped
... ...
@@ -3064,208 +3131,124 @@ int ff_h264_get_slice_type(const H264Context *h)
3064 3064
 static int fill_filter_caches(H264Context *h, int mb_type){
3065 3065
     MpegEncContext * const s = &h->s;
3066 3066
     const int mb_xy= h->mb_xy;
3067
-    int top_xy, left_xy[2];
3068
-    int top_type, left_type[2];
3067
+    int top_xy, left_xy[LEFT_MBS];
3068
+    int top_type, left_type[LEFT_MBS];
3069
+    uint8_t *nnz;
3070
+    uint8_t *nnz_cache;
3069 3071
 
3070 3072
     top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);
3071 3073
 
3072
-    //FIXME deblocking could skip the intra and nnz parts.
3073
-
3074 3074
     /* Wow, what a mess, why didn't they simplify the interlacing & intra
3075 3075
      * stuff, I can't imagine that these complex rules are worth it. */
3076 3076
 
3077
-    left_xy[1] = left_xy[0] = mb_xy-1;
3077
+    left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
3078 3078
     if(FRAME_MBAFF){
3079 3079
         const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
3080 3080
         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
3081 3081
         if(s->mb_y&1){
3082 3082
             if (left_mb_field_flag != curr_mb_field_flag) {
3083
-                left_xy[0] -= s->mb_stride;
3083
+                left_xy[LTOP] -= s->mb_stride;
3084 3084
             }
3085 3085
         }else{
3086 3086
             if(curr_mb_field_flag){
3087 3087
                 top_xy      += s->mb_stride & (((s->current_picture.mb_type[top_xy    ]>>7)&1)-1);
3088 3088
             }
3089 3089
             if (left_mb_field_flag != curr_mb_field_flag) {
3090
-                left_xy[1] += s->mb_stride;
3090
+                left_xy[LBOT] += s->mb_stride;
3091 3091
             }
3092 3092
         }
3093 3093
     }
3094 3094
 
3095 3095
     h->top_mb_xy = top_xy;
3096
-    h->left_mb_xy[0] = left_xy[0];
3097
-    h->left_mb_xy[1] = left_xy[1];
3096
+    h->left_mb_xy[LTOP] = left_xy[LTOP];
3097
+    h->left_mb_xy[LBOT] = left_xy[LBOT];
3098 3098
     {
3099 3099
         //for sufficiently low qp, filtering wouldn't do anything
3100 3100
         //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
3101 3101
         int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
3102 3102
         int qp = s->current_picture.qscale_table[mb_xy];
3103 3103
         if(qp <= qp_thresh
3104
-           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
3105
-           && (top_xy   < 0 || ((qp + s->current_picture.qscale_table[top_xy    ] + 1)>>1) <= qp_thresh)){
3104
+           && (left_xy[LTOP]<0 || ((qp + s->current_picture.qscale_table[left_xy[LTOP]] + 1)>>1) <= qp_thresh)
3105
+           && (top_xy       <0 || ((qp + s->current_picture.qscale_table[top_xy       ] + 1)>>1) <= qp_thresh)){
3106 3106
             if(!FRAME_MBAFF)
3107 3107
                 return 1;
3108
-            if(   (left_xy[0]< 0            || ((qp + s->current_picture.qscale_table[left_xy[1]             ] + 1)>>1) <= qp_thresh)
3109
-               && (top_xy    < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy    -s->mb_stride] + 1)>>1) <= qp_thresh))
3108
+            if(   (left_xy[LTOP]< 0            || ((qp + s->current_picture.qscale_table[left_xy[LBOT]             ] + 1)>>1) <= qp_thresh)
3109
+               && (top_xy       < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy       -s->mb_stride] + 1)>>1) <= qp_thresh))
3110 3110
                 return 1;
3111 3111
         }
3112 3112
     }
3113 3113
 
3114
-    top_type     = s->current_picture.mb_type[top_xy]    ;
3115
-    left_type[0] = s->current_picture.mb_type[left_xy[0]];
3116
-    left_type[1] = s->current_picture.mb_type[left_xy[1]];
3114
+    top_type        = s->current_picture.mb_type[top_xy];
3115
+    left_type[LTOP] = s->current_picture.mb_type[left_xy[LTOP]];
3116
+    left_type[LBOT] = s->current_picture.mb_type[left_xy[LBOT]];
3117 3117
     if(h->deblocking_filter == 2){
3118
-        if(h->slice_table[top_xy     ] != h->slice_num) top_type= 0;
3119
-        if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
3118
+        if(h->slice_table[top_xy       ] != h->slice_num) top_type= 0;
3119
+        if(h->slice_table[left_xy[LBOT]] != h->slice_num) left_type[LTOP]= left_type[LBOT]= 0;
3120 3120
     }else{
3121
-        if(h->slice_table[top_xy     ] == 0xFFFF) top_type= 0;
3122
-        if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
3121
+        if(h->slice_table[top_xy       ] == 0xFFFF) top_type= 0;
3122
+        if(h->slice_table[left_xy[LBOT]] == 0xFFFF) left_type[LTOP]= left_type[LBOT] =0;
3123 3123
     }
3124
-    h->top_type    = top_type    ;
3125
-    h->left_type[0]= left_type[0];
3126
-    h->left_type[1]= left_type[1];
3124
+    h->top_type       = top_type;
3125
+    h->left_type[LTOP]= left_type[LTOP];
3126
+    h->left_type[LBOT]= left_type[LBOT];
3127 3127
 
3128 3128
     if(IS_INTRA(mb_type))
3129 3129
         return 0;
3130 3130
 
3131
-    AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
3132
-    AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
3133
-    AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
3134
-    AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);
3131
+    fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 0);
3132
+    if(h->list_count == 2)
3133
+        fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 1);
3135 3134
 
3135
+    nnz = h->non_zero_count[mb_xy];
3136
+    nnz_cache = h->non_zero_count_cache;
3137
+    AV_COPY32(&nnz_cache[4+8*1], &nnz[ 0]);
3138
+    AV_COPY32(&nnz_cache[4+8*2], &nnz[ 4]);
3139
+    AV_COPY32(&nnz_cache[4+8*3], &nnz[ 8]);
3140
+    AV_COPY32(&nnz_cache[4+8*4], &nnz[12]);
3136 3141
     h->cbp= h->cbp_table[mb_xy];
3137 3142
 
3138
-    {
3139
-        int list;
3140
-        for(list=0; list<h->list_count; list++){
3141
-            int8_t *ref;
3142
-            int y, b_stride;
3143
-            int16_t (*mv_dst)[2];
3144
-            int16_t (*mv_src)[2];
3145
-
3146
-            if(!USES_LIST(mb_type, list)){
3147
-                fill_rectangle(  h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
3148
-                AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3149
-                AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3150
-                AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3151
-                AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3152
-                continue;
3153
-            }
3154
-
3155
-            ref = &s->current_picture.ref_index[list][4*mb_xy];
3156
-            {
3157
-                int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
3158
-                AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
3159
-                AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
3160
-                ref += 2;
3161
-                AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
3162
-                AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
3163
-            }
3164
-
3165
-            b_stride = h->b_stride;
3166
-            mv_dst   = &h->mv_cache[list][scan8[0]];
3167
-            mv_src   = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
3168
-            for(y=0; y<4; y++){
3169
-                AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
3170
-            }
3171
-
3172
-        }
3173
-    }
3174
-
3175
-
3176
-/*
3177
-0 . T T. T T T T
3178
-1 L . .L . . . .
3179
-2 L . .L . . . .
3180
-3 . T TL . . . .
3181
-4 L . .L . . . .
3182
-5 L . .. . . . .
3183
-*/
3184
-//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
3185 3143
     if(top_type){
3186
-        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
3144
+        nnz = h->non_zero_count[top_xy];
3145
+        AV_COPY32(&nnz_cache[4+8*0], &nnz[3*4]);
3187 3146
     }
3188 3147
 
3189
-    if(left_type[0]){
3190
-        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
3191
-        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
3192
-        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
3193
-        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
3148
+    if(left_type[LTOP]){
3149
+        nnz = h->non_zero_count[left_xy[LTOP]];
3150
+        nnz_cache[3+8*1]= nnz[3+0*4];
3151
+        nnz_cache[3+8*2]= nnz[3+1*4];
3152
+        nnz_cache[3+8*3]= nnz[3+2*4];
3153
+        nnz_cache[3+8*4]= nnz[3+3*4];
3194 3154
     }
3195 3155
 
3196 3156
     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
3197 3157
     if(!CABAC && h->pps.transform_8x8_mode){
3198 3158
         if(IS_8x8DCT(top_type)){
3199
-            h->non_zero_count_cache[4+8*0]=
3200
-            h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
3201
-            h->non_zero_count_cache[6+8*0]=
3202
-            h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
3159
+            nnz_cache[4+8*0]=
3160
+            nnz_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
3161
+            nnz_cache[6+8*0]=
3162
+            nnz_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
3203 3163
         }
3204
-        if(IS_8x8DCT(left_type[0])){
3205
-            h->non_zero_count_cache[3+8*1]=
3206
-            h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
3164
+        if(IS_8x8DCT(left_type[LTOP])){
3165
+            nnz_cache[3+8*1]=
3166
+            nnz_cache[3+8*2]= (h->cbp_table[left_xy[LTOP]]&0x2000) >> 12; //FIXME check MBAFF
3207 3167
         }
3208
-        if(IS_8x8DCT(left_type[1])){
3209
-            h->non_zero_count_cache[3+8*3]=
3210
-            h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
3168
+        if(IS_8x8DCT(left_type[LBOT])){
3169
+            nnz_cache[3+8*3]=
3170
+            nnz_cache[3+8*4]= (h->cbp_table[left_xy[LBOT]]&0x8000) >> 12; //FIXME check MBAFF
3211 3171
         }
3212 3172
 
3213 3173
         if(IS_8x8DCT(mb_type)){
3214
-            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
3215
-            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;
3216
-
3217
-            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
3218
-            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
3174
+            nnz_cache[scan8[0   ]]= nnz_cache[scan8[1   ]]=
3175
+            nnz_cache[scan8[2   ]]= nnz_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;
3219 3176
 
3220
-            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
3221
-            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
3177
+            nnz_cache[scan8[0+ 4]]= nnz_cache[scan8[1+ 4]]=
3178
+            nnz_cache[scan8[2+ 4]]= nnz_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
3222 3179
 
3223
-            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
3224
-            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
3225
-        }
3226
-    }
3180
+            nnz_cache[scan8[0+ 8]]= nnz_cache[scan8[1+ 8]]=
3181
+            nnz_cache[scan8[2+ 8]]= nnz_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
3227 3182
 
3228
-    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
3229
-        int list;
3230
-        for(list=0; list<h->list_count; list++){
3231
-            if(USES_LIST(top_type, list)){
3232
-                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
3233
-                const int b8_xy= 4*top_xy + 2;
3234
-                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
3235
-                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
3236
-                h->ref_cache[list][scan8[0] + 0 - 1*8]=
3237
-                h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
3238
-                h->ref_cache[list][scan8[0] + 2 - 1*8]=
3239
-                h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
3240
-            }else{
3241
-                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
3242
-                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
3243
-            }
3244
-
3245
-            if(!IS_INTERLACED(mb_type^left_type[0])){
3246
-                if(USES_LIST(left_type[0], list)){
3247
-                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
3248
-                    const int b8_xy= 4*left_xy[0] + 1;
3249
-                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
3250
-                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
3251
-                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
3252
-                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
3253
-                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
3254
-                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
3255
-                    h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
3256
-                    h->ref_cache[list][scan8[0] - 1 +16 ]=
3257
-                    h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
3258
-                }else{
3259
-                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
3260
-                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
3261
-                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
3262
-                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
3263
-                    h->ref_cache[list][scan8[0] - 1 + 0  ]=
3264
-                    h->ref_cache[list][scan8[0] - 1 + 8  ]=
3265
-                    h->ref_cache[list][scan8[0] - 1 + 16 ]=
3266
-                    h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
3267
-                }
3268
-            }
3183
+            nnz_cache[scan8[0+12]]= nnz_cache[scan8[1+12]]=
3184
+            nnz_cache[scan8[2+12]]= nnz_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
3269 3185
         }
3270 3186
     }
3271 3187
 
... ...
@@ -3556,7 +3539,6 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
3556 3556
         ff_draw_horiz_band(s, 16*s->mb_y, 16);
3557 3557
     }
3558 3558
 #endif
3559
-    return -1; //not reached
3560 3559
 }
3561 3560
 
3562 3561
 /**
... ...
@@ -70,6 +70,10 @@
70 70
 #define MB_FIELD h->mb_field_decoding_flag
71 71
 #define FRAME_MBAFF h->mb_aff_frame
72 72
 #define FIELD_PICTURE (s->picture_structure != PICT_FRAME)
73
+#define LEFT_MBS 2
74
+#define LTOP 0
75
+#define LBOT 1
76
+#define LEFT(i) (i)
73 77
 #else
74 78
 #define MB_MBAFF 0
75 79
 #define MB_FIELD 0
... ...
@@ -77,6 +81,10 @@
77 77
 #define FIELD_PICTURE 0
78 78
 #undef  IS_INTERLACED
79 79
 #define IS_INTERLACED(mb_type) 0
80
+#define LEFT_MBS 1
81
+#define LTOP 0
82
+#define LBOT 0
83
+#define LEFT(i) 0
80 84
 #endif
81 85
 #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE)
82 86
 
... ...
@@ -272,12 +280,12 @@ typedef struct H264Context{
272 272
     int topleft_mb_xy;
273 273
     int top_mb_xy;
274 274
     int topright_mb_xy;
275
-    int left_mb_xy[2];
275
+    int left_mb_xy[LEFT_MBS];
276 276
 
277 277
     int topleft_type;
278 278
     int top_type;
279 279
     int topright_type;
280
-    int left_type[2];
280
+    int left_type[LEFT_MBS];
281 281
 
282 282
     const uint8_t * left_block;
283 283
     int topleft_partition;
... ...
@@ -308,11 +316,6 @@ typedef struct H264Context{
308 308
 #define PART_NOT_AVAILABLE -2
309 309
 
310 310
     /**
311
-     * is 1 if the specific list MV&references are set to 0,0,-2.
312
-     */
313
-    int mv_cache_clean[2];
314
-
315
-    /**
316 311
      * number of neighbors (top and/or left) that used 8x8 dct
317 312
      */
318 313
     int neighbor_transform_size;
... ...
@@ -658,7 +661,6 @@ int ff_h264_check_intra4x4_pred_mode(H264Context *h);
658 658
  */
659 659
 int ff_h264_check_intra_pred_mode(H264Context *h, int mode);
660 660
 
661
-void ff_h264_write_back_intra_pred_mode(H264Context *h);
662 661
 void ff_h264_hl_decode_mb(H264Context *h);
663 662
 int ff_h264_frame_start(H264Context *h);
664 663
 int ff_h264_decode_extradata(H264Context *h);
... ...
@@ -773,7 +775,7 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int
773 773
 static void fill_decode_neighbors(H264Context *h, int mb_type){
774 774
     MpegEncContext * const s = &h->s;
775 775
     const int mb_xy= h->mb_xy;
776
-    int topleft_xy, top_xy, topright_xy, left_xy[2];
776
+    int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
777 777
     static const uint8_t left_block_options[4][32]={
778 778
         {0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4},
779 779
         {2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4},
... ...
@@ -790,16 +792,16 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
790 790
 
791 791
     topleft_xy = top_xy - 1;
792 792
     topright_xy= top_xy + 1;
793
-    left_xy[1] = left_xy[0] = mb_xy-1;
793
+    left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
794 794
     h->left_block = left_block_options[0];
795 795
     if(FRAME_MBAFF){
796 796
         const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
797 797
         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
798 798
         if(s->mb_y&1){
799 799
             if (left_mb_field_flag != curr_mb_field_flag) {
800
-                left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1;
800
+                left_xy[LBOT] = left_xy[LTOP] = mb_xy - s->mb_stride - 1;
801 801
                 if (curr_mb_field_flag) {
802
-                    left_xy[1] += s->mb_stride;
802
+                    left_xy[LBOT] += s->mb_stride;
803 803
                     h->left_block = left_block_options[3];
804 804
                 } else {
805 805
                     topleft_xy += s->mb_stride;
... ...
@@ -816,7 +818,7 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
816 816
             }
817 817
             if (left_mb_field_flag != curr_mb_field_flag) {
818 818
                 if (curr_mb_field_flag) {
819
-                    left_xy[1] += s->mb_stride;
819
+                    left_xy[LBOT] += s->mb_stride;
820 820
                     h->left_block = left_block_options[3];
821 821
                 } else {
822 822
                     h->left_block = left_block_options[2];
... ...
@@ -828,25 +830,25 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
828 828
     h->topleft_mb_xy = topleft_xy;
829 829
     h->top_mb_xy     = top_xy;
830 830
     h->topright_mb_xy= topright_xy;
831
-    h->left_mb_xy[0] = left_xy[0];
832
-    h->left_mb_xy[1] = left_xy[1];
831
+    h->left_mb_xy[LTOP] = left_xy[LTOP];
832
+    h->left_mb_xy[LBOT] = left_xy[LBOT];
833 833
     //FIXME do we need all in the context?
834 834
 
835 835
     h->topleft_type = s->current_picture.mb_type[topleft_xy] ;
836 836
     h->top_type     = s->current_picture.mb_type[top_xy]     ;
837 837
     h->topright_type= s->current_picture.mb_type[topright_xy];
838
-    h->left_type[0] = s->current_picture.mb_type[left_xy[0]] ;
839
-    h->left_type[1] = s->current_picture.mb_type[left_xy[1]] ;
838
+    h->left_type[LTOP] = s->current_picture.mb_type[left_xy[LTOP]] ;
839
+    h->left_type[LBOT] = s->current_picture.mb_type[left_xy[LBOT]] ;
840 840
 
841 841
     if(FMO){
842
-    if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0;
843
-    if(h->slice_table[top_xy     ] != h->slice_num) h->top_type     = 0;
844
-    if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0;
842
+    if(h->slice_table[topleft_xy    ] != h->slice_num) h->topleft_type = 0;
843
+    if(h->slice_table[top_xy        ] != h->slice_num) h->top_type     = 0;
844
+    if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0;
845 845
     }else{
846 846
         if(h->slice_table[topleft_xy ] != h->slice_num){
847 847
             h->topleft_type = 0;
848
-            if(h->slice_table[top_xy     ] != h->slice_num) h->top_type     = 0;
849
-            if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0;
848
+            if(h->slice_table[top_xy        ] != h->slice_num) h->top_type     = 0;
849
+            if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0;
850 850
         }
851 851
     }
852 852
     if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0;
... ...
@@ -854,21 +856,23 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
854 854
 
855 855
 static void fill_decode_caches(H264Context *h, int mb_type){
856 856
     MpegEncContext * const s = &h->s;
857
-    int topleft_xy, top_xy, topright_xy, left_xy[2];
858
-    int topleft_type, top_type, topright_type, left_type[2];
857
+    int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
858
+    int topleft_type, top_type, topright_type, left_type[LEFT_MBS];
859 859
     const uint8_t * left_block= h->left_block;
860 860
     int i;
861
-
862
-    topleft_xy   = h->topleft_mb_xy ;
863
-    top_xy       = h->top_mb_xy     ;
864
-    topright_xy  = h->topright_mb_xy;
865
-    left_xy[0]   = h->left_mb_xy[0] ;
866
-    left_xy[1]   = h->left_mb_xy[1] ;
867
-    topleft_type = h->topleft_type  ;
868
-    top_type     = h->top_type      ;
869
-    topright_type= h->topright_type ;
870
-    left_type[0] = h->left_type[0]  ;
871
-    left_type[1] = h->left_type[1]  ;
861
+    uint8_t *nnz;
862
+    uint8_t *nnz_cache;
863
+
864
+    topleft_xy     = h->topleft_mb_xy;
865
+    top_xy         = h->top_mb_xy;
866
+    topright_xy    = h->topright_mb_xy;
867
+    left_xy[LTOP]  = h->left_mb_xy[LTOP];
868
+    left_xy[LBOT]  = h->left_mb_xy[LBOT];
869
+    topleft_type   = h->topleft_type;
870
+    top_type       = h->top_type;
871
+    topright_type  = h->topright_type;
872
+    left_type[LTOP]= h->left_type[LTOP];
873
+    left_type[LBOT]= h->left_type[LBOT];
872 874
 
873 875
     if(!IS_SKIP(mb_type)){
874 876
         if(IS_INTRA(mb_type)){
... ...
@@ -883,27 +887,27 @@ static void fill_decode_caches(H264Context *h, int mb_type){
883 883
                 h->top_samples_available= 0x33FF;
884 884
                 h->topright_samples_available= 0x26EA;
885 885
             }
886
-            if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
886
+            if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])){
887 887
                 if(IS_INTERLACED(mb_type)){
888
-                    if(!(left_type[0] & type_mask)){
888
+                    if(!(left_type[LTOP] & type_mask)){
889 889
                         h->topleft_samples_available&= 0xDFFF;
890 890
                         h->left_samples_available&= 0x5FFF;
891 891
                     }
892
-                    if(!(left_type[1] & type_mask)){
892
+                    if(!(left_type[LBOT] & type_mask)){
893 893
                         h->topleft_samples_available&= 0xFF5F;
894 894
                         h->left_samples_available&= 0xFF5F;
895 895
                     }
896 896
                 }else{
897
-                    int left_typei = s->current_picture.mb_type[left_xy[0] + s->mb_stride];
897
+                    int left_typei = s->current_picture.mb_type[left_xy[LTOP] + s->mb_stride];
898 898
 
899
-                    assert(left_xy[0] == left_xy[1]);
900
-                    if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
899
+                    assert(left_xy[LTOP] == left_xy[LBOT]);
900
+                    if(!((left_typei & type_mask) && (left_type[LTOP] & type_mask))){
901 901
                         h->topleft_samples_available&= 0xDF5F;
902 902
                         h->left_samples_available&= 0x5F5F;
903 903
                     }
904 904
                 }
905 905
             }else{
906
-                if(!(left_type[0] & type_mask)){
906
+                if(!(left_type[LTOP] & type_mask)){
907 907
                     h->topleft_samples_available&= 0xDF5F;
908 908
                     h->left_samples_available&= 0x5F5F;
909 909
                 }
... ...
@@ -925,13 +929,13 @@ static void fill_decode_caches(H264Context *h, int mb_type){
925 925
                     h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask);
926 926
                 }
927 927
                 for(i=0; i<2; i++){
928
-                    if(IS_INTRA4x4(left_type[i])){
929
-                        int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]];
928
+                    if(IS_INTRA4x4(left_type[LEFT(i)])){
929
+                        int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
930 930
                         h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]];
931 931
                         h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]];
932 932
                     }else{
933 933
                         h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
934
-                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[i] & type_mask);
934
+                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[LEFT(i)] & type_mask);
935 935
                     }
936 936
                 }
937 937
             }
... ...
@@ -947,42 +951,45 @@ static void fill_decode_caches(H264Context *h, int mb_type){
947 947
 5 L . .. . . . .
948 948
 */
949 949
 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
950
+    nnz_cache = h->non_zero_count_cache;
950 951
     if(top_type){
951
-        AV_COPY32(&h->non_zero_count_cache[4+8* 0], &h->non_zero_count[top_xy][4*3]);
952
+        nnz = h->non_zero_count[top_xy];
953
+        AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]);
952 954
         if(CHROMA444){
953
-            AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 7]);
954
-            AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4*11]);
955
+            AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]);
956
+            AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]);
955 957
         }else{
956
-            AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 5]);
957
-            AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4* 9]);
958
+            AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 5]);
959
+            AV_COPY32(&nnz_cache[4+8*10], &nnz[4* 9]);
958 960
         }
959 961
     }else{
960 962
         uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
961
-        AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty);
962
-        AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty);
963
-        AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty);
963
+        AV_WN32A(&nnz_cache[4+8* 0], top_empty);
964
+        AV_WN32A(&nnz_cache[4+8* 5], top_empty);
965
+        AV_WN32A(&nnz_cache[4+8*10], top_empty);
964 966
     }
965 967
 
966 968
     for (i=0; i<2; i++) {
967
-        if(left_type[i]){
968
-            h->non_zero_count_cache[3+8* 1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
969
-            h->non_zero_count_cache[3+8* 2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
969
+        if(left_type[LEFT(i)]){
970
+            nnz = h->non_zero_count[left_xy[LEFT(i)]];
971
+            nnz_cache[3+8* 1 + 2*8*i]= nnz[left_block[8+0+2*i]];
972
+            nnz_cache[3+8* 2 + 2*8*i]= nnz[left_block[8+1+2*i]];
970 973
             if(CHROMA444){
971
-                h->non_zero_count_cache[3+8* 6 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+4*4];
972
-                h->non_zero_count_cache[3+8* 7 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+4*4];
973
-                h->non_zero_count_cache[3+8*11 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+8*4];
974
-                h->non_zero_count_cache[3+8*12 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+8*4];
974
+                nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]+4*4];
975
+                nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4];
976
+                nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4];
977
+                nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4];
975 978
             }else{
976
-                h->non_zero_count_cache[3+8* 6 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
977
-                h->non_zero_count_cache[3+8*11 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
979
+                nnz_cache[3+8* 6 +   8*i]= nnz[left_block[8+4+2*i]];
980
+                nnz_cache[3+8*11 +   8*i]= nnz[left_block[8+5+2*i]];
978 981
             }
979 982
         }else{
980
-            h->non_zero_count_cache[3+8* 1 + 2*8*i]=
981
-            h->non_zero_count_cache[3+8* 2 + 2*8*i]=
982
-            h->non_zero_count_cache[3+8* 6 + 2*8*i]=
983
-            h->non_zero_count_cache[3+8* 7 + 2*8*i]=
984
-            h->non_zero_count_cache[3+8*11 + 2*8*i]=
985
-            h->non_zero_count_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
983
+            nnz_cache[3+8* 1 + 2*8*i]=
984
+            nnz_cache[3+8* 2 + 2*8*i]=
985
+            nnz_cache[3+8* 6 + 2*8*i]=
986
+            nnz_cache[3+8* 7 + 2*8*i]=
987
+            nnz_cache[3+8*11 + 2*8*i]=
988
+            nnz_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
986 989
         }
987 990
     }
988 991
 
... ...
@@ -994,10 +1001,10 @@ static void fill_decode_caches(H264Context *h, int mb_type){
994 994
             h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
995 995
         }
996 996
         // left_cbp
997
-        if (left_type[0]) {
998
-            h->left_cbp =   (h->cbp_table[left_xy[0]] & 0x7F0)
999
-                        |  ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2)
1000
-                        | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2);
997
+        if (left_type[LTOP]) {
998
+            h->left_cbp =   (h->cbp_table[left_xy[LTOP]] & 0x7F0)
999
+                        |  ((h->cbp_table[left_xy[LTOP]]>>(left_block[0]&(~1)))&2)
1000
+                        | (((h->cbp_table[left_xy[LBOT]]>>(left_block[2]&(~1)))&2) << 2);
1001 1001
         } else {
1002 1002
             h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
1003 1003
         }
... ...
@@ -1006,144 +1013,145 @@ static void fill_decode_caches(H264Context *h, int mb_type){
1006 1006
 
1007 1007
     if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){
1008 1008
         int list;
1009
+        int b_stride = h->b_stride;
1009 1010
         for(list=0; list<h->list_count; list++){
1011
+            int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
1012
+            int8_t *ref = s->current_picture.ref_index[list];
1013
+            int16_t (*mv_cache)[2] = &h->mv_cache[list][scan8[0]];
1014
+            int16_t (*mv)[2] = s->current_picture.motion_val[list];
1010 1015
             if(!USES_LIST(mb_type, list)){
1011
-                /*if(!h->mv_cache_clean[list]){
1012
-                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
1013
-                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
1014
-                    h->mv_cache_clean[list]= 1;
1015
-                }*/
1016 1016
                 continue;
1017 1017
             }
1018 1018
             assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred));
1019 1019
 
1020
-            h->mv_cache_clean[list]= 0;
1021
-
1022 1020
             if(USES_LIST(top_type, list)){
1023
-                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
1024
-                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
1025
-                    h->ref_cache[list][scan8[0] + 0 - 1*8]=
1026
-                    h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 2];
1027
-                    h->ref_cache[list][scan8[0] + 2 - 1*8]=
1028
-                    h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 3];
1021
+                const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride;
1022
+                AV_COPY128(mv_cache[0 - 1*8], mv[b_xy + 0]);
1023
+                ref_cache[0 - 1*8]=
1024
+                ref_cache[1 - 1*8]= ref[4*top_xy + 2];
1025
+                ref_cache[2 - 1*8]=
1026
+                ref_cache[3 - 1*8]= ref[4*top_xy + 3];
1029 1027
             }else{
1030
-                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
1031
-                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101);
1028
+                AV_ZERO128(mv_cache[0 - 1*8]);
1029
+                AV_WN32A(&ref_cache[0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101);
1032 1030
             }
1033 1031
 
1034 1032
             if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){
1035 1033
             for(i=0; i<2; i++){
1036
-                int cache_idx = scan8[0] - 1 + i*2*8;
1037
-                if(USES_LIST(left_type[i], list)){
1038
-                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
1039
-                    const int b8_xy= 4*left_xy[i] + 1;
1040
-                    AV_COPY32(h->mv_cache[list][cache_idx  ], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]);
1041
-                    AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]);
1042
-                        h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + (left_block[0+i*2]&~1)];
1043
-                        h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + (left_block[1+i*2]&~1)];
1034
+                int cache_idx = -1 + i*2*8;
1035
+                if(USES_LIST(left_type[LEFT(i)], list)){
1036
+                    const int b_xy= h->mb2b_xy[left_xy[LEFT(i)]] + 3;
1037
+                    const int b8_xy= 4*left_xy[LEFT(i)] + 1;
1038
+                    AV_COPY32(mv_cache[cache_idx  ], mv[b_xy + b_stride*left_block[0+i*2]]);
1039
+                    AV_COPY32(mv_cache[cache_idx+8], mv[b_xy + b_stride*left_block[1+i*2]]);
1040
+                    ref_cache[cache_idx  ]= ref[b8_xy + (left_block[0+i*2]&~1)];
1041
+                    ref_cache[cache_idx+8]= ref[b8_xy + (left_block[1+i*2]&~1)];
1044 1042
                 }else{
1045
-                    AV_ZERO32(h->mv_cache [list][cache_idx  ]);
1046
-                    AV_ZERO32(h->mv_cache [list][cache_idx+8]);
1047
-                    h->ref_cache[list][cache_idx  ]=
1048
-                    h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1043
+                    AV_ZERO32(mv_cache[cache_idx  ]);
1044
+                    AV_ZERO32(mv_cache[cache_idx+8]);
1045
+                    ref_cache[cache_idx  ]=
1046
+                    ref_cache[cache_idx+8]= (left_type[LEFT(i)]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1049 1047
                 }
1050 1048
             }
1051 1049
             }else{
1052
-                if(USES_LIST(left_type[0], list)){
1053
-                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
1054
-                    const int b8_xy= 4*left_xy[0] + 1;
1055
-                    AV_COPY32(h->mv_cache[list][scan8[0] - 1], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]);
1056
-                    h->ref_cache[list][scan8[0] - 1]= s->current_picture.ref_index[list][b8_xy + (left_block[0]&~1)];
1050
+                if(USES_LIST(left_type[LTOP], list)){
1051
+                    const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3;
1052
+                    const int b8_xy= 4*left_xy[LTOP] + 1;
1053
+                    AV_COPY32(mv_cache[-1], mv[b_xy + b_stride*left_block[0]]);
1054
+                    ref_cache[-1]= ref[b8_xy + (left_block[0]&~1)];
1057 1055
                 }else{
1058
-                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1]);
1059
-                    h->ref_cache[list][scan8[0] - 1]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1056
+                    AV_ZERO32(mv_cache[-1]);
1057
+                    ref_cache[-1]= left_type[LTOP] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1060 1058
                 }
1061 1059
             }
1062 1060
 
1063 1061
             if(USES_LIST(topright_type, list)){
1064
-                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
1065
-                AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_picture.motion_val[list][b_xy]);
1066
-                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][4*topright_xy + 2];
1062
+                const int b_xy= h->mb2b_xy[topright_xy] + 3*b_stride;
1063
+                AV_COPY32(mv_cache[4 - 1*8], mv[b_xy]);
1064
+                ref_cache[4 - 1*8]= ref[4*topright_xy + 2];
1067 1065
             }else{
1068
-                AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]);
1069
-                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1066
+                AV_ZERO32(mv_cache[4 - 1*8]);
1067
+                ref_cache[4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1070 1068
             }
1071
-            if(h->ref_cache[list][scan8[0] + 4 - 1*8] < 0){
1069
+            if(ref_cache[4 - 1*8] < 0){
1072 1070
                 if(USES_LIST(topleft_type, list)){
1073
-                    const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride);
1071
+                    const int b_xy = h->mb2b_xy[topleft_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride);
1074 1072
                     const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2);
1075
-                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_picture.motion_val[list][b_xy]);
1076
-                    h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
1073
+                    AV_COPY32(mv_cache[-1 - 1*8], mv[b_xy]);
1074
+                    ref_cache[-1 - 1*8]= ref[b8_xy];
1077 1075
                 }else{
1078
-                    AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]);
1079
-                    h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1076
+                    AV_ZERO32(mv_cache[-1 - 1*8]);
1077
+                    ref_cache[-1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1080 1078
                 }
1081 1079
             }
1082 1080
 
1083 1081
             if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF)
1084 1082
                 continue;
1085 1083
 
1086
-            if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) {
1087
-            h->ref_cache[list][scan8[4 ]] =
1088
-            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
1089
-            AV_ZERO32(h->mv_cache [list][scan8[4 ]]);
1090
-            AV_ZERO32(h->mv_cache [list][scan8[12]]);
1091
-
1092
-            if( CABAC ) {
1093
-                /* XXX beurk, Load mvd */
1094
-                if(USES_LIST(top_type, list)){
1095
-                    const int b_xy= h->mb2br_xy[top_xy];
1096
-                    AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]);
1097
-                }else{
1098
-                    AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
1099
-                }
1100
-                if(USES_LIST(left_type[0], list)){
1101
-                    const int b_xy= h->mb2br_xy[left_xy[0]] + 6;
1102
-                    AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy - left_block[0]]);
1103
-                    AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy - left_block[1]]);
1104
-                }else{
1105
-                    AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]);
1106
-                    AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]);
1107
-                }
1108
-                if(USES_LIST(left_type[1], list)){
1109
-                    const int b_xy= h->mb2br_xy[left_xy[1]] + 6;
1110
-                    AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy - left_block[2]]);
1111
-                    AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy - left_block[3]]);
1112
-                }else{
1113
-                    AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]);
1114
-                    AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]);
1115
-                }
1116
-                AV_ZERO16(h->mvd_cache [list][scan8[4 ]]);
1117
-                AV_ZERO16(h->mvd_cache [list][scan8[12]]);
1118
-                if(h->slice_type_nos == AV_PICTURE_TYPE_B){
1119
-                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1);
1120
-
1121
-                    if(IS_DIRECT(top_type)){
1122
-                        AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1));
1123
-                    }else if(IS_8X8(top_type)){
1124
-                        int b8_xy = 4*top_xy;
1125
-                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy + 2];
1126
-                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 3];
1084
+            if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))){
1085
+                uint8_t (*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]];
1086
+                uint8_t (*mvd)[2] = h->mvd_table[list];
1087
+                ref_cache[2+8*0] =
1088
+                ref_cache[2+8*2] = PART_NOT_AVAILABLE;
1089
+                AV_ZERO32(mv_cache[2+8*0]);
1090
+                AV_ZERO32(mv_cache[2+8*2]);
1091
+
1092
+                if( CABAC ) {
1093
+                    if(USES_LIST(top_type, list)){
1094
+                        const int b_xy= h->mb2br_xy[top_xy];
1095
+                        AV_COPY64(mvd_cache[0 - 1*8], mvd[b_xy + 0]);
1127 1096
                     }else{
1128
-                        AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1));
1097
+                        AV_ZERO64(mvd_cache[0 - 1*8]);
1098
+                    }
1099
+                    if(USES_LIST(left_type[LTOP], list)){
1100
+                        const int b_xy= h->mb2br_xy[left_xy[LTOP]] + 6;
1101
+                        AV_COPY16(mvd_cache[-1 + 0*8], mvd[b_xy - left_block[0]]);
1102
+                        AV_COPY16(mvd_cache[-1 + 1*8], mvd[b_xy - left_block[1]]);
1103
+                    }else{
1104
+                        AV_ZERO16(mvd_cache[-1 + 0*8]);
1105
+                        AV_ZERO16(mvd_cache[-1 + 1*8]);
1106
+                    }
1107
+                    if(USES_LIST(left_type[LBOT], list)){
1108
+                        const int b_xy= h->mb2br_xy[left_xy[LBOT]] + 6;
1109
+                        AV_COPY16(mvd_cache[-1 + 2*8], mvd[b_xy - left_block[2]]);
1110
+                        AV_COPY16(mvd_cache[-1 + 3*8], mvd[b_xy - left_block[3]]);
1111
+                    }else{
1112
+                        AV_ZERO16(mvd_cache[-1 + 2*8]);
1113
+                        AV_ZERO16(mvd_cache[-1 + 3*8]);
1114
+                    }
1115
+                    AV_ZERO16(mvd_cache[2+8*0]);
1116
+                    AV_ZERO16(mvd_cache[2+8*2]);
1117
+                    if(h->slice_type_nos == AV_PICTURE_TYPE_B){
1118
+                        uint8_t *direct_cache = &h->direct_cache[scan8[0]];
1119
+                        uint8_t *direct_table = h->direct_table;
1120
+                        fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16>>1, 1);
1121
+
1122
+                        if(IS_DIRECT(top_type)){
1123
+                            AV_WN32A(&direct_cache[-1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1));
1124
+                        }else if(IS_8X8(top_type)){
1125
+                            int b8_xy = 4*top_xy;
1126
+                            direct_cache[0 - 1*8]= direct_table[b8_xy + 2];
1127
+                            direct_cache[2 - 1*8]= direct_table[b8_xy + 3];
1128
+                        }else{
1129
+                            AV_WN32A(&direct_cache[-1*8], 0x01010101*(MB_TYPE_16x16>>1));
1130
+                        }
1131
+
1132
+                        if(IS_DIRECT(left_type[LTOP]))
1133
+                            direct_cache[-1 + 0*8]= MB_TYPE_DIRECT2>>1;
1134
+                        else if(IS_8X8(left_type[LTOP]))
1135
+                            direct_cache[-1 + 0*8]= direct_table[4*left_xy[LTOP] + 1 + (left_block[0]&~1)];
1136
+                        else
1137
+                            direct_cache[-1 + 0*8]= MB_TYPE_16x16>>1;
1138
+
1139
+                        if(IS_DIRECT(left_type[LBOT]))
1140
+                            direct_cache[-1 + 2*8]= MB_TYPE_DIRECT2>>1;
1141
+                        else if(IS_8X8(left_type[LBOT]))
1142
+                            direct_cache[-1 + 2*8]= direct_table[4*left_xy[LBOT] + 1 + (left_block[2]&~1)];
1143
+                        else
1144
+                            direct_cache[-1 + 2*8]= MB_TYPE_16x16>>1;
1129 1145
                     }
1130
-
1131
-                    if(IS_DIRECT(left_type[0]))
1132
-                        h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1;
1133
-                    else if(IS_8X8(left_type[0]))
1134
-                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[4*left_xy[0] + 1 + (left_block[0]&~1)];
1135
-                    else
1136
-                        h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1;
1137
-
1138
-                    if(IS_DIRECT(left_type[1]))
1139
-                        h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1;
1140
-                    else if(IS_8X8(left_type[1]))
1141
-                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[4*left_xy[1] + 1 + (left_block[2]&~1)];
1142
-                    else
1143
-                        h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1;
1144 1146
                 }
1145 1147
             }
1146
-            }
1147 1148
             if(FRAME_MBAFF){
1148 1149
 #define MAP_MVS\
1149 1150
                     MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
... ...
@@ -1152,10 +1160,10 @@ static void fill_decode_caches(H264Context *h, int mb_type){
1152 1152
                     MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
1153 1153
                     MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
1154 1154
                     MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
1155
-                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
1156
-                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
1157
-                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
1158
-                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
1155
+                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[LTOP])\
1156
+                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[LTOP])\
1157
+                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[LBOT])\
1158
+                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[LBOT])
1159 1159
                 if(MB_FIELD){
1160 1160
 #define MAP_F2F(idx, mb_type)\
1161 1161
                     if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
... ...
@@ -1179,13 +1187,13 @@ static void fill_decode_caches(H264Context *h, int mb_type){
1179 1179
         }
1180 1180
     }
1181 1181
 
1182
-        h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
1182
+        h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]);
1183 1183
 }
1184 1184
 
1185 1185
 /**
1186 1186
  * gets the predicted intra4x4 prediction mode.
1187 1187
  */
1188
-static inline int pred_intra_mode(H264Context *h, int n){
1188
+static av_always_inline int pred_intra_mode(H264Context *h, int n){
1189 1189
     const int index8= scan8[n];
1190 1190
     const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1191 1191
     const int top = h->intra4x4_pred_mode_cache[index8 - 8];
... ...
@@ -1197,69 +1205,83 @@ static inline int pred_intra_mode(H264Context *h, int n){
1197 1197
     else      return min;
1198 1198
 }
1199 1199
 
1200
-static inline void write_back_non_zero_count(H264Context *h){
1201
-    const int mb_xy= h->mb_xy;
1200
+static av_always_inline void write_back_intra_pred_mode(H264Context *h){
1201
+    int8_t *i4x4= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
1202
+    int8_t *i4x4_cache= h->intra4x4_pred_mode_cache;
1203
+
1204
+    AV_COPY32(i4x4, i4x4_cache + 4 + 8*4);
1205
+    i4x4[4]= i4x4_cache[7+8*3];
1206
+    i4x4[5]= i4x4_cache[7+8*2];
1207
+    i4x4[6]= i4x4_cache[7+8*1];
1208
+}
1202 1209
 
1203
-    AV_COPY32(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[4+8* 1]);
1204
-    AV_COPY32(&h->non_zero_count[mb_xy][ 4], &h->non_zero_count_cache[4+8* 2]);
1205
-    AV_COPY32(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[4+8* 3]);
1206
-    AV_COPY32(&h->non_zero_count[mb_xy][12], &h->non_zero_count_cache[4+8* 4]);
1207
-    AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[4+8* 6]);
1208
-    AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8* 7]);
1209
-    AV_COPY32(&h->non_zero_count[mb_xy][32], &h->non_zero_count_cache[4+8*11]);
1210
-    AV_COPY32(&h->non_zero_count[mb_xy][36], &h->non_zero_count_cache[4+8*12]);
1210
+static av_always_inline void write_back_non_zero_count(H264Context *h){
1211
+    const int mb_xy= h->mb_xy;
1212
+    uint8_t *nnz = h->non_zero_count[mb_xy];
1213
+    uint8_t *nnz_cache = h->non_zero_count_cache;
1214
+
1215
+    AV_COPY32(&nnz[ 0], &nnz_cache[4+8* 1]);
1216
+    AV_COPY32(&nnz[ 4], &nnz_cache[4+8* 2]);
1217
+    AV_COPY32(&nnz[ 8], &nnz_cache[4+8* 3]);
1218
+    AV_COPY32(&nnz[12], &nnz_cache[4+8* 4]);
1219
+    AV_COPY32(&nnz[16], &nnz_cache[4+8* 6]);
1220
+    AV_COPY32(&nnz[20], &nnz_cache[4+8* 7]);
1221
+    AV_COPY32(&nnz[32], &nnz_cache[4+8*11]);
1222
+    AV_COPY32(&nnz[36], &nnz_cache[4+8*12]);
1211 1223
 
1212 1224
     if(CHROMA444){
1213
-        AV_COPY32(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[4+8* 8]);
1214
-        AV_COPY32(&h->non_zero_count[mb_xy][28], &h->non_zero_count_cache[4+8* 9]);
1215
-        AV_COPY32(&h->non_zero_count[mb_xy][40], &h->non_zero_count_cache[4+8*13]);
1216
-        AV_COPY32(&h->non_zero_count[mb_xy][44], &h->non_zero_count_cache[4+8*14]);
1225
+        AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]);
1226
+        AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]);
1227
+        AV_COPY32(&nnz[40], &nnz_cache[4+8*13]);
1228
+        AV_COPY32(&nnz[44], &nnz_cache[4+8*14]);
1217 1229
     }
1218 1230
 }
1219 1231
 
1220
-static inline void write_back_motion(H264Context *h, int mb_type){
1232
+static av_always_inline void write_back_motion_list(H264Context *h, MpegEncContext * const s, int b_stride,
1233
+                                                    int b_xy, int b8_xy, int mb_type, int list )
1234
+{
1235
+    int16_t (*mv_dst)[2] = &s->current_picture.motion_val[list][b_xy];
1236
+    int16_t (*mv_src)[2] = &h->mv_cache[list][scan8[0]];
1237
+    AV_COPY128(mv_dst + 0*b_stride, mv_src + 8*0);
1238
+    AV_COPY128(mv_dst + 1*b_stride, mv_src + 8*1);
1239
+    AV_COPY128(mv_dst + 2*b_stride, mv_src + 8*2);
1240
+    AV_COPY128(mv_dst + 3*b_stride, mv_src + 8*3);
1241
+    if( CABAC ) {
1242
+        uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]];
1243
+        uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
1244
+        if(IS_SKIP(mb_type))
1245
+            AV_ZERO128(mvd_dst);
1246
+        else{
1247
+            AV_COPY64(mvd_dst, mvd_src + 8*3);
1248
+            AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0);
1249
+            AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1);
1250
+            AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2);
1251
+        }
1252
+    }
1253
+
1254
+    {
1255
+        int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1256
+        int8_t *ref_cache = h->ref_cache[list];
1257
+        ref_index[0+0*2]= ref_cache[scan8[0]];
1258
+        ref_index[1+0*2]= ref_cache[scan8[4]];
1259
+        ref_index[0+1*2]= ref_cache[scan8[8]];
1260
+        ref_index[1+1*2]= ref_cache[scan8[12]];
1261
+    }
1262
+}
1263
+
1264
+static av_always_inline void write_back_motion(H264Context *h, int mb_type){
1221 1265
     MpegEncContext * const s = &h->s;
1266
+    const int b_stride = h->b_stride;
1222 1267
     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy
1223 1268
     const int b8_xy= 4*h->mb_xy;
1224
-    int list;
1225 1269
 
1226
-    if(!USES_LIST(mb_type, 0))
1270
+    if(USES_LIST(mb_type, 0)){
1271
+        write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 0);
1272
+    }else{
1227 1273
         fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1);
1228
-
1229
-    for(list=0; list<h->list_count; list++){
1230
-        int y, b_stride;
1231
-        int16_t (*mv_dst)[2];
1232
-        int16_t (*mv_src)[2];
1233
-
1234
-        if(!USES_LIST(mb_type, list))
1235
-            continue;
1236
-
1237
-        b_stride = h->b_stride;
1238
-        mv_dst   = &s->current_picture.motion_val[list][b_xy];
1239
-        mv_src   = &h->mv_cache[list][scan8[0]];
1240
-        for(y=0; y<4; y++){
1241
-            AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y);
1242
-        }
1243
-        if( CABAC ) {
1244
-            uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]];
1245
-            uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
1246
-            if(IS_SKIP(mb_type))
1247
-                AV_ZERO128(mvd_dst);
1248
-            else{
1249
-            AV_COPY64(mvd_dst, mvd_src + 8*3);
1250
-                AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0);
1251
-                AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1);
1252
-                AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2);
1253
-            }
1254
-        }
1255
-
1256
-        {
1257
-            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1258
-            ref_index[0+0*2]= h->ref_cache[list][scan8[0]];
1259
-            ref_index[1+0*2]= h->ref_cache[list][scan8[4]];
1260
-            ref_index[0+1*2]= h->ref_cache[list][scan8[8]];
1261
-            ref_index[1+1*2]= h->ref_cache[list][scan8[12]];
1262
-        }
1274
+    }
1275
+    if(USES_LIST(mb_type, 1)){
1276
+        write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 1);
1263 1277
     }
1264 1278
 
1265 1279
     if(h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC){
... ...
@@ -1272,7 +1294,7 @@ static inline void write_back_motion(H264Context *h, int mb_type){
1272 1272
     }
1273 1273
 }
1274 1274
 
1275
-static inline int get_dct8x8_allowed(H264Context *h){
1275
+static av_always_inline int get_dct8x8_allowed(H264Context *h){
1276 1276
     if(h->sps.direct_8x8_inference_flag)
1277 1277
         return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
1278 1278
     else
... ...
@@ -1296,9 +1296,9 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl
1296 1296
 
1297 1297
     if(intra_slice){
1298 1298
         int ctx=0;
1299
-        if( h->left_type[0] & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
1299
+        if( h->left_type[LTOP] & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
1300 1300
             ctx++;
1301
-        if( h->top_type     & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
1301
+        if( h->top_type        & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
1302 1302
             ctx++;
1303 1303
         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
1304 1304
             return 0;   /* I4x4 */
... ...
@@ -1376,10 +1376,10 @@ static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
1376 1376
     int ctx = 0;
1377 1377
 
1378 1378
     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
1379
-    if( h->left_type[0] && h->chroma_pred_mode_table[mba_xy] != 0 )
1379
+    if( h->left_type[LTOP] && h->chroma_pred_mode_table[mba_xy] != 0 )
1380 1380
         ctx++;
1381 1381
 
1382
-    if( h->top_type     && h->chroma_pred_mode_table[mbb_xy] != 0 )
1382
+    if( h->top_type        && h->chroma_pred_mode_table[mbb_xy] != 0 )
1383 1383
         ctx++;
1384 1384
 
1385 1385
     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
... ...
@@ -1881,7 +1881,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
1881 1881
         int ctx = 0;
1882 1882
         assert(h->slice_type_nos == AV_PICTURE_TYPE_B);
1883 1883
 
1884
-        if( !IS_DIRECT( h->left_type[0]-1 ) )
1884
+        if( !IS_DIRECT( h->left_type[LTOP]-1 ) )
1885 1885
             ctx++;
1886 1886
         if( !IS_DIRECT( h->top_type-1 ) )
1887 1887
             ctx++;
... ...
@@ -2000,7 +2000,7 @@ decode_intra_mb:
2000 2000
                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
2001 2001
                 }
2002 2002
             }
2003
-            ff_h264_write_back_intra_pred_mode(h);
2003
+            write_back_intra_pred_mode(h);
2004 2004
             if( ff_h264_check_intra4x4_pred_mode(h) < 0 ) return -1;
2005 2005
         } else {
2006 2006
             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode( h, h->intra16x16_pred_mode );
... ...
@@ -2249,21 +2249,22 @@ decode_intra_mb:
2249 2249
      * the transform mode of the current macroblock there. */
2250 2250
     if (CHROMA444 && IS_8x8DCT(mb_type)){
2251 2251
         int i;
2252
+        uint8_t *nnz_cache = h->non_zero_count_cache;
2252 2253
         for (i = 0; i < 2; i++){
2253
-            if (h->left_type[i] && !IS_8x8DCT(h->left_type[i])){
2254
-                h->non_zero_count_cache[3+8* 1 + 2*8*i]=
2255
-                h->non_zero_count_cache[3+8* 2 + 2*8*i]=
2256
-                h->non_zero_count_cache[3+8* 6 + 2*8*i]=
2257
-                h->non_zero_count_cache[3+8* 7 + 2*8*i]=
2258
-                h->non_zero_count_cache[3+8*11 + 2*8*i]=
2259
-                h->non_zero_count_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
2254
+            if (h->left_type[LEFT(i)] && !IS_8x8DCT(h->left_type[LEFT(i)])){
2255
+                nnz_cache[3+8* 1 + 2*8*i]=
2256
+                nnz_cache[3+8* 2 + 2*8*i]=
2257
+                nnz_cache[3+8* 6 + 2*8*i]=
2258
+                nnz_cache[3+8* 7 + 2*8*i]=
2259
+                nnz_cache[3+8*11 + 2*8*i]=
2260
+                nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
2260 2261
             }
2261 2262
         }
2262 2263
         if (h->top_type && !IS_8x8DCT(h->top_type)){
2263 2264
             uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
2264
-            AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty);
2265
-            AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty);
2266
-            AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty);
2265
+            AV_WN32A(&nnz_cache[4+8* 0], top_empty);
2266
+            AV_WN32A(&nnz_cache[4+8* 5], top_empty);
2267
+            AV_WN32A(&nnz_cache[4+8*10], top_empty);
2267 2268
         }
2268 2269
     }
2269 2270
     s->current_picture.mb_type[mb_xy]= mb_type;
... ...
@@ -731,7 +731,7 @@ decode_intra_mb:
731 731
                 else
732 732
                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
733 733
             }
734
-            ff_h264_write_back_intra_pred_mode(h);
734
+            write_back_intra_pred_mode(h);
735 735
             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
736 736
                 return -1;
737 737
         }else{
... ...
@@ -215,19 +215,20 @@ static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t
215 215
 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
216 216
     MpegEncContext * const s = &h->s;
217 217
     int mb_xy;
218
-    int mb_type, left_type;
218
+    int mb_type, left_type, top_type;
219 219
     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
220 220
     int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
221 221
     int chroma444 = CHROMA444;
222 222
 
223 223
     mb_xy = h->mb_xy;
224 224
 
225
-    if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
225
+    if(!h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
226 226
         ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
227 227
         return;
228 228
     }
229 229
     assert(!FRAME_MBAFF);
230
-    left_type= h->left_type[0];
230
+    left_type= h->left_type[LTOP];
231
+    top_type= h->top_type;
231 232
 
232 233
     mb_type = s->current_picture.mb_type[mb_xy];
233 234
     qp = s->current_picture.qscale_table[mb_xy];
... ...
@@ -253,13 +254,17 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
253 253
             filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h);
254 254
         if( IS_8x8DCT(mb_type) ) {
255 255
             filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
256
-            filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
256
+            if(top_type){
257
+                filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
258
+            }
257 259
             filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
258 260
         } else {
259 261
             filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h);
260 262
             filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
261 263
             filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h);
262
-            filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
264
+            if(top_type){
265
+                filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
266
+            }
263 267
             filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h);
264 268
             filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
265 269
             filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h);
... ...
@@ -273,8 +278,10 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
273 273
                 if( IS_8x8DCT(mb_type) ) {
274 274
                     filter_mb_edgev( &img_cb[4*2], linesize, bS3, qpc, h);
275 275
                     filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h);
276
-                    filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
277
-                    filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
276
+                    if(top_type){
277
+                        filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
278
+                        filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
279
+                    }
278 280
                     filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h);
279 281
                     filter_mb_edgeh( &img_cr[4*2*linesize], linesize, bS3, qpc, h);
280 282
                 } else {
... ...
@@ -284,8 +291,10 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
284 284
                     filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h);
285 285
                     filter_mb_edgev( &img_cb[4*3], linesize, bS3, qpc, h);
286 286
                     filter_mb_edgev( &img_cr[4*3], linesize, bS3, qpc, h);
287
-                    filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
288
-                    filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
287
+                    if(top_type){
288
+                        filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
289
+                        filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
290
+                    }
289 291
                     filter_mb_edgeh( &img_cb[4*1*linesize], linesize, bS3, qpc, h);
290 292
                     filter_mb_edgeh( &img_cr[4*1*linesize], linesize, bS3, qpc, h);
291 293
                     filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h);
... ...
@@ -300,9 +309,11 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
300 300
                 }
301 301
                 filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
302 302
                 filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
303
-                filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
303
+                if(top_type){
304
+                    filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
305
+                    filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
306
+                }
304 307
                 filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
305
-                filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
306 308
                 filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
307 309
             }
308 310
         }
... ...
@@ -318,7 +329,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
318 318
             AV_WN64A(bS[1][2], 0x0002000200020002ULL);
319 319
         } else {
320 320
             int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0;
321
-            int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
321
+            int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[LTOP] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
322 322
             int step =  1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1;
323 323
             edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
324 324
             h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
... ...
@@ -326,7 +337,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
326 326
         }
327 327
         if( IS_INTRA(left_type) )
328 328
             AV_WN64A(bS[0][0], 0x0004000400040004ULL);
329
-        if( IS_INTRA(h->top_type) )
329
+        if( IS_INTRA(top_type) )
330 330
             AV_WN64A(bS[1][0], FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL);
331 331
 
332 332
 #define FILTER(hv,dir,edge)\
... ...
@@ -345,16 +356,19 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
345 345
         if(left_type)
346 346
             FILTER(v,0,0);
347 347
         if( edges == 1 ) {
348
-            FILTER(h,1,0);
348
+            if(top_type)
349
+                FILTER(h,1,0);
349 350
         } else if( IS_8x8DCT(mb_type) ) {
350 351
             FILTER(v,0,2);
351
-            FILTER(h,1,0);
352
+            if(top_type)
353
+                FILTER(h,1,0);
352 354
             FILTER(h,1,2);
353 355
         } else {
354 356
             FILTER(v,0,1);
355 357
             FILTER(v,0,2);
356 358
             FILTER(v,0,3);
357
-            FILTER(h,1,0);
359
+            if(top_type)
360
+                FILTER(h,1,0);
358 361
             FILTER(h,1,1);
359 362
             FILTER(h,1,2);
360 363
             FILTER(h,1,3);
... ...
@@ -397,7 +411,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
397 397
     int edge;
398 398
     int chroma_qp_avg[2];
399 399
     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
400
-    const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type;
400
+    const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type;
401 401
 
402 402
     // how often to recheck mv-based bS when iterating between edges
403 403
     static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1},
... ...
@@ -633,9 +647,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
633 633
 
634 634
     if (FRAME_MBAFF
635 635
             // and current and left pair do not have the same interlaced type
636
-            && IS_INTERLACED(mb_type^h->left_type[0])
636
+            && IS_INTERLACED(mb_type^h->left_type[LTOP])
637 637
             // and left mb is in available to us
638
-            && h->left_type[0]) {
638
+            && h->left_type[LTOP]) {
639 639
         /* First vertical edge is different in MBAFF frames
640 640
          * There are 8 different bS to compute and 2 different Qp
641 641
          */
... ...
@@ -663,8 +677,8 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
663 663
             const uint8_t *off= offset[MB_FIELD][mb_y&1];
664 664
             for( i = 0; i < 8; i++ ) {
665 665
                 int j= MB_FIELD ? i>>2 : i&1;
666
-                int mbn_xy = h->left_mb_xy[j];
667
-                int mbn_type= h->left_type[j];
666
+                int mbn_xy = h->left_mb_xy[LEFT(j)];
667
+                int mbn_type= h->left_type[LEFT(j)];
668 668
 
669 669
                 if( IS_INTRA( mbn_type ) )
670 670
                     bS[i] = 4;
... ...
@@ -64,7 +64,6 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
64 64
             if(!MB_FIELD
65 65
                && IS_INTERLACED(h->left_type[0])){
66 66
                 SET_DIAG_MV(*2, >>1, h->left_mb_xy[0]+s->mb_stride, (s->mb_y&1)*2+(i>>5));
67
-                assert(h->left_mb_xy[0] == h->left_mb_xy[1]);
68 67
             }
69 68
             if(MB_FIELD
70 69
                && !IS_INTERLACED(h->left_type[0])){
... ...
@@ -237,7 +237,6 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
237 237
 }
238 238
 /**
239 239
  * IDCT transforms the 16 dc values and dequantizes them.
240
- * @param qp quantization parameter
241 240
  */
242 241
 void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, int qmul){
243 242
 #define stride 16
... ...
@@ -30,6 +30,7 @@
30 30
 //#define DEBUG
31 31
 #include <limits.h>
32 32
 
33
+#include "libavutil/mathematics.h"
33 34
 #include "dsputil.h"
34 35
 #include "avcodec.h"
35 36
 #include "mpegvideo.h"
... ...
@@ -149,7 +149,7 @@ static int estimate_best_order(double *ref, int min_order, int max_order)
149 149
 /**
150 150
  * Calculate LPC coefficients for multiple orders
151 151
  *
152
- * @param use_lpc LPC method for determining coefficients
152
+ * @param lpc_type LPC method for determining coefficients
153 153
  * 0  = LPC with fixed pre-defined coeffs
154 154
  * 1  = LPC with coeffs determined by Levinson-Durbin recursion
155 155
  * 2+ = LPC with coeffs determined by Cholesky factorization using (use_lpc-1) passes.
... ...
@@ -1505,29 +1505,26 @@ eoi_parser:
1505 1505
                         av_log(avctx, AV_LOG_WARNING, "Found EOI before any SOF, ignoring\n");
1506 1506
                         break;
1507 1507
                     }
1508
-                    {
1509
-                        if (s->interlaced) {
1510
-                            s->bottom_field ^= 1;
1511
-                            /* if not bottom field, do not output image yet */
1512
-                            if (s->bottom_field == !s->interlace_polarity)
1513
-                                goto not_the_end;
1514
-                        }
1515
-                        *picture = *s->picture_ptr;
1516
-                        *data_size = sizeof(AVFrame);
1517
-
1518
-                        if(!s->lossless){
1519
-                            picture->quality= FFMAX3(s->qscale[0], s->qscale[1], s->qscale[2]);
1520
-                            picture->qstride= 0;
1521
-                            picture->qscale_table= s->qscale_table;
1522
-                            memset(picture->qscale_table, picture->quality, (s->width+15)/16);
1523
-                            if(avctx->debug & FF_DEBUG_QP)
1524
-                                av_log(avctx, AV_LOG_DEBUG, "QP: %d\n", picture->quality);
1525
-                            picture->quality*= FF_QP2LAMBDA;
1526
-                        }
1527
-
1528
-                        goto the_end;
1508
+                    if (s->interlaced) {
1509
+                        s->bottom_field ^= 1;
1510
+                        /* if not bottom field, do not output image yet */
1511
+                        if (s->bottom_field == !s->interlace_polarity)
1512
+                            goto not_the_end;
1529 1513
                     }
1530
-                    break;
1514
+                    *picture = *s->picture_ptr;
1515
+                    *data_size = sizeof(AVFrame);
1516
+
1517
+                    if(!s->lossless){
1518
+                        picture->quality= FFMAX3(s->qscale[0], s->qscale[1], s->qscale[2]);
1519
+                        picture->qstride= 0;
1520
+                        picture->qscale_table= s->qscale_table;
1521
+                        memset(picture->qscale_table, picture->quality, (s->width+15)/16);
1522
+                        if(avctx->debug & FF_DEBUG_QP)
1523
+                            av_log(avctx, AV_LOG_DEBUG, "QP: %d\n", picture->quality);
1524
+                        picture->quality*= FF_QP2LAMBDA;
1525
+                    }
1526
+
1527
+                    goto the_end;
1531 1528
                 case SOS:
1532 1529
                     if (!s->got_picture) {
1533 1530
                         av_log(avctx, AV_LOG_WARNING, "Can not process SOS before SOF, skipping\n");
... ...
@@ -993,7 +993,7 @@ static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dm
993 993
 }
994 994
 
995 995
 /**
996
-   @param P[10][2] a list of candidate mvs to check before starting the
996
+   @param P a list of candidate mvs to check before starting the
997 997
    iterative search. If one of the candidates is close to the optimal mv, then
998 998
    it takes fewer iterations. And it increases the chance that we find the
999 999
    optimal mv.
... ...
@@ -1965,8 +1965,6 @@ static int slice_decode_thread(AVCodecContext *c, void *arg){
1965 1965
         if(mb_y < 0 || mb_y >= s->end_mb_y)
1966 1966
             return -1;
1967 1967
     }
1968
-
1969
-    return 0; //not reached
1970 1968
 }
1971 1969
 
1972 1970
 /**
... ...
@@ -28,6 +28,7 @@
28 28
  */
29 29
 
30 30
 #include "libavutil/intmath.h"
31
+#include "libavutil/mathematics.h"
31 32
 #include "avcodec.h"
32 33
 #include "dsputil.h"
33 34
 #include "mpegvideo.h"
... ...
@@ -440,7 +440,6 @@ static int pcm_decode_frame(AVCodecContext *avctx,
440 440
         default:
441 441
             av_log(avctx, AV_LOG_ERROR, "PCM DVD unsupported sample depth\n");
442 442
             return -1;
443
-            break;
444 443
         }
445 444
         samples = (short *) dst_int32_t;
446 445
         break;
... ...
@@ -471,7 +471,6 @@ static int shorten_decode_frame(AVCodecContext *avctx,
471 471
                         s->cur_chan = 0;
472 472
                         goto frame_done;
473 473
                     }
474
-                    break;
475 474
                 }
476 475
                 break;
477 476
             case FN_VERBATIM:
... ...
@@ -489,11 +488,9 @@ static int shorten_decode_frame(AVCodecContext *avctx,
489 489
             case FN_QUIT:
490 490
                 *data_size = 0;
491 491
                 return buf_size;
492
-                break;
493 492
             default:
494 493
                 av_log(avctx, AV_LOG_ERROR, "unknown shorten function %d\n", cmd);
495 494
                 return -1;
496
-                break;
497 495
         }
498 496
     }
499 497
 frame_done:
... ...
@@ -33,42 +33,6 @@
33 33
 #undef NDEBUG
34 34
 #include <assert.h>
35 35
 
36
-static const int8_t quant3[256]={
37
- 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
53
-};
54
-static const int8_t quant3b[256]={
55
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
-};
72 36
 static const int8_t quant3bA[256]={
73 37
  0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 38
  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
... ...
@@ -87,153 +51,7 @@ static const int8_t quant3bA[256]={
87 87
  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 88
  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89 89
 };
90
-static const int8_t quant5[256]={
91
- 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
107
-};
108
-static const int8_t quant7[256]={
109
- 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
111
- 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
112
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
115
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
120
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122
--3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
123
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
124
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
125
-};
126
-static const int8_t quant9[256]={
127
- 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
128
- 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
142
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
143
-};
144
-static const int8_t quant11[256]={
145
- 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
146
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
147
- 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
156
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
159
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
160
--4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
161
-};
162
-static const int8_t quant13[256]={
163
- 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
164
- 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
165
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
166
- 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
169
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
173
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
174
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
176
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
177
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
178
--4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
179
-};
180 90
 
181
-#if 0 //64*cubic
182
-static const uint8_t obmc32[1024]={
183
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
184
-  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
185
-  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
186
-  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
187
-  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
188
-  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
189
-  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
190
-  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
191
-  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
192
-  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
193
-  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
194
-  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
195
-  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
196
-  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
197
-  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
198
-  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
199
-  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
200
-  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
201
-  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
202
-  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
203
-  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
204
-  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
205
-  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
206
-  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
207
-  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
208
-  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
209
-  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
210
-  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
211
-  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
212
-  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
213
-  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
214
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
215
-//error:0.000022
216
-};
217
-static const uint8_t obmc16[256]={
218
-  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
219
-  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
220
-  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
221
-  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
222
-  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
223
-  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
224
-  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
225
-  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
226
-  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
227
-  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
228
-  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
229
-  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
230
-  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
231
-  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
232
-  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
233
-  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
234
-//error:0.000033
235
-};
236
-#elif 1 // 64*linear
237 91
 static const uint8_t obmc32[1024]={
238 92
   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
239 93
   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
... ...
@@ -288,62 +106,6 @@ static const uint8_t obmc16[256]={
288 288
   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
289 289
 //error:0.000015
290 290
 };
291
-#else //64*cos
292
-static const uint8_t obmc32[1024]={
293
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
294
-  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
295
-  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
296
-  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
297
-  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
298
-  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
299
-  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
300
-  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
301
-  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
302
-  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
303
-  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
304
-  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
305
-  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
306
-  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
307
-  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
308
-  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
309
-  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
310
-  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
311
-  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
312
-  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
313
-  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
314
-  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
315
-  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
316
-  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
317
-  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
318
-  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
319
-  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
320
-  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
321
-  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
322
-  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
323
-  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
324
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
325
-//error:0.000022
326
-};
327
-static const uint8_t obmc16[256]={
328
-  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
329
-  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
330
-  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
331
-  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
332
-  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
333
-  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
334
-  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
335
-  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
336
-  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
337
-  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
338
-  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
339
-  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
340
-  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
341
-  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
342
-  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
343
-  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
344
-//error:0.000022
345
-};
346
-#endif /* 0 */
347 291
 
348 292
 //linear *64
349 293
 static const uint8_t obmc8[64]={
... ...
@@ -509,7 +271,6 @@ static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signe
509 509
     if(v){
510 510
         const int a= FFABS(v);
511 511
         const int e= av_log2(a);
512
-#if 1
513 512
         const int el= FFMIN(e, 10);
514 513
         put_rac(c, state+0, 0);
515 514
 
... ...
@@ -530,35 +291,6 @@ static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signe
530 530
 
531 531
         if(is_signed)
532 532
             put_rac(c, state+11 + el, v < 0); //11..21
533
-#else
534
-
535
-        put_rac(c, state+0, 0);
536
-        if(e<=9){
537
-            for(i=0; i<e; i++){
538
-                put_rac(c, state+1+i, 1);  //1..10
539
-            }
540
-            put_rac(c, state+1+i, 0);
541
-
542
-            for(i=e-1; i>=0; i--){
543
-                put_rac(c, state+22+i, (a>>i)&1); //22..31
544
-            }
545
-
546
-            if(is_signed)
547
-                put_rac(c, state+11 + e, v < 0); //11..21
548
-        }else{
549
-            for(i=0; i<e; i++){
550
-                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
551
-            }
552
-            put_rac(c, state+1+9, 0);
553
-
554
-            for(i=e-1; i>=0; i--){
555
-                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
556
-            }
557
-
558
-            if(is_signed)
559
-                put_rac(c, state+11 + 10, v < 0); //11..21
560
-        }
561
-#endif /* 1 */
562 533
     }else{
563 534
         put_rac(c, state+0, 1);
564 535
     }
... ...
@@ -789,14 +521,6 @@ static int alloc_blocks(SnowContext *s){
789 789
     return 0;
790 790
 }
791 791
 
792
-static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
793
-    uint8_t *bytestream= d->bytestream;
794
-    uint8_t *bytestream_start= d->bytestream_start;
795
-    *d= *s;
796
-    d->bytestream= bytestream;
797
-    d->bytestream_start= bytestream_start;
798
-}
799
-
800 792
 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
801 793
     const int w= s->b_width << s->block_max_depth;
802 794
     const int rem_depth= s->block_max_depth - level;
... ...
@@ -1323,40 +1047,6 @@ static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer
1323 1323
         block[3]= ptmp;
1324 1324
         pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
1325 1325
     }
1326
-#if 0
1327
-    for(y=0; y<b_h; y++){
1328
-        for(x=0; x<b_w; x++){
1329
-            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
1330
-            if(add) dst[x + y*dst_stride] += v;
1331
-            else    dst[x + y*dst_stride] -= v;
1332
-        }
1333
-    }
1334
-    for(y=0; y<b_h; y++){
1335
-        uint8_t *obmc2= obmc + (obmc_stride>>1);
1336
-        for(x=0; x<b_w; x++){
1337
-            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
1338
-            if(add) dst[x + y*dst_stride] += v;
1339
-            else    dst[x + y*dst_stride] -= v;
1340
-        }
1341
-    }
1342
-    for(y=0; y<b_h; y++){
1343
-        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1344
-        for(x=0; x<b_w; x++){
1345
-            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
1346
-            if(add) dst[x + y*dst_stride] += v;
1347
-            else    dst[x + y*dst_stride] -= v;
1348
-        }
1349
-    }
1350
-    for(y=0; y<b_h; y++){
1351
-        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1352
-        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1353
-        for(x=0; x<b_w; x++){
1354
-            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
1355
-            if(add) dst[x + y*dst_stride] += v;
1356
-            else    dst[x + y*dst_stride] -= v;
1357
-        }
1358
-    }
1359
-#else
1360 1326
     if(sliced){
1361 1327
         s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
1362 1328
     }else{
... ...
@@ -1387,7 +1077,6 @@ static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer
1387 1387
             }
1388 1388
         }
1389 1389
     }
1390
-#endif /* 0 */
1391 1390
 }
1392 1391
 
1393 1392
 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
... ...
@@ -4008,6 +3697,7 @@ AVCodec ff_snow_encoder = {
4008 4008
 #undef printf
4009 4009
 
4010 4010
 #include "libavutil/lfg.h"
4011
+#include "libavutil/mathematics.h"
4011 4012
 
4012 4013
 int main(void){
4013 4014
     int width=256;
... ...
@@ -4042,27 +3732,6 @@ int main(void){
4042 4042
     for(i=0; i<width*height; i++)
4043 4043
         if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4044 4044
 
4045
-#if 0
4046
-    printf("testing AC coder\n");
4047
-    memset(s.header_state, 0, sizeof(s.header_state));
4048
-    ff_init_range_encoder(&s.c, buffer[0], 256*256);
4049
-    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4050
-
4051
-    for(i=-256; i<256; i++){
4052
-        put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4053
-    }
4054
-    ff_rac_terminate(&s.c);
4055
-
4056
-    memset(s.header_state, 0, sizeof(s.header_state));
4057
-    ff_init_range_decoder(&s.c, buffer[0], 256*256);
4058
-    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4059
-
4060
-    for(i=-256; i<256; i++){
4061
-        int j;
4062
-        j= get_symbol(&s.c, s.header_state, 1);
4063
-        if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4064
-    }
4065
-#endif
4066 4045
     {
4067 4046
     int level, orientation, x, y;
4068 4047
     int64_t errors[8][4];
... ...
@@ -4120,7 +3789,6 @@ int main(void){
4120 4120
             buf+=stride>>1;
4121 4121
 
4122 4122
             memset(buffer[0], 0, sizeof(int)*width*height);
4123
-#if 1
4124 4123
             for(y=0; y<height; y++){
4125 4124
                 for(x=0; x<width; x++){
4126 4125
                     int tab[4]={0,2,3,1};
... ...
@@ -4128,15 +3796,6 @@ int main(void){
4128 4128
                 }
4129 4129
             }
4130 4130
             ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4131
-#else
4132
-            for(y=0; y<h; y++){
4133
-                for(x=0; x<w; x++){
4134
-                    buf[x + y*stride  ]=169;
4135
-                    buf[x + y*stride-w]=64;
4136
-                }
4137
-            }
4138
-            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4139
-#endif
4140 4131
             for(y=0; y<height; y++){
4141 4132
                 for(x=0; x<width; x++){
4142 4133
                     int64_t d= buffer[0][x + y*width];
... ...
@@ -591,7 +591,7 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
591 591
             }
592 592
         }
593 593
 
594
-        ff_h264_write_back_intra_pred_mode(h);
594
+        write_back_intra_pred_mode(h);
595 595
 
596 596
         if (mb_type == 8) {
597 597
             ff_h264_check_intra4x4_pred_mode(h);
... ...
@@ -27,6 +27,7 @@
27 27
 
28 28
 #include "libavutil/avstring.h"
29 29
 #include "libavutil/crc.h"
30
+#include "libavutil/mathematics.h"
30 31
 #include "libavutil/pixdesc.h"
31 32
 #include "libavutil/audioconvert.h"
32 33
 #include "libavutil/imgutils.h"
... ...
@@ -1039,7 +1039,7 @@ static const uint8_t subpel_idx[3][8] = {
1039 1039
  * @param s VP8 decoding context
1040 1040
  * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes
1041 1041
  * @param dst target buffer for block data at block position
1042
- * @param src reference picture buffer at origin (0, 0)
1042
+ * @param ref reference picture buffer at origin (0, 0)
1043 1043
  * @param mv motion vector (relative to block position) to get pixel data from
1044 1044
  * @param x_off horizontal position of block from origin (0, 0)
1045 1045
  * @param y_off vertical position of block from origin (0, 0)
... ...
@@ -47,6 +47,7 @@ MMX-OBJS-$(HAVE_YASM)                  += x86/dsputil_yasm.o            \
47 47
                                           x86/fmtconvert.o              \
48 48
                                           x86/h264_chromamc.o           \
49 49
                                           x86/h264_chromamc_10bit.o     \
50
+                                          x86/h264_qpel_10bit.o         \
50 51
                                           $(YASM-OBJS-yes)
51 52
 
52 53
 MMX-OBJS-$(CONFIG_FFT)                 += x86/fft.o
... ...
@@ -2530,44 +2530,56 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2530 2530
                 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
2531 2531
             }
2532 2532
 
2533
-#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
2534
-            c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
2535
-            c->PFX ## _pixels_tab[IDX][ 1] = PFX ## SIZE ## _mc10_ ## CPU; \
2536
-            c->PFX ## _pixels_tab[IDX][ 2] = PFX ## SIZE ## _mc20_ ## CPU; \
2537
-            c->PFX ## _pixels_tab[IDX][ 3] = PFX ## SIZE ## _mc30_ ## CPU; \
2538
-            c->PFX ## _pixels_tab[IDX][ 4] = PFX ## SIZE ## _mc01_ ## CPU; \
2539
-            c->PFX ## _pixels_tab[IDX][ 5] = PFX ## SIZE ## _mc11_ ## CPU; \
2540
-            c->PFX ## _pixels_tab[IDX][ 6] = PFX ## SIZE ## _mc21_ ## CPU; \
2541
-            c->PFX ## _pixels_tab[IDX][ 7] = PFX ## SIZE ## _mc31_ ## CPU; \
2542
-            c->PFX ## _pixels_tab[IDX][ 8] = PFX ## SIZE ## _mc02_ ## CPU; \
2543
-            c->PFX ## _pixels_tab[IDX][ 9] = PFX ## SIZE ## _mc12_ ## CPU; \
2544
-            c->PFX ## _pixels_tab[IDX][10] = PFX ## SIZE ## _mc22_ ## CPU; \
2545
-            c->PFX ## _pixels_tab[IDX][11] = PFX ## SIZE ## _mc32_ ## CPU; \
2546
-            c->PFX ## _pixels_tab[IDX][12] = PFX ## SIZE ## _mc03_ ## CPU; \
2547
-            c->PFX ## _pixels_tab[IDX][13] = PFX ## SIZE ## _mc13_ ## CPU; \
2548
-            c->PFX ## _pixels_tab[IDX][14] = PFX ## SIZE ## _mc23_ ## CPU; \
2549
-            c->PFX ## _pixels_tab[IDX][15] = PFX ## SIZE ## _mc33_ ## CPU
2550
-
2551
-            SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2);
2552
-            SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2);
2553
-            SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2);
2554
-            SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2);
2555
-            SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
2556
-            SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
2533
+#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
2534
+            c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
2535
+            c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
2536
+            c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
2537
+            c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
2538
+            c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
2539
+            c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
2540
+            c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
2541
+            c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
2542
+            c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
2543
+            c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
2544
+            c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
2545
+            c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
2546
+            c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
2547
+            c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
2548
+            c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
2549
+            c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU
2550
+
2551
+            SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, );
2552
+            SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, );
2553
+            SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
2554
+            SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, );
2555
+            SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, );
2556
+            SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, );
2557 2557
 
2558 2558
             if (!high_bit_depth) {
2559
-            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
2560
-            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
2561
-            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
2562
-            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
2563
-            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
2564
-            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
2559
+            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, );
2560
+            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, );
2561
+            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, );
2562
+            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, );
2563
+            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, );
2564
+            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, );
2565 2565
             }
2566
+#if HAVE_YASM
2567
+            else if (bit_depth == 10) {
2568
+#if !ARCH_X86_64
2569
+                SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
2570
+                SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
2571
+                SET_QPEL_FUNCS(put_h264_qpel, 1, 8,  10_mmxext, ff_);
2572
+                SET_QPEL_FUNCS(avg_h264_qpel, 1, 8,  10_mmxext, ff_);
2573
+#endif
2574
+                SET_QPEL_FUNCS(put_h264_qpel, 2, 4,  10_mmxext, ff_);
2575
+                SET_QPEL_FUNCS(avg_h264_qpel, 2, 4,  10_mmxext, ff_);
2576
+            }
2577
+#endif
2566 2578
 
2567
-            SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
2568
-            SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
2569
-            SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
2570
-            SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);
2579
+            SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, );
2580
+            SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, );
2581
+            SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
2582
+            SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
2571 2583
 
2572 2584
 #if HAVE_YASM
2573 2585
             c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
... ...
@@ -2627,26 +2639,26 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2627 2627
                 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
2628 2628
             }
2629 2629
 
2630
-            SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow);
2631
-            SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow);
2632
-            SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow);
2633
-            SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow);
2634
-            SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
2635
-            SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
2630
+            SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, );
2631
+            SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, );
2632
+            SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, );
2633
+            SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow, );
2634
+            SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow, );
2635
+            SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow, );
2636 2636
 
2637 2637
             if (!high_bit_depth) {
2638
-            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
2639
-            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
2640
-            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
2641
-            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
2642
-            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
2643
-            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
2638
+            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow, );
2639
+            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow, );
2640
+            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow, );
2641
+            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow, );
2642
+            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow, );
2643
+            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow, );
2644 2644
             }
2645 2645
 
2646
-            SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
2647
-            SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
2648
-            SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
2649
-            SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
2646
+            SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow, );
2647
+            SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, );
2648
+            SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, );
2649
+            SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
2650 2650
 
2651 2651
 #if HAVE_YASM
2652 2652
             if (!high_bit_depth) {
... ...
@@ -2690,7 +2702,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2690 2690
             H264_QPEL_FUNCS(3, 3, sse2);
2691 2691
             }
2692 2692
 #if HAVE_YASM
2693
+#define H264_QPEL_FUNCS_10(x, y, CPU)\
2694
+            c->put_h264_qpel_pixels_tab[0][x+y*4] = ff_put_h264_qpel16_mc##x##y##_10_##CPU;\
2695
+            c->put_h264_qpel_pixels_tab[1][x+y*4] = ff_put_h264_qpel8_mc##x##y##_10_##CPU;\
2696
+            c->avg_h264_qpel_pixels_tab[0][x+y*4] = ff_avg_h264_qpel16_mc##x##y##_10_##CPU;\
2697
+            c->avg_h264_qpel_pixels_tab[1][x+y*4] = ff_avg_h264_qpel8_mc##x##y##_10_##CPU;
2693 2698
             if (bit_depth == 10) {
2699
+                SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
2700
+                SET_QPEL_FUNCS(put_h264_qpel, 1, 8,  10_sse2, ff_);
2701
+                SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
2702
+                SET_QPEL_FUNCS(avg_h264_qpel, 1, 8,  10_sse2, ff_);
2703
+                H264_QPEL_FUNCS_10(1, 0, sse2_cache64)
2704
+                H264_QPEL_FUNCS_10(2, 0, sse2_cache64)
2705
+                H264_QPEL_FUNCS_10(3, 0, sse2_cache64)
2706
+
2694 2707
                 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_sse2;
2695 2708
                 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_sse2;
2696 2709
             }
... ...
@@ -2712,6 +2737,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2712 2712
             H264_QPEL_FUNCS(3, 2, ssse3);
2713 2713
             H264_QPEL_FUNCS(3, 3, ssse3);
2714 2714
             }
2715
+            else if (bit_depth == 10) {
2716
+                H264_QPEL_FUNCS_10(1, 0, ssse3_cache64)
2717
+                H264_QPEL_FUNCS_10(2, 0, ssse3_cache64)
2718
+                H264_QPEL_FUNCS_10(3, 0, ssse3_cache64)
2719
+            }
2715 2720
 #if HAVE_YASM
2716 2721
             if (!high_bit_depth) {
2717 2722
             c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
... ...
@@ -2807,6 +2837,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2807 2807
 #if HAVE_AVX && HAVE_YASM
2808 2808
         if (mm_flags & AV_CPU_FLAG_AVX) {
2809 2809
             if (bit_depth == 10) {
2810
+                //AVX implies !cache64.
2811
+                //TODO: Port cache(32|64) detection from x264.
2812
+                H264_QPEL_FUNCS_10(1, 0, sse2)
2813
+                H264_QPEL_FUNCS_10(2, 0, sse2)
2814
+                H264_QPEL_FUNCS_10(3, 0, sse2)
2815
+
2810 2816
                 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_avx;
2811 2817
                 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_avx;
2812 2818
             }
2813 2819
new file mode 100644
... ...
@@ -0,0 +1,891 @@
0
+;*****************************************************************************
1
+;* MMX/SSE2/AVX-optimized 10-bit H.264 qpel code
2
+;*****************************************************************************
3
+;* Copyright (C) 2011 x264 project
4
+;*
5
+;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
6
+;*
7
+;* This file is part of Libav.
8
+;*
9
+;* Libav is free software; you can redistribute it and/or
10
+;* modify it under the terms of the GNU Lesser General Public
11
+;* License as published by the Free Software Foundation; either
12
+;* version 2.1 of the License, or (at your option) any later version.
13
+;*
14
+;* Libav is distributed in the hope that it will be useful,
15
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
+;* Lesser General Public License for more details.
18
+;*
19
+;* You should have received a copy of the GNU Lesser General Public
20
+;* License along with Libav; if not, write to the Free Software
21
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
+;******************************************************************************
23
+
24
+%include "x86inc.asm"
25
+%include "x86util.asm"
26
+
27
+SECTION_RODATA 32
28
+
29
+cextern pw_16
30
+cextern pw_1
31
+cextern pb_0
32
+
33
+pw_pixel_max: times 8 dw ((1 << 10)-1)
34
+
35
+pad10: times 8 dw 10*1023
36
+pad20: times 8 dw 20*1023
37
+pad30: times 8 dw 30*1023
38
+depad: times 4 dd 32*20*1023 + 512
39
+depad2: times 8 dw 20*1023 + 16*1022 + 16
40
+unpad: times 8 dw 16*1022/32 ; needs to be mod 16
41
+
42
+tap1: times 4 dw  1, -5
43
+tap2: times 4 dw 20, 20
44
+tap3: times 4 dw -5,  1
45
+pd_0f: times 4 dd 0xffff
46
+
47
+SECTION .text
48
+
49
+
50
+%macro AVG_MOV 2 ; %1 = destination (memory at the call sites in this file), %2 = register holding the new pixels (clobbered)
51
+    pavgw %2, %1 ; %2 = rounded unsigned word average of the new pixels and the current destination
52
+    mova  %1, %2 ; write the averaged words back to the destination
53
+%endmacro
54
+
55
+%macro ADDW 3 ; %1 = accumulator register, %2 = source operand, %3 = scratch register (touched only when mmsize != 8)
56
+%if mmsize == 8
57
+    paddw %1, %2 ; 8-byte registers: add the source operand directly
58
+%else
59
+    movu  %3, %2 ; wider registers: fetch the source with an unaligned load first
60
+    paddw %1, %3 ; then add it register-to-register
61
+%endif
62
+%endmacro
63
+
64
+%macro FILT_H 4 ; %1 = a (in/out), %2 = b, %3 = c, %4 = constant added to a first (pw_16 at the call sites)
64
+    paddw  %1, %4  ; a += %4 (from here on "a" in the comments below includes this constant)
65
+    psubw  %1, %2  ; a-b
66
+    psraw  %1, 2   ; (a-b)/4
67
+    psubw  %1, %2  ; (a-b)/4-b
68
+    paddw  %1, %3  ; (a-b)/4-b+c
69
+    psraw  %1, 2   ; ((a-b)/4-b+c)/4
70
+    paddw  %1, %3  ; ((a-b)/4-b+c)/4+c = (a-5*b+20*c)/16
71
+%endmacro
73
+
74
+%macro PRELOAD_V 0 ; load the five rows r1-2*r2 .. r1+2*r2 into m0..m4; clobbers r3, leaves r1 advanced by 3*r2
74
+    lea      r3, [r2*3]    ; r3 = 3*r2 (r2 is the stride per the prototypes documented in this file)
75
+    sub      r1, r3        ; r1 = src - 3*stride
76
+    movu     m0, [r1+r2]   ; m0 = row src - 2*stride
77
+    movu     m1, [r1+r2*2] ; m1 = row src - 1*stride
78
+    add      r1, r3        ; r1 = src again
79
+    movu     m2, [r1]      ; m2 = row src
80
+    movu     m3, [r1+r2]   ; m3 = row src + 1*stride
81
+    movu     m4, [r1+r2*2] ; m4 = row src + 2*stride
82
+    add      r1, r3        ; r1 = src + 3*stride: the next row a vertical filter step will load
83
+%endmacro
85
+
86
+%macro FILT_V 8 ; %1..%5 = five consecutive rows (result is left in %1), %6 = receives the row at [r1], %7/%8 = scratch
86
+    movu     %6, [r1]      ; load the sixth row of the filter window
87
+    paddw    %1, %6        ; a = first row + sixth row (outer taps)
88
+    mova     %7, %2
89
+    paddw    %7, %5        ; b = second row + fifth row
90
+    mova     %8, %3
91
+    paddw    %8, %4        ; c = third row + fourth row (centre taps)
92
+    FILT_H   %1, %7, %8, [pw_16]
93
+    psraw    %1, 1         ; FILT_H leaves a /16 result; this extra halving makes the total divisor 32
94
+    CLIPW    %1, [pb_0], [pw_pixel_max] ; CLIPW comes from x86util.asm (not shown); presumably clamps %1 to [0, pw_pixel_max]
95
+%endmacro
97
+
98
+%macro MC 1 ; %1 = name of a macro taking (cpu, put/avg, width); it is expanded for mmxext/4 and sse2/8, each as put and avg
98
+%define OP_MOV mova ; "put" variants: OP_MOV is a plain store
99
+INIT_MMX
100
+%1 mmxext, put, 4
101
+INIT_XMM
102
+%1 sse2  , put, 8
103
+
104
+%define OP_MOV AVG_MOV ; "avg" variants: OP_MOV goes through the AVG_MOV macro
105
+INIT_MMX
106
+%1 mmxext, avg, 4
107
+INIT_XMM
108
+%1 sse2  , avg, 8
109
+%endmacro
111
+
112
+%macro MCAxA 8 ; forwards all eight arguments to MCAxA_OP, except that nothing is emitted for cpu == mmxext on x86_64
112
+%ifdef ARCH_X86_64
113
+%ifnidn %1,mmxext ; on x86_64 only the non-mmxext versions are generated
114
+MCAxA_OP %1,%2,%3,%4,%5,%6,%7,%8
115
+%endif
116
+%else ; 32-bit builds generate every version
117
+MCAxA_OP %1,%2,%3,%4,%5,%6,%7,%8
118
+%endif
119
+%endmacro
121
+
122
+%macro MCAxA_OP 8
123
+cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8
124
+%ifdef ARCH_X86_32
125
+    call stub_%2_h264_qpel%4_%3_10_%1
126
+    mov  r0, r0m
127
+    mov  r1, r1m
128
+    add  r0, %4*2
129
+    add  r1, %4*2
130
+    call stub_%2_h264_qpel%4_%3_10_%1
131
+    mov  r0, r0m
132
+    mov  r1, r1m
133
+    lea  r0, [r0+r2*%4]
134
+    lea  r1, [r1+r2*%4]
135
+    call stub_%2_h264_qpel%4_%3_10_%1
136
+    mov  r0, r0m
137
+    mov  r1, r1m
138
+    lea  r0, [r0+r2*%4+%4*2]
139
+    lea  r1, [r1+r2*%4+%4*2]
140
+    call stub_%2_h264_qpel%4_%3_10_%1
141
+    RET
142
+%else ; ARCH_X86_64
143
+    mov r10, r0
144
+    mov r11, r1
145
+    call stub_%2_h264_qpel%4_%3_10_%1
146
+    lea  r0, [r10+%4*2]
147
+    lea  r1, [r11+%4*2]
148
+    call stub_%2_h264_qpel%4_%3_10_%1
149
+    lea  r0, [r10+r2*%4]
150
+    lea  r1, [r11+r2*%4]
151
+    call stub_%2_h264_qpel%4_%3_10_%1
152
+    lea  r0, [r10+r2*%4+%4*2]
153
+    lea  r1, [r11+r2*%4+%4*2]
154
+%ifndef UNIX64 ; fall through to function
155
+    call stub_%2_h264_qpel%4_%3_10_%1
156
+    RET
157
+%endif
158
+%endif
159
+%endmacro
160
+
161
+; args: cpu, put/avg, mcXY name, block width (4 or 8), then the three numeric arguments handed on to cglobal
161
+%macro cglobal_mc 7 ; declares the exported function, the double-width wrapper (via MCAxA) and the internal stub_ label
162
+%assign i %4*2
163
+MCAxA %1, %2, %3, %4, i, %5,%6,%7
164
+
165
+cglobal %2_h264_qpel%4_%3_10_%1, %5,%6,%7
166
+%ifndef UNIX64 ; no prologue or epilogue for UNIX64
167
+    call stub_%2_h264_qpel%4_%3_10_%1 ; other targets: run the stub as a subroutine between cglobal's prologue and RET
168
+    RET
169
+%endif
170
+
171
+stub_%2_h264_qpel%4_%3_10_%1: ; the code written right after the macro invocation becomes the stub body; on UNIX64 the exported symbol falls straight into it
172
+%endmacro
174
+
175
+;-----------------------------------------------------------------------------
176
+; void h264_qpel_mc00(uint8_t *dst, uint8_t *src, int stride)
177
+;-----------------------------------------------------------------------------
178
+%macro COPY4 0 ; transfers four rows from r1 to r0 through OP_MOV; uses r2 and r3 as row offsets (callers set r3 = 3*r2), clobbers m0
178
+    movu          m0, [r1     ] ; row 0
179
+    OP_MOV [r0     ], m0
180
+    movu          m0, [r1+r2  ] ; row 1
181
+    OP_MOV [r0+r2  ], m0
182
+    movu          m0, [r1+r2*2] ; row 2
183
+    OP_MOV [r0+r2*2], m0
184
+    movu          m0, [r1+r3  ] ; row 3
185
+    OP_MOV [r0+r3  ], m0
186
+%endmacro
188
+
189
+%macro MC00 1
190
+INIT_MMX
191
+cglobal_mc mmxext, %1, mc00, 4, 3,4,0
192
+    lea           r3, [r2*3]
193
+    COPY4
194
+    ret
195
+
196
+INIT_XMM
197
+cglobal %1_h264_qpel8_mc00_10_sse2, 3,4
198
+    lea  r3, [r2*3]
199
+    COPY4
200
+    lea  r0, [r0+r2*4]
201
+    lea  r1, [r1+r2*4]
202
+    COPY4
203
+    RET
204
+
205
+cglobal %1_h264_qpel16_mc00_10_sse2, 3,4
206
+    mov r3d, 8
207
+.loop:
208
+    movu           m0, [r1      ]
209
+    movu           m1, [r1   +16]
210
+    OP_MOV [r0      ], m0
211
+    OP_MOV [r0   +16], m1
212
+    movu           m0, [r1+r2   ]
213
+    movu           m1, [r1+r2+16]
214
+    OP_MOV [r0+r2   ], m0
215
+    OP_MOV [r0+r2+16], m1
216
+    lea            r0, [r0+r2*2]
217
+    lea            r1, [r1+r2*2]
218
+    dec r3d
219
+    jg .loop
220
+    REP_RET
221
+%endmacro
222
+
223
+%define OP_MOV mova
224
+MC00 put
225
+
226
+%define OP_MOV AVG_MOV
227
+MC00 avg
228
+
229
+;-----------------------------------------------------------------------------
230
+; void h264_qpel_mc20(uint8_t *dst, uint8_t *src, int stride)
231
+;-----------------------------------------------------------------------------
232
+%macro MC_CACHE 1 ; %1 = macro taking (cpu, put/avg, width[, extra]); expanded for mmxext, sse2_cache64, ssse3_cache64 and sse2 -- only the plain sse2 expansion passes the 4th argument
232
+%define OP_MOV mova ; "put" variants store directly
233
+%define PALIGNR PALIGNR_MMX ; PALIGNR flavour used by the mmxext and sse2_cache64 expansions
234
+INIT_MMX
235
+%1 mmxext       , put, 4
236
+INIT_XMM
237
+%1 sse2_cache64 , put, 8
238
+%define PALIGNR PALIGNR_SSSE3 ; PALIGNR flavour in effect for the remaining two expansions
239
+%1 ssse3_cache64, put, 8
240
+%1 sse2         , put, 8, 0
241
+
242
+%define OP_MOV AVG_MOV ; "avg" variants go through AVG_MOV
243
+%define PALIGNR PALIGNR_MMX ; same PALIGNR selection sequence as for the put variants
244
+INIT_MMX
245
+%1 mmxext       , avg, 4
246
+INIT_XMM
247
+%1 sse2_cache64 , avg, 8
248
+%define PALIGNR PALIGNR_SSSE3
249
+%1 ssse3_cache64, avg, 8
250
+%1 sse2         , avg, 8, 0
251
+%endmacro
253
+
254
+%macro MC20 3-4 ; %1 = cpu, %2 = put/avg, %3 = width; passing any 4th argument selects the variant built from plain unaligned loads
254
+cglobal_mc %1, %2, mc20, %3, 3,4,9
255
+    mov     r3d, %3             ; r3d = number of rows left to produce
256
+    mova     m1, [pw_pixel_max] ; m1 = upper clip bound
257
+%if num_mmregs > 8
258
+    mova     m8, [pw_16]
259
+    %define p16 m8              ; rounding constant kept in a register when more than 8 are available
260
+%else
261
+    %define p16 [pw_16]         ; otherwise it is read from memory each time
262
+%endif
263
+.nextrow:
264
+%if %0 == 4
265
+    movu     m2, [r1-4]         ; src[-2]  (pixels are 2 bytes wide)
266
+    movu     m3, [r1-2]         ; src[-1]
267
+    movu     m4, [r1+0]         ; src[ 0]
268
+    ADDW     m2, [r1+6], m5
269
+    ADDW     m3, [r1+4], m5
270
+    ADDW     m4, [r1+2], m5
271
+%else ; movu is slow on these processors
272
+%if mmsize==16
273
+    movu     m2, [r1-4]         ; words src[-2..5]
274
+    movu     m0, [r1+6]         ; words src[3..10]
275
+    mova     m6, m0
276
+    psrldq   m0, 6              ; drop three words so m0 starts at src[6], the word following m2's last one
277
+
278
+    paddw    m6, m2             ; a = src[-2] + src[3]
279
+    PALIGNR  m3, m0, m2, 2, m5
280
+    PALIGNR  m7, m0, m2, 8, m5
281
+    paddw    m3, m7             ; b (presumably src[-1] + src[2], given x86util's PALIGNR semantics)
282
+    PALIGNR  m4, m0, m2, 4, m5
283
+    PALIGNR  m7, m0, m2, 6, m5
284
+    paddw    m4, m7             ; c (presumably src[0] + src[1])
285
+    SWAP      2, 6
286
+%else
287
+    movu     m2, [r1-4]         ; words src[-2..1]
288
+    movu     m6, [r1+4]         ; words src[2..5]
289
+    PALIGNR  m3, m6, m2, 2, m5
290
+    paddw    m3, m6             ; b (presumably src[-1] + src[2])
291
+    PALIGNR  m4, m6, m2, 4, m5
292
+    PALIGNR  m7, m6, m2, 6, m5
293
+    paddw    m4, m7             ; c (presumably src[0] + src[1])
294
+    paddw    m2, [r1+6]         ; a = src[-2] + src[3]
295
+%endif
296
+%endif
297
+
298
+    FILT_H   m2, m3, m4, p16
299
+    psraw    m2, 1              ; FILT_H yields /16; one more halving gives the /32 total
300
+    pxor     m0, m0             ; m0 = 0, the lower clip bound
301
+    CLIPW    m2, m0, m1
302
+    OP_MOV [r0], m2
303
+    add      r0, r2             ; advance dst by one row
304
+    add      r1, r2             ; advance src by one row
305
+    dec     r3d
306
+    jg .nextrow
307
+    rep ret
308
+%endmacro
310
+
311
+MC_CACHE MC20
312
+
313
+;-----------------------------------------------------------------------------
314
+; void h264_qpel_mc30(uint8_t *dst, uint8_t *src, int stride)
315
+;-----------------------------------------------------------------------------
316
+%macro MC30 3-4 ; %1 = cpu, %2 = put/avg, %3 = width; an optional 4th argument is accepted but not referenced here
316
+cglobal_mc %1, %2, mc30, %3, 3,5,9
317
+    lea r4, [r1+2]                           ; r4 = src + 2 bytes (one pixel): the row the mc10 body averages its filter output with
318
+    jmp stub_%2_h264_qpel%3_mc10_10_%1.body  ; continue in the shared mc10 body, which uses r4 as set above
319
+%endmacro
321
+
322
+MC_CACHE MC30
323
+
324
+;-----------------------------------------------------------------------------
325
+; void h264_qpel_mc10(uint8_t *dst, uint8_t *src, int stride)
326
+;-----------------------------------------------------------------------------
327
+%macro MC10 3-4 ; %1 = cpu, %2 = put/avg, %3 = width; passing any 4th argument selects the variant built from plain unaligned loads
327
+cglobal_mc %1, %2, mc10, %3, 3,5,9
328
+    mov      r4, r1             ; r4 = rows averaged with the filter output (src itself for mc10)
329
+.body:
330
+    mov     r3d, %3             ; r3d = number of rows left to produce
331
+    mova     m1, [pw_pixel_max] ; m1 = upper clip bound
332
+%if num_mmregs > 8
333
+    mova     m8, [pw_16]
334
+    %define p16 m8              ; rounding constant kept in a register when more than 8 are available
335
+%else
336
+    %define p16 [pw_16]         ; otherwise it is read from memory each time
337
+%endif
338
+.nextrow:
339
+%if %0 == 4
340
+    movu     m2, [r1-4]         ; src[-2]  (pixels are 2 bytes wide)
341
+    movu     m3, [r1-2]         ; src[-1]
342
+    movu     m4, [r1+0]         ; src[ 0]
343
+    ADDW     m2, [r1+6], m5
344
+    ADDW     m3, [r1+4], m5
345
+    ADDW     m4, [r1+2], m5
346
+%else ; movu is slow on these processors
347
+%if mmsize==16
348
+    movu     m2, [r1-4]         ; words src[-2..5]
349
+    movu     m0, [r1+6]         ; words src[3..10]
350
+    mova     m6, m0
351
+    psrldq   m0, 6              ; drop three words so m0 starts at src[6], the word following m2's last one
352
+
353
+    paddw    m6, m2             ; a = src[-2] + src[3]
354
+    PALIGNR  m3, m0, m2, 2, m5
355
+    PALIGNR  m7, m0, m2, 8, m5
356
+    paddw    m3, m7             ; b (presumably src[-1] + src[2], given x86util's PALIGNR semantics)
357
+    PALIGNR  m4, m0, m2, 4, m5
358
+    PALIGNR  m7, m0, m2, 6, m5
359
+    paddw    m4, m7             ; c (presumably src[0] + src[1])
360
+    SWAP      2, 6
361
+%else
362
+    movu     m2, [r1-4]         ; words src[-2..1]
363
+    movu     m6, [r1+4]         ; words src[2..5]
364
+    PALIGNR  m3, m6, m2, 2, m5
365
+    paddw    m3, m6             ; b (presumably src[-1] + src[2])
366
+    PALIGNR  m4, m6, m2, 4, m5
367
+    PALIGNR  m7, m6, m2, 6, m5
368
+    paddw    m4, m7             ; c (presumably src[0] + src[1])
369
+    paddw    m2, [r1+6]         ; a = src[-2] + src[3]
370
+%endif
371
+%endif
372
+
373
+    FILT_H   m2, m3, m4, p16
374
+    psraw    m2, 1              ; FILT_H yields /16; one more halving gives the /32 total
375
+    pxor     m0, m0             ; m0 = 0, the lower clip bound
376
+    CLIPW    m2, m0, m1
377
+    movu     m3, [r4]           ; unfiltered pixels selected by the entry point (mc10: src, mc30: src + 1 pixel)
378
+    pavgw    m2, m3             ; quarter-pel sample = rounded average of filtered and unfiltered pixels
379
+    OP_MOV [r0], m2
380
+    add      r0, r2             ; advance dst by one row
381
+    add      r1, r2             ; advance the filter source by one row
382
+    add      r4, r2             ; advance the averaging source by one row
383
+    dec     r3d
384
+    jg .nextrow
385
+    rep ret
386
+%endmacro
388
+
389
+MC_CACHE MC10
390
+
391
+;-----------------------------------------------------------------------------
392
+; void h264_qpel_mc02(uint8_t *dst, uint8_t *src, int stride)
393
+;-----------------------------------------------------------------------------
394
+%macro V_FILT 11 ; %9 = width, %10 = index, %11 = cpu form the label; %1-%8 are not referenced (the body names m0-m7 directly)
394
+v_filt%9_%10_10_%11:
395
+    add    r4, r2   ; main entry: also advance r4 by one row
396
+.no_addr4:          ; alternate entry for callers that do not use r4
397
+    FILT_V m0, m1, m2, m3, m4, m5, m6, m7
398
+    add    r1, r2   ; next source row for the following call
399
+    add    r0, r2   ; advance dst; the caller stores the result held in m0 after this returns
400
+    ret
401
+%endmacro
403
+
404
+INIT_MMX
405
+RESET_MM_PERMUTATION
406
+%assign i 0
407
+%rep 4
408
+V_FILT m0, m1, m2, m3, m4, m5, m6, m7, 4, i, mmxext
409
+SWAP 0,1,2,3,4,5
410
+%assign i i+1
411
+%endrep
412
+
413
+INIT_XMM
414
+RESET_MM_PERMUTATION
415
+%assign i 0
416
+%rep 6
417
+V_FILT m0, m1, m2, m3, m4, m5, m6, m7, 8, i, sse2
418
+SWAP 0,1,2,3,4,5
419
+%assign i i+1
420
+%endrep
421
+
422
+%macro MC02 3 ; %1 = cpu, %2 = put/avg, %3 = width (also the number of rows produced)
422
+cglobal_mc %1, %2, mc02, %3, 3,4,8
423
+    PRELOAD_V
424
+
425
+    sub      r0, r2 ; start one row early: every v_filt call advances r0 by r2 before the store below
426
+%assign j 0
427
+%rep %3
428
+    %assign i (j % 6)
429
+    call v_filt%3_ %+ i %+ _10_%1.no_addr4
430
+    OP_MOV [r0], m0
431
+    SWAP 0,1,2,3,4,5
432
+    %assign j j+1
433
+%endrep
434
+    ret             ; plain ret: returns to whoever called/fell into the stub (see cglobal_mc)
435
+%endmacro
437
+
438
+MC MC02
439
+
440
+;-----------------------------------------------------------------------------
441
+; void h264_qpel_mc01(uint8_t *dst, uint8_t *src, int stride)
442
+;-----------------------------------------------------------------------------
443
+%macro MC01 3 ; %1 = cpu, %2 = put/avg, %3 = width (also the number of rows produced)
443
+cglobal_mc %1, %2, mc01, %3, 3,5,8
444
+    mov      r4, r1 ; r4 = rows averaged with the vertical filter output (src itself for mc01)
445
+.body:
446
+    PRELOAD_V
447
+
448
+    sub      r4, r2 ; v_filt's main entry advances r4 by r2 before each use, so start one row early
449
+    sub      r0, r2 ; likewise for dst
450
+%assign j 0
451
+%rep %3
452
+    %assign i (j % 6)
453
+    call v_filt%3_ %+ i %+ _10_%1
454
+    movu     m7, [r4] ; unfiltered row selected by the entry point
455
+    pavgw    m0, m7   ; quarter-pel sample = rounded average of filtered and unfiltered pixels
456
+    OP_MOV [r0], m0
457
+    SWAP 0,1,2,3,4,5
458
+    %assign j j+1
459
+%endrep
460
+    ret
461
+%endmacro
463
+
464
+MC MC01
465
+
466
+;-----------------------------------------------------------------------------
467
+; void h264_qpel_mc03(uint8_t *dst, uint8_t *src, int stride)
468
+;-----------------------------------------------------------------------------
469
+%macro MC03 3 ; %1 = cpu, %2 = put/avg, %3 = width
469
+cglobal_mc %1, %2, mc03, %3, 3,5,8
470
+    lea r4, [r1+r2]                          ; r4 = src + one row: the rows the mc01 body averages its filter output with
471
+    jmp stub_%2_h264_qpel%3_mc01_10_%1.body  ; continue in the shared mc01 body
472
+%endmacro
474
+
475
+MC MC03
476
+
477
+;-----------------------------------------------------------------------------
478
+; void h264_qpel_mc11(uint8_t *dst, uint8_t *src, int stride)
479
+;-----------------------------------------------------------------------------
480
+%macro H_FILT_AVG 3-4
481
+h_filt%2_%3_10_%1:
482
+;FILT_H with fewer registers and averaged with the FILT_V result
483
+;m6,m7 are tmp registers, m0 is the FILT_V result, the rest are to be used next in the next iteration
484
+;unfortunately I need three registers, so m5 will have to be re-read from memory
485
+    movu     m5, [r4-4]
486
+    ADDW     m5, [r4+6], m7
487
+    movu     m6, [r4-2]
488
+    ADDW     m6, [r4+4], m7
489
+    paddw    m5, [pw_16]
490
+    psubw    m5, m6  ; a-b
491
+    psraw    m5, 2   ; (a-b)/4
492
+    psubw    m5, m6  ; (a-b)/4-b
493
+    movu     m6, [r4+0]
494
+    ADDW     m6, [r4+2], m7
495
+    paddw    m5, m6  ; (a-b)/4-b+c
496
+    psraw    m5, 2   ; ((a-b)/4-b+c)/4
497
+    paddw    m5, m6  ; ((a-b)/4-b+c)/4+c = (a-5*b+20*c)/16
498
+    psraw    m5, 1
499
+    CLIPW    m5, [pb_0], [pw_pixel_max]
500
+;avg FILT_V, FILT_H
501
+    pavgw    m0, m5
502
+%if %0!=4
503
+    movu     m5, [r1+r5]
504
+%endif
505
+    ret
506
+%endmacro
507
+
508
+INIT_MMX
509
+RESET_MM_PERMUTATION
510
+%assign i 0
511
+%rep 3
512
+H_FILT_AVG mmxext, 4, i
513
+SWAP 0,1,2,3,4,5
514
+%assign i i+1
515
+%endrep
516
+H_FILT_AVG mmxext, 4, i, 0
517
+
518
+INIT_XMM
519
+RESET_MM_PERMUTATION
520
+%assign i 0
521
+%rep 6
522
+%if i==1
523
+H_FILT_AVG sse2,   8, i, 0
524
+%else
525
+H_FILT_AVG sse2,   8, i
526
+%endif
527
+SWAP 0,1,2,3,4,5
528
+%assign i i+1
529
+%endrep
530
+
531
+%macro MC11 3 ; %1 = cpu, %2 = put/avg, %3 = width (also the number of rows produced)
531
+; this REALLY needs x86_64
532
+cglobal_mc %1, %2, mc11, %3, 3,6,8
533
+    mov      r4, r1 ; r4 = source of the horizontal filter (h_filt reads around [r4]); r1 feeds the vertical filter
534
+.body:
535
+    PRELOAD_V
536
+
537
+    sub      r0, r2 ; v_filt advances r0 and r4 by r2 before they are used, so start both one row early
538
+    sub      r4, r2
539
+    mov      r5, r2
540
+    neg      r5     ; r5 = -stride, used for the [r1+r5] reloads of m5
541
+%assign j 0
542
+%rep %3
543
+    %assign i (j % 6)
544
+    call v_filt%3_ %+ i %+ _10_%1
545
+    call h_filt%3_ %+ i %+ _10_%1
546
+%if %3==8 && i==1
547
+    movu     m5, [r1+r5] ; the i==1 sse2 h_filt is built without its own reload, so it is done here
548
+%endif
549
+    OP_MOV [r0], m0
550
+    SWAP 0,1,2,3,4,5
551
+    %assign j j+1
552
+%endrep
553
+    ret
554
+%endmacro
556
+
557
+MC MC11
558
+
559
+;-----------------------------------------------------------------------------
560
+; void h264_qpel_mc31(uint8_t *dst, uint8_t *src, int stride)
561
+;-----------------------------------------------------------------------------
562
+%macro MC31 3 ; %1 = cpu, %2 = put/avg, %3 = width
562
+cglobal_mc %1, %2, mc31, %3, 3,6,8
563
+    mov r4, r1                               ; horizontal filter source = src
564
+    add r1, 2                                ; vertical filter source = src + 2 bytes (one pixel)
565
+    jmp stub_%2_h264_qpel%3_mc11_10_%1.body  ; continue in the shared mc11 body
566
+%endmacro
568
+
569
+MC MC31
570
+
571
+;-----------------------------------------------------------------------------
572
+; void h264_qpel_mc13(uint8_t *dst, uint8_t *src, int stride)
573
+;-----------------------------------------------------------------------------
574
+%macro MC13 3 ; %1 = cpu, %2 = put/avg, %3 = width. NOTE(review): declares 7 gprs / 12 vector regs while mc11, whose body runs, declares 6 / 8 -- confirm whether the larger counts are intended
574
+cglobal_mc %1, %2, mc13, %3, 3,7,12
575
+    lea r4, [r1+r2]                          ; horizontal filter source = src + one row; vertical filter source stays src
576
+    jmp stub_%2_h264_qpel%3_mc11_10_%1.body  ; continue in the shared mc11 body
577
+%endmacro
579
+
580
+MC MC13
581
+
582
+;-----------------------------------------------------------------------------
583
+; void h264_qpel_mc33(uint8_t *dst, uint8_t *src, int stride)
584
+;-----------------------------------------------------------------------------
585
+%macro MC33 3 ; %1 = cpu, %2 = put/avg, %3 = width
585
+cglobal_mc %1, %2, mc33, %3, 3,6,8
586
+    lea r4, [r1+r2]                          ; horizontal filter source = src + one row
587
+    add r1, 2                                ; vertical filter source = src + 2 bytes (one pixel)
588
+    jmp stub_%2_h264_qpel%3_mc11_10_%1.body  ; continue in the shared mc11 body
589
+%endmacro
591
+
592
+MC MC33
593
+
594
+;-----------------------------------------------------------------------------
595
+; void h264_qpel_mc22(uint8_t *dst, uint8_t *src, int stride)
596
+;-----------------------------------------------------------------------------
597
+%macro FILT_H2 3 ; %1 = a (receives a-5*b+20*c, no rounding or shift), %2 = b (clobbered), %3 = c (clobbered)
597
+    psubw  %1, %2  ; a-b
598
+    psubw  %2, %3  ; b-c
599
+    psllw  %2, 2   ; 4*(b-c)
600
+    psubw  %1, %2  ; a-5*b+4*c
601
+    psllw  %3, 4   ; 16*c
602
+    paddw  %1, %3  ; a-5*b+20*c
603
+%endmacro
605
+
606
+%macro FILT_VNRD 8 ; like FILT_V but via FILT_H2: %1 ends up holding the unshifted, unclipped a-5*b+20*c; %6 receives the row at [r1], %7/%8 = scratch
606
+    movu     %6, [r1]    ; load the sixth row of the filter window
607
+    paddw    %1, %6      ; a = first row + sixth row
608
+    mova     %7, %2
609
+    paddw    %7, %5      ; b = second row + fifth row
610
+    mova     %8, %3
611
+    paddw    %8, %4      ; c = third row + fourth row
612
+    FILT_H2  %1, %7, %8
613
+%endmacro
615
+
616
+%macro HV 2
617
+%ifidn %1,sse2
618
+%define PAD 12
619
+%define COUNT 2
620
+%else
621
+%define PAD 0
622
+%define COUNT 3
623
+%endif
624
+put_hv%2_10_%1:
625
+    neg      r2           ; This actually saves instructions
626
+    lea      r1, [r1+r2*2-mmsize+PAD]
627
+    lea      r4, [rsp+PAD+gprsize]
628
+    mov     r3d, COUNT
629
+.v_loop:
630
+    movu     m0, [r1]
631
+    sub      r1, r2
632
+    movu     m1, [r1]
633
+    sub      r1, r2
634
+    movu     m2, [r1]
635
+    sub      r1, r2
636
+    movu     m3, [r1]
637
+    sub      r1, r2
638
+    movu     m4, [r1]
639
+    sub      r1, r2
640
+%assign i 0
641
+%rep %2-1
642
+    FILT_VNRD m0, m1, m2, m3, m4, m5, m6, m7
643
+    psubw    m0, [pad20]
644
+    movu     [r4+i*mmsize*3], m0
645
+    sub      r1, r2
646
+    SWAP 0,1,2,3,4,5
647
+%assign i i+1
648
+%endrep
649
+    FILT_VNRD m0, m1, m2, m3, m4, m5, m6, m7
650
+    psubw    m0, [pad20]
651
+    movu     [r4+i*mmsize*3], m0
652
+    add      r4, mmsize
653
+    lea      r1, [r1+r2*8+mmsize]
654
+%if %2==8
655
+    lea      r1, [r1+r2*4]
656
+%endif
657
+    dec      r3d
658
+    jg .v_loop
659
+    neg      r2
660
+    ret
661
+%endmacro
662
+
663
+INIT_MMX
664
+HV mmxext, 4
665
+INIT_XMM
666
+HV sse2  , 8
667
+
668
+%macro H_LOOP 2
669
+%if num_mmregs > 8
670
+    %define s1 m8
671
+    %define s2 m9
672
+    %define s3 m10
673
+    %define d1 m11
674
+%else
675
+    %define s1 [tap1]
676
+    %define s2 [tap2]
677
+    %define s3 [tap3]
678
+    %define d1 [depad]
679
+%endif
680
+h%2_loop_op_%1:
681
+    movu       m1, [r1+mmsize-4]
682
+    movu       m2, [r1+mmsize-2]
683
+    mova       m3, [r1+mmsize+0]
684
+    movu       m4, [r1+mmsize+2]
685
+    movu       m5, [r1+mmsize+4]
686
+    movu       m6, [r1+mmsize+6]
687
+%if num_mmregs > 8
688
+    pmaddwd    m1, s1
689
+    pmaddwd    m2, s1
690
+    pmaddwd    m3, s2
691
+    pmaddwd    m4, s2
692
+    pmaddwd    m5, s3
693
+    pmaddwd    m6, s3
694
+    paddd      m1, d1
695
+    paddd      m2, d1
696
+%else
697
+    mova       m0, s1
698
+    pmaddwd    m1, m0
699
+    pmaddwd    m2, m0
700
+    mova       m0, s2
701
+    pmaddwd    m3, m0
702
+    pmaddwd    m4, m0
703
+    mova       m0, s3
704
+    pmaddwd    m5, m0
705
+    pmaddwd    m6, m0
706
+    mova       m0, d1
707
+    paddd      m1, m0
708
+    paddd      m2, m0
709
+%endif
710
+    paddd      m3, m5
711
+    paddd      m4, m6
712
+    paddd      m1, m3
713
+    paddd      m2, m4
714
+    psrad      m1, 10
715
+    psrad      m2, 10
716
+    pslld      m2, 16
717
+    pand       m1, [pd_0f]
718
+    por        m1, m2
719
+%if num_mmregs <= 8
720
+    pxor       m0, m0
721
+%endif
722
+    CLIPW      m1, m0, m7
723
+    add        r1, mmsize*3
724
+    ret
725
+%endmacro
726
+
727
+INIT_MMX
728
+H_LOOP mmxext, 4
729
+INIT_XMM
730
+H_LOOP sse2  , 8
731
+
732
+%macro MC22 3 ; %1 = cpu, %2 = put/avg, %3 = width (also the number of rows produced)
732
+cglobal_mc %1, %2, mc22, %3, 3,7,12
733
+%define PAD mmsize*8*4*2      ; SIZE*16*4*sizeof(pixel)
734
+    mov      r6, rsp          ; backup stack pointer
735
+    and     rsp, ~(mmsize-1)  ; align stack
736
+    sub     rsp, PAD          ; reserve the scratch buffer that put_hv fills
737
+
738
+    call put_hv%3_10_%1
739
+
740
+    mov       r3d, %3         ; r3d = number of rows left to produce
741
+    mova       m7, [pw_pixel_max] ; m7 = upper clip bound used by the h-loop helper
742
+%if num_mmregs > 8
743
+    pxor       m0, m0         ; with spare registers the helper expects m0 = 0 and its constants preloaded in m8-m11
744
+    mova       m8, [tap1]
745
+    mova       m9, [tap2]
746
+    mova      m10, [tap3]
747
+    mova      m11, [depad]
748
+%endif
749
+    mov        r1, rsp        ; r1 walks the scratch buffer; the helper advances it by mmsize*3 per call
750
+.h_loop:
751
+    call h%3_loop_op_%1
752
+
753
+    OP_MOV   [r0], m1
754
+    add        r0, r2         ; advance dst by one row
755
+    dec       r3d
756
+    jg .h_loop
757
+
758
+    mov     rsp, r6          ; restore stack pointer
759
+    ret
760
+%endmacro
762
+
763
+MC MC22
764
+
765
+;-----------------------------------------------------------------------------
766
+; void h264_qpel_mc12(uint8_t *dst, uint8_t *src, int stride)
767
+;-----------------------------------------------------------------------------
768
+%macro MC12 3 ; %1 = cpu, %2 = put/avg, %3 = width (also the number of rows produced)
768
+cglobal_mc %1, %2, mc12, %3, 3,7,12
769
+%define PAD mmsize*8*4*2        ; SIZE*16*4*sizeof(pixel)
770
+    mov        r6, rsp          ; backup stack pointer
771
+    and       rsp, ~(mmsize-1)  ; align stack
772
+    sub       rsp, PAD          ; reserve the scratch buffer that put_hv fills
773
+
774
+    call put_hv%3_10_%1
775
+
776
+    xor       r4d, r4d          ; r4 = byte offset of the second averaging operand (0 for mc12; other entry points set their own)
777
+.body:
778
+    mov       r3d, %3           ; r3d = number of rows left to produce
779
+    pxor       m0, m0           ; m0 = 0, the lower clip bound
780
+    mova       m7, [pw_pixel_max] ; m7 = upper clip bound
781
+%if num_mmregs > 8
782
+    mova       m8, [tap1]       ; preload the h-loop helper's constants when spare registers exist
783
+    mova       m9, [tap2]
784
+    mova      m10, [tap3]
785
+    mova      m11, [depad]
786
+%endif
787
+    mov        r1, rsp          ; r1 walks the scratch buffer; the helper advances it by mmsize*3 per call
788
+.h_loop:
789
+    call h%3_loop_op_%1
790
+
791
+    movu       m3, [r1+r4-2*mmsize] ; movu needed for mc32, etc
792
+    paddw      m3, [depad2]     ; undo the stored bias and add rounding before the shift
793
+    psrlw      m3, 5
794
+    psubw      m3, [unpad]      ; remove what is left of the bias after the shift
795
+    CLIPW      m3, m0, m7
796
+    pavgw      m1, m3           ; average the helper's result (m1) with the intermediate sample
797
+
798
+    OP_MOV   [r0], m1
799
+    add        r0, r2           ; advance dst by one row
800
+    dec       r3d
801
+    jg .h_loop
802
+
803
+    mov     rsp, r6          ; restore stack pointer
804
+    ret
805
+%endmacro
807
+
808
+MC MC12
809
+
810
+;-----------------------------------------------------------------------------
811
+; void h264_qpel_mc32(uint8_t *dst, uint8_t *src, int stride)
812
+;-----------------------------------------------------------------------------
813
+%macro MC32 3 ; %1 = cpu, %2 = put/avg, %3 = width
813
+cglobal_mc %1, %2, mc32, %3, 3,7,12
814
+%define PAD mmsize*8*3*2  ; SIZE*16*3*sizeof(pixel)
815
+    mov  r6, rsp          ; backup stack pointer
816
+    and rsp, ~(mmsize-1)  ; align stack
817
+    sub rsp, PAD          ; reserve the scratch buffer that put_hv fills
818
+
819
+    call put_hv%3_10_%1
820
+
821
+    mov r4d, 2            ; sizeof(pixel)
822
+    jmp stub_%2_h264_qpel%3_mc12_10_%1.body ; shared mc12 body; it also restores rsp from r6
823
+%endmacro
825
+
826
+MC MC32
827
+
828
+;-----------------------------------------------------------------------------
829
+; void h264_qpel_mc21(uint8_t *dst, uint8_t *src, int stride)
830
+;-----------------------------------------------------------------------------
831
+%macro H_NRD 2 ; %1 = cpu, %2 = width (also the number of rows); emits put_h<width>_10_<cpu>: unrounded horizontal filter of the rows at r5 into the caller's stack buffer
831
+put_h%2_10_%1:
832
+    add       rsp, gprsize      ; step over the return address so [rsp+r4] addresses the buffer the caller reserved
833
+    mov       r3d, %2           ; r3d = number of rows left to produce
834
+    xor       r4d, r4d          ; r4 = byte offset of the current row in the buffer
835
+    mova       m6, [pad20]      ; bias subtracted from every stored sample
836
+.nextrow:
837
+    movu       m2, [r5-4]       ; src[-2]  (pixels are 2 bytes wide)
838
+    movu       m3, [r5-2]       ; src[-1]
839
+    movu       m4, [r5+0]       ; src[ 0]
840
+    ADDW       m2, [r5+6], m5
841
+    ADDW       m3, [r5+4], m5
842
+    ADDW       m4, [r5+2], m5
843
+
844
+    FILT_H2    m2, m3, m4
845
+    psubw      m2, m6           ; store a-5*b+20*c minus the pad20 bias
846
+    mova [rsp+r4], m2
847
+    add       r4d, mmsize*3     ; buffer rows are mmsize*3 bytes apart
848
+    add        r5, r2           ; advance src by one row
849
+    dec       r3d
850
+    jg .nextrow
851
+    sub       rsp, gprsize      ; point rsp back at the return address for ret
852
+    ret
853
+%endmacro
855
+
856
+INIT_MMX
857
+H_NRD mmxext, 4
858
+INIT_XMM
859
+H_NRD sse2  , 8
860
+
861
+%macro MC21 3 ; %1 = cpu, %2 = put/avg, %3 = width
861
+cglobal_mc %1, %2, mc21, %3, 3,7,12
862
+    mov   r5, r1           ; r5 = rows fed to the horizontal-only filter (src itself for mc21)
863
+.body:
864
+%define PAD mmsize*8*3*2   ; SIZE*16*3*sizeof(pixel)
865
+    mov   r6, rsp          ; backup stack pointer
866
+    and  rsp, ~(mmsize-1)  ; align stack
867
+
868
+    sub  rsp, PAD          ; first buffer: output of put_h (horizontal filter of the rows at r5)
869
+    call put_h%3_10_%1
870
+
871
+    sub  rsp, PAD          ; second buffer, PAD bytes below the first: output of put_hv
872
+    call put_hv%3_10_%1
873
+
874
+    mov r4d, PAD-mmsize    ; H buffer
875
+    jmp stub_%2_h264_qpel%3_mc12_10_%1.body ; shared mc12 body; it also restores rsp from r6
876
+%endmacro
878
+
879
+MC MC21
880
+
881
+;-----------------------------------------------------------------------------
882
+; void h264_qpel_mc23(uint8_t *dst, uint8_t *src, int stride)
883
+;-----------------------------------------------------------------------------
884
+%macro MC23 3 ; %1 = cpu, %2 = put/avg, %3 = width
884
+cglobal_mc %1, %2, mc23, %3, 3,7,12
885
+    lea   r5, [r1+r2]                        ; r5 = src + one row: the rows fed to the horizontal-only filter in the mc21 body
886
+    jmp stub_%2_h264_qpel%3_mc21_10_%1.body  ; continue in the shared mc21 body
887
+%endmacro
889
+
890
+MC MC23
... ...
@@ -1,5 +1,6 @@
1 1
 /*
2 2
  * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
3
+ * Copyright (c) 2011 Daniel Kang
3 4
  *
4 5
  * This file is part of FFmpeg.
5 6
  *
... ...
@@ -1199,3 +1200,100 @@ H264_MC_816(H264_MC_HV, sse2)
1199 1199
 H264_MC_816(H264_MC_H, ssse3)
1200 1200
 H264_MC_816(H264_MC_HV, ssse3)
1201 1201
 #endif
1202
+
1203
+
1204
+
1205
+//10bit
1206
+#define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
1207
+void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
1208
+    (uint8_t *dst, uint8_t *src, int stride);
1209
+
1210
+#define LUMA_MC_ALL(DEPTH, TYPE, OPT) \
1211
+    LUMA_MC_OP(put,  4, DEPTH, TYPE, OPT) \
1212
+    LUMA_MC_OP(avg,  4, DEPTH, TYPE, OPT) \
1213
+    LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \
1214
+    LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT) \
1215
+    LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
1216
+    LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
1217
+
1218
+#define LUMA_MC_816(DEPTH, TYPE, OPT) \
1219
+    LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \
1220
+    LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT) \
1221
+    LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
1222
+    LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
1223
+
1224
+LUMA_MC_ALL(10, mc00, mmxext)
1225
+LUMA_MC_ALL(10, mc10, mmxext)
1226
+LUMA_MC_ALL(10, mc20, mmxext)
1227
+LUMA_MC_ALL(10, mc30, mmxext)
1228
+LUMA_MC_ALL(10, mc01, mmxext)
1229
+LUMA_MC_ALL(10, mc11, mmxext)
1230
+LUMA_MC_ALL(10, mc21, mmxext)
1231
+LUMA_MC_ALL(10, mc31, mmxext)
1232
+LUMA_MC_ALL(10, mc02, mmxext)
1233
+LUMA_MC_ALL(10, mc12, mmxext)
1234
+LUMA_MC_ALL(10, mc22, mmxext)
1235
+LUMA_MC_ALL(10, mc32, mmxext)
1236
+LUMA_MC_ALL(10, mc03, mmxext)
1237
+LUMA_MC_ALL(10, mc13, mmxext)
1238
+LUMA_MC_ALL(10, mc23, mmxext)
1239
+LUMA_MC_ALL(10, mc33, mmxext)
1240
+
1241
+LUMA_MC_816(10, mc00, sse2)
1242
+LUMA_MC_816(10, mc10, sse2)
1243
+LUMA_MC_816(10, mc10, sse2_cache64)
1244
+LUMA_MC_816(10, mc10, ssse3_cache64)
1245
+LUMA_MC_816(10, mc20, sse2)
1246
+LUMA_MC_816(10, mc20, sse2_cache64)
1247
+LUMA_MC_816(10, mc20, ssse3_cache64)
1248
+LUMA_MC_816(10, mc30, sse2)
1249
+LUMA_MC_816(10, mc30, sse2_cache64)
1250
+LUMA_MC_816(10, mc30, ssse3_cache64)
1251
+LUMA_MC_816(10, mc01, sse2)
1252
+LUMA_MC_816(10, mc11, sse2)
1253
+LUMA_MC_816(10, mc21, sse2)
1254
+LUMA_MC_816(10, mc31, sse2)
1255
+LUMA_MC_816(10, mc02, sse2)
1256
+LUMA_MC_816(10, mc12, sse2)
1257
+LUMA_MC_816(10, mc22, sse2)
1258
+LUMA_MC_816(10, mc32, sse2)
1259
+LUMA_MC_816(10, mc03, sse2)
1260
+LUMA_MC_816(10, mc13, sse2)
1261
+LUMA_MC_816(10, mc23, sse2)
1262
+LUMA_MC_816(10, mc33, sse2)
1263
+
1264
+#define QPEL16_OPMC(OP, MC, MMX)\
1265
+void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
1266
+    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst   , src   , stride);\
1267
+    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
1268
+    src += 8*stride;\
1269
+    dst += 8*stride;\
1270
+    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst   , src   , stride);\
1271
+    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
1272
+}
1273
+
1274
+#define QPEL16_OP(MC, MMX)\
1275
+QPEL16_OPMC(put, MC, MMX)\
1276
+QPEL16_OPMC(avg, MC, MMX)
1277
+
1278
+#define QPEL16(MMX)\
1279
+QPEL16_OP(mc00, MMX)\
1280
+QPEL16_OP(mc01, MMX)\
1281
+QPEL16_OP(mc02, MMX)\
1282
+QPEL16_OP(mc03, MMX)\
1283
+QPEL16_OP(mc10, MMX)\
1284
+QPEL16_OP(mc11, MMX)\
1285
+QPEL16_OP(mc12, MMX)\
1286
+QPEL16_OP(mc13, MMX)\
1287
+QPEL16_OP(mc20, MMX)\
1288
+QPEL16_OP(mc21, MMX)\
1289
+QPEL16_OP(mc22, MMX)\
1290
+QPEL16_OP(mc23, MMX)\
1291
+QPEL16_OP(mc30, MMX)\
1292
+QPEL16_OP(mc31, MMX)\
1293
+QPEL16_OP(mc32, MMX)\
1294
+QPEL16_OP(mc33, MMX)
1295
+
1296
+#if ARCH_X86_32 // ARCH_X86_64 implies sse2+
1297
+QPEL16(mmxext)
1298
+#endif
... ...
@@ -18,6 +18,8 @@
18 18
  * License along with FFmpeg; if not, write to the Free Software
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21
+
22
+#include "libavutil/mathematics.h"
21 23
 #include "libavutil/imgutils.h"
22 24
 #include "avcodec.h"
23 25
 #include "get_bits.h"
... ...
@@ -47,6 +47,7 @@
47 47
 
48 48
 #include <alsa/asoundlib.h>
49 49
 #include "libavutil/opt.h"
50
+#include "libavutil/mathematics.h"
50 51
 
51 52
 #include "avdevice.h"
52 53
 #include "alsa-audio.h"
... ...
@@ -23,7 +23,10 @@
23 23
 #define AVFILTER_AVFILTER_H
24 24
 
25 25
 #include "libavutil/avutil.h"
26
+#include "libavutil/log.h"
26 27
 #include "libavutil/samplefmt.h"
28
+#include "libavutil/pixfmt.h"
29
+#include "libavutil/rational.h"
27 30
 
28 31
 #define LIBAVFILTER_VERSION_MAJOR  2
29 32
 #define LIBAVFILTER_VERSION_MINOR 24
... ...
@@ -83,8 +83,8 @@ static char *parse_link_name(const char **buf, void *log_ctx)
83 83
  * Create an instance of a filter, initialize and insert it in the
84 84
  * filtergraph in *ctx.
85 85
  *
86
+ * @param filt_ctx put here a filter context in case of successful creation and configuration, NULL otherwise.
86 87
  * @param ctx the filtergraph context
87
- * @param put here a filter context in case of successful creation and configuration, NULL otherwise.
88 88
  * @param index an index which is supposed to be unique for each filter instance added to the filtergraph
89 89
  * @param filt_name the name of the filter to create
90 90
  * @param args the arguments provided to the filter during its initialization
... ...
@@ -23,6 +23,7 @@
23 23
  * aspect ratio modification video filters
24 24
  */
25 25
 
26
+#include "libavutil/mathematics.h"
26 27
 #include "avfilter.h"
27 28
 
28 29
 typedef struct {
... ...
@@ -30,6 +30,7 @@
30 30
 #include "libavutil/avstring.h"
31 31
 #include "libavutil/libm.h"
32 32
 #include "libavutil/imgutils.h"
33
+#include "libavutil/mathematics.h"
33 34
 
34 35
 static const char *var_names[] = {
35 36
     "E",
... ...
@@ -30,6 +30,7 @@
30 30
 #include "libavutil/avstring.h"
31 31
 #include "libavutil/pixdesc.h"
32 32
 #include "libavutil/imgutils.h"
33
+#include "libavutil/mathematics.h"
33 34
 #include "internal.h"
34 35
 
35 36
 static const char *var_names[] = {
... ...
@@ -32,6 +32,7 @@
32 32
 #include "libavutil/avassert.h"
33 33
 #include "libavutil/imgutils.h"
34 34
 #include "libavutil/parseutils.h"
35
+#include "libavutil/mathematics.h"
35 36
 #include "drawutils.h"
36 37
 
37 38
 static const char *var_names[] = {
... ...
@@ -26,6 +26,7 @@
26 26
 #include "avfilter.h"
27 27
 #include "libavutil/avstring.h"
28 28
 #include "libavutil/eval.h"
29
+#include "libavutil/mathematics.h"
29 30
 #include "libavutil/pixdesc.h"
30 31
 #include "libavutil/avassert.h"
31 32
 #include "libswscale/swscale.h"
... ...
@@ -27,6 +27,7 @@
27 27
 /* #define DEBUG */
28 28
 
29 29
 #include "libavutil/eval.h"
30
+#include "libavutil/mathematics.h"
30 31
 #include "avfilter.h"
31 32
 
32 33
 static const char *var_names[] = {
... ...
@@ -25,6 +25,7 @@
25 25
 
26 26
 #include "libavutil/avstring.h"
27 27
 #include "libavutil/eval.h"
28
+#include "libavutil/mathematics.h"
28 29
 #include "libavutil/rational.h"
29 30
 #include "avfilter.h"
30 31
 #include "internal.h"
... ...
@@ -27,6 +27,7 @@
27 27
 #include "libavutil/pixdesc.h"
28 28
 #include "libavutil/colorspace.h"
29 29
 #include "libavutil/imgutils.h"
30
+#include "libavutil/mathematics.h"
30 31
 #include "libavutil/parseutils.h"
31 32
 #include "drawutils.h"
32 33
 
... ...
@@ -23,6 +23,7 @@
23 23
 
24 24
 #include "libavutil/avstring.h"
25 25
 #include "libavutil/eval.h"
26
+#include "libavutil/mathematics.h"
26 27
 #include "libavutil/parseutils.h"
27 28
 #include "avfilter.h"
28 29
 
... ...
@@ -28,6 +28,7 @@
28 28
  */
29 29
 
30 30
 #include "libavutil/intreadwrite.h"
31
+#include "libavutil/intfloat_readwrite.h"
31 32
 #include "avformat.h"
32 33
 
33 34
 #define     RIFF_TAG MKTAG('R', 'I', 'F', 'F')
... ...
@@ -55,7 +55,6 @@ static int a64_write_header(struct AVFormatContext *s)
55 55
         break;
56 56
     default:
57 57
         return AVERROR(EINVAL);
58
-        break;
59 58
     }
60 59
     avio_write(s->pb, header, 2);
61 60
     c->prev_pkt.size = 0;
... ...
@@ -19,6 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
+#include "libavutil/intfloat_readwrite.h"
22 23
 #include "avformat.h"
23 24
 #include "aiff.h"
24 25
 #include "avio_internal.h"
... ...
@@ -27,6 +27,7 @@
27 27
 
28 28
 #include "libavutil/avstring.h"
29 29
 #include "libavutil/intreadwrite.h"
30
+#include "libavutil/mathematics.h"
30 31
 #include "libavutil/opt.h"
31 32
 #include "libavutil/dict.h"
32 33
 #include "avformat.h"
... ...
@@ -25,6 +25,7 @@
25 25
 #include "libavutil/common.h"
26 26
 #include "libavutil/avstring.h"
27 27
 #include "libavutil/dict.h"
28
+#include "libavutil/mathematics.h"
28 29
 #include "libavcodec/mpegaudio.h"
29 30
 #include "avformat.h"
30 31
 #include "avio_internal.h"
... ...
@@ -84,13 +85,11 @@ static const ff_asf_guid index_guid = {
84 84
     0x90, 0x08, 0x00, 0x33, 0xb1, 0xe5, 0xcf, 0x11, 0x89, 0xf4, 0x00, 0xa0, 0xc9, 0x03, 0x49, 0xcb
85 85
 };
86 86
 
87
+#ifdef DEBUG
87 88
 static const ff_asf_guid stream_bitrate_guid = { /* (http://get.to/sdp) */
88 89
     0xce, 0x75, 0xf8, 0x7b, 0x8d, 0x46, 0xd1, 0x11, 0x8d, 0x82, 0x00, 0x60, 0x97, 0xc9, 0xa2, 0xb2
89 90
 };
90
-/**********************************/
91
-/* decoding */
92 91
 
93
-#ifdef DEBUG
94 92
 #define PRINT_IF_GUID(g,cmp) \
95 93
 if (!ff_guidcmp(g, &cmp)) \
96 94
     av_dlog(NULL, "(GUID: %s) ", #cmp)
... ...
@@ -1094,8 +1093,6 @@ static int asf_read_packet(AVFormatContext *s, AVPacket *pkt)
1094 1094
             assert(asf->packet_size_left < FRAME_HEADER_SIZE || asf->packet_segments < 1);
1095 1095
         asf->packet_time_start = 0;
1096 1096
     }
1097
-
1098
-    return 0;
1099 1097
 }
1100 1098
 
1101 1099
 // Added to support seeking after packets have been read
... ...
@@ -19,6 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
+#include "libavutil/mathematics.h"
22 23
 #include "avformat.h"
23 24
 #include "internal.h"
24 25
 
... ...
@@ -21,6 +21,7 @@
21 21
  */
22 22
 
23 23
 #include "libavutil/fifo.h"
24
+#include "libavutil/mathematics.h"
24 25
 #include "avformat.h"
25 26
 #include "audiointerleave.h"
26 27
 #include "internal.h"
... ...
@@ -41,6 +41,7 @@ const char *avformat_license(void);
41 41
 #include <stdio.h>  /* FILE */
42 42
 #include "libavcodec/avcodec.h"
43 43
 #include "libavutil/dict.h"
44
+#include "libavutil/log.h"
44 45
 
45 46
 #include "avio.h"
46 47
 #include "libavformat/version.h"
... ...
@@ -21,6 +21,7 @@
21 21
 
22 22
 #include <strings.h>
23 23
 #include "libavutil/intreadwrite.h"
24
+#include "libavutil/mathematics.h"
24 25
 #include "libavutil/bswap.h"
25 26
 #include "libavutil/opt.h"
26 27
 #include "libavutil/dict.h"
... ...
@@ -1289,20 +1290,16 @@ static int avi_load_index(AVFormatContext *s)
1289 1289
                 (tag >> 16) & 0xff,
1290 1290
                 (tag >> 24) & 0xff,
1291 1291
                 size);
1292
-        switch(tag) {
1293
-        case MKTAG('i', 'd', 'x', '1'):
1294
-            if (avi_read_idx1(s, size) < 0)
1295
-                goto skip;
1292
+
1293
+        if (tag == MKTAG('i', 'd', 'x', '1') &&
1294
+            avi_read_idx1(s, size) >= 0) {
1296 1295
             ret = 0;
1297
-                goto the_end;
1298
-            break;
1299
-        default:
1300
-        skip:
1301
-            size += (size & 1);
1302
-            if (avio_skip(pb, size) < 0)
1303
-                goto the_end; // something is wrong here
1304 1296
             break;
1305 1297
         }
1298
+
1299
+        size += (size & 1);
1300
+        if (avio_skip(pb, size) < 0)
1301
+            break; // something is wrong here
1306 1302
     }
1307 1303
  the_end:
1308 1304
     avio_seek(pb, pos, SEEK_SET);
... ...
@@ -220,8 +220,6 @@ static int vid_read_packet(AVFormatContext *s,
220 220
             av_log(s, AV_LOG_ERROR, "unknown block (character = %c, decimal = %d, hex = %x)!!!\n",
221 221
                    block_type, block_type, block_type); return -1;
222 222
     }
223
-
224
-    return 0;
225 223
 }
226 224
 
227 225
 AVInputFormat ff_bethsoftvid_demuxer = {
... ...
@@ -29,6 +29,7 @@
29 29
 #include "riff.h"
30 30
 #include "isom.h"
31 31
 #include "libavutil/intreadwrite.h"
32
+#include "libavutil/intfloat_readwrite.h"
32 33
 #include "libavutil/dict.h"
33 34
 #include "caf.h"
34 35
 
... ...
@@ -24,6 +24,7 @@
24 24
 #include "riff.h"
25 25
 #include "isom.h"
26 26
 #include "avio_internal.h"
27
+#include "libavutil/intfloat_readwrite.h"
27 28
 
28 29
 typedef struct {
29 30
     int64_t data;
... ...
@@ -32,6 +32,7 @@
32 32
 #include "avformat.h"
33 33
 #include "libavcodec/dvdata.h"
34 34
 #include "libavutil/intreadwrite.h"
35
+#include "libavutil/mathematics.h"
35 36
 #include "dv.h"
36 37
 
37 38
 struct DVDemuxContext {
... ...
@@ -35,6 +35,7 @@
35 35
 #include "libavcodec/dvdata.h"
36 36
 #include "dv.h"
37 37
 #include "libavutil/fifo.h"
38
+#include "libavutil/mathematics.h"
38 39
 
39 40
 struct DVMuxContext {
40 41
     const DVprofile*  sys;           /* current DV profile, e.g.: 525/60, 625/50 */
... ...
@@ -20,6 +20,7 @@
20 20
  */
21 21
 
22 22
 #include "libavutil/intreadwrite.h"
23
+#include "libavutil/intfloat_readwrite.h"
23 24
 #include "avformat.h"
24 25
 #include "ffm.h"
25 26
 #if CONFIG_FFSERVER
... ...
@@ -20,6 +20,7 @@
20 20
  */
21 21
 
22 22
 #include "libavutil/intreadwrite.h"
23
+#include "libavutil/intfloat_readwrite.h"
23 24
 #include "avformat.h"
24 25
 #include "ffm.h"
25 26
 
... ...
@@ -19,6 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
+#include "libavutil/mathematics.h"
22 23
 #include "avformat.h"
23 24
 #include "ffmeta.h"
24 25
 #include "internal.h"
... ...
@@ -26,6 +26,7 @@
26 26
 
27 27
 #include "libavutil/avstring.h"
28 28
 #include "libavutil/dict.h"
29
+#include "libavutil/intfloat_readwrite.h"
29 30
 #include "libavcodec/bytestream.h"
30 31
 #include "libavcodec/mpeg4audio.h"
31 32
 #include "avformat.h"
... ...
@@ -20,6 +20,7 @@
20 20
  */
21 21
 
22 22
 #include "libavutil/intreadwrite.h"
23
+#include "libavutil/intfloat_readwrite.h"
23 24
 #include "avformat.h"
24 25
 #include "flv.h"
25 26
 #include "internal.h"
... ...
@@ -19,6 +19,8 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
+#include "libavutil/intfloat_readwrite.h"
23
+#include "libavutil/mathematics.h"
22 24
 #include "avformat.h"
23 25
 #include "gxf.h"
24 26
 #include "riff.h"
... ...
@@ -209,7 +209,6 @@ static int roq_read_packet(AVFormatContext *s,
209 209
         default:
210 210
             av_log(s, AV_LOG_ERROR, "  unknown RoQ chunk (%04X)\n", chunk_type);
211 211
             return AVERROR_INVALIDDATA;
212
-            break;
213 212
         }
214 213
     }
215 214
 
... ...
@@ -155,14 +155,14 @@ void ff_put_v(AVIOContext *bc, uint64_t val);
155 155
 
156 156
 /**
157 157
  * Read a whole line of text from AVIOContext. Stop reading after reaching
158
- * either a \n, a \0 or EOF. The returned string is always \0 terminated,
158
+ * either a \\n, a \\0 or EOF. The returned string is always \\0-terminated,
159 159
  * and may be truncated if the buffer is too small.
160 160
  *
161 161
  * @param s the read-only AVIOContext
162 162
  * @param buf buffer to store the read line
163 163
  * @param maxlen size of the buffer
164 164
  * @return the length of the string written in the buffer, not including the
165
- *         final \0
165
+ *         final \\0
166 166
  */
167 167
 int ff_get_line(AVIOContext *s, char *buf, int maxlen);
168 168
 
... ...
@@ -28,6 +28,8 @@
28 28
 #include "avlanguage.h"
29 29
 #include "libavutil/samplefmt.h"
30 30
 #include "libavutil/intreadwrite.h"
31
+#include "libavutil/intfloat_readwrite.h"
32
+#include "libavutil/mathematics.h"
31 33
 #include "libavutil/random_seed.h"
32 34
 #include "libavutil/lfg.h"
33 35
 #include "libavutil/dict.h"
... ...
@@ -184,8 +184,6 @@ static int read_packet(AVFormatContext *s,
184 184
             avio_skip(pb, length);
185 185
         }
186 186
     }
187
-
188
-    return 0;
189 187
 }
190 188
 
191 189
 AVInputFormat ff_mm_demuxer = {
... ...
@@ -208,7 +208,6 @@ static int get_http_header_data(MMSHContext *mmsh)
208 208
             }
209 209
         }
210 210
     }
211
-    return 0;
212 211
 }
213 212
 
214 213
 static int mmsh_open(URLContext *h, const char *uri, int flags)
... ...
@@ -26,6 +26,8 @@
26 26
 //#define MOV_EXPORT_ALL_METADATA
27 27
 
28 28
 #include "libavutil/intreadwrite.h"
29
+#include "libavutil/intfloat_readwrite.h"
30
+#include "libavutil/mathematics.h"
29 31
 #include "libavutil/avstring.h"
30 32
 #include "libavutil/dict.h"
31 33
 #include "avformat.h"
... ...
@@ -2327,7 +2329,6 @@ static int mov_probe(AVProbeData *p)
2327 2327
             return score;
2328 2328
         }
2329 2329
     }
2330
-    return score;
2331 2330
 }
2332 2331
 
2333 2332
 // must be done after parsing all trak because there's no order requirement
... ...
@@ -32,6 +32,8 @@
32 32
 #include "libavcodec/put_bits.h"
33 33
 #include "internal.h"
34 34
 #include "libavutil/avstring.h"
35
+#include "libavutil/intfloat_readwrite.h"
36
+#include "libavutil/mathematics.h"
35 37
 #include "libavutil/opt.h"
36 38
 #include "libavutil/dict.h"
37 39
 #include "rtpenc.h"
... ...
@@ -22,6 +22,7 @@
22 22
 #include "libavutil/avstring.h"
23 23
 #include "libavutil/intreadwrite.h"
24 24
 #include "libavutil/dict.h"
25
+#include "libavutil/mathematics.h"
25 26
 #include "avformat.h"
26 27
 #include "id3v2.h"
27 28
 #include "id3v1.h"
... ...
@@ -20,6 +20,7 @@
20 20
  */
21 21
 
22 22
 #include "libavutil/fifo.h"
23
+#include "libavutil/mathematics.h"
23 24
 #include "libavcodec/put_bits.h"
24 25
 #include "avformat.h"
25 26
 #include "mpeg.h"
... ...
@@ -22,6 +22,7 @@
22 22
 #include "libavutil/bswap.h"
23 23
 #include "libavutil/crc.h"
24 24
 #include "libavutil/dict.h"
25
+#include "libavutil/mathematics.h"
25 26
 #include "libavutil/opt.h"
26 27
 #include "libavcodec/mpegvideo.h"
27 28
 #include "avformat.h"
... ...
@@ -46,6 +46,7 @@
46 46
 //#define DEBUG
47 47
 
48 48
 #include "libavutil/aes.h"
49
+#include "libavutil/mathematics.h"
49 50
 #include "libavcodec/bytestream.h"
50 51
 #include "avformat.h"
51 52
 #include "mxf.h"
... ...
@@ -18,6 +18,8 @@
18 18
  * License along with FFmpeg; if not, write to the Free Software
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21
+
22
+#include "libavutil/mathematics.h"
21 23
 #include "avformat.h"
22 24
 #include "riff.h"
23 25
 #include "libavutil/dict.h"
... ...
@@ -19,6 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
+#include "libavutil/mathematics.h"
22 23
 #include "libavutil/tree.h"
23 24
 #include "nut.h"
24 25
 #include "internal.h"
... ...
@@ -24,6 +24,7 @@
24 24
 #include "libavutil/avstring.h"
25 25
 #include "libavutil/bswap.h"
26 26
 #include "libavutil/dict.h"
27
+#include "libavutil/mathematics.h"
27 28
 #include "libavutil/tree.h"
28 29
 #include "avio_internal.h"
29 30
 #include "nut.h"
... ...
@@ -20,6 +20,7 @@
20 20
  */
21 21
 
22 22
 #include "libavutil/intreadwrite.h"
23
+#include "libavutil/mathematics.h"
23 24
 #include "libavutil/tree.h"
24 25
 #include "libavutil/dict.h"
25 26
 #include "libavcodec/mpegaudiodata.h"
... ...
@@ -20,6 +20,7 @@
20 20
  */
21 21
 
22 22
 #include "libavutil/intreadwrite.h"
23
+#include "libavutil/intfloat_readwrite.h"
23 24
 #include "avformat.h"
24 25
 #include "riff.h"
25 26
 
... ...
@@ -21,6 +21,7 @@
21 21
 
22 22
 #include "libavutil/crc.h"
23 23
 #include "libavutil/opt.h"
24
+#include "libavutil/mathematics.h"
24 25
 #include "libavutil/random_seed.h"
25 26
 #include "libavcodec/xiph.h"
26 27
 #include "libavcodec/bytestream.h"
... ...
@@ -149,7 +149,6 @@ static int oma_read_header(AVFormatContext *s,
149 149
         default:
150 150
             av_log(s, AV_LOG_ERROR, "Unsupported codec %d!\n",buf[32]);
151 151
             return -1;
152
-            break;
153 152
     }
154 153
 
155 154
     st->codec->block_align = framesize;
... ...
@@ -19,6 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
+#include "libavutil/mathematics.h"
22 23
 #include "avformat.h"
23 24
 #include "pcm.h"
24 25
 
... ...
@@ -234,7 +234,6 @@ static int str_read_packet(AVFormatContext *s,
234 234
             pkt->stream_index =
235 235
                 str->channels[channel].audio_stream_index;
236 236
             return 0;
237
-            break;
238 237
         default:
239 238
             av_log(s, AV_LOG_WARNING, "Unknown sector type %02X\n", sector[0x12]);
240 239
             /* drop the sector and move on */
... ...
@@ -23,6 +23,7 @@
23 23
 
24 24
 #include "libavutil/intreadwrite.h"
25 25
 #include "libavutil/dict.h"
26
+#include "libavutil/mathematics.h"
26 27
 #include "avformat.h"
27 28
 
28 29
 typedef struct {
... ...
@@ -19,6 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
+#include "libavutil/mathematics.h"
22 23
 #include "libavcodec/avcodec.h"
23 24
 #include "avformat.h"
24 25
 #include "avio_internal.h"
... ...
@@ -34,6 +34,7 @@
34 34
  */
35 35
 
36 36
 #include "libavutil/intreadwrite.h"
37
+#include "libavutil/mathematics.h"
37 38
 #include "avformat.h"
38 39
 
39 40
 #define EXTRADATA1_SIZE (6 + 256 * 3) ///< video base, clr, palette
... ...
@@ -21,6 +21,7 @@
21 21
 
22 22
 #include "libavcodec/bytestream.h"
23 23
 #include "libavutil/avstring.h"
24
+#include "libavutil/intfloat_readwrite.h"
24 25
 #include "avformat.h"
25 26
 
26 27
 #include "rtmppkt.h"
... ...
@@ -26,6 +26,7 @@
26 26
 
27 27
 #include "libavcodec/bytestream.h"
28 28
 #include "libavutil/avstring.h"
29
+#include "libavutil/intfloat_readwrite.h"
29 30
 #include "libavutil/lfg.h"
30 31
 #include "libavutil/sha.h"
31 32
 #include "avformat.h"
... ...
@@ -761,7 +762,6 @@ static int get_packet(URLContext *s, int for_header)
761 761
         }
762 762
         ff_rtmp_packet_destroy(&rpkt);
763 763
     }
764
-    return 0;
765 764
 }
766 765
 
767 766
 static int rtmp_close(URLContext *h)
... ...
@@ -19,6 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
+#include "libavutil/mathematics.h"
22 23
 #include "libavcodec/get_bits.h"
23 24
 #include "avformat.h"
24 25
 #include "mpegts.h"
... ...
@@ -22,6 +22,7 @@
22 22
 #include "avformat.h"
23 23
 #include "mpegts.h"
24 24
 #include "internal.h"
25
+#include "libavutil/mathematics.h"
25 26
 #include "libavutil/random_seed.h"
26 27
 #include "libavutil/opt.h"
27 28
 
... ...
@@ -22,6 +22,7 @@
22 22
 #include "libavutil/base64.h"
23 23
 #include "libavutil/avstring.h"
24 24
 #include "libavutil/intreadwrite.h"
25
+#include "libavutil/mathematics.h"
25 26
 #include "libavutil/parseutils.h"
26 27
 #include "libavutil/random_seed.h"
27 28
 #include "libavutil/dict.h"
... ...
@@ -428,11 +429,6 @@ static void sdp_parse_line(AVFormatContext *s, SDPParseState *s1,
428 428
     }
429 429
 }
430 430
 
431
-/**
432
- * Parse the sdp description and allocate the rtp streams and the
433
- * pollfd array used for udp ones.
434
- */
435
-
436 431
 int ff_sdp_parse(AVFormatContext *s, const char *content)
437 432
 {
438 433
     RTSPState *rt = s->priv_data;
... ...
@@ -1050,9 +1046,6 @@ retry:
1050 1050
     return 0;
1051 1051
 }
1052 1052
 
1053
-/**
1054
- * @return 0 on success, <0 on error, 1 if protocol is unavailable.
1055
- */
1056 1053
 int ff_rtsp_make_setup_request(AVFormatContext *s, const char *host, int port,
1057 1054
                               int lower_transport, const char *real_challenge)
1058 1055
 {
... ...
@@ -1078,7 +1071,7 @@ int ff_rtsp_make_setup_request(AVFormatContext *s, const char *host, int port,
1078 1078
     for (j = RTSP_RTP_PORT_MIN, i = 0; i < rt->nb_rtsp_streams; ++i) {
1079 1079
         char transport[2048];
1080 1080
 
1081
-        /**
1081
+        /*
1082 1082
          * WMS serves all UDP data over a single connection, the RTX, which
1083 1083
          * isn't necessarily the first in the SDP but has to be the first
1084 1084
          * to be set up, else the second/third SETUP will fail with a 461.
... ...
@@ -1151,7 +1144,7 @@ int ff_rtsp_make_setup_request(AVFormatContext *s, const char *host, int port,
1151 1151
 
1152 1152
         /* RTP/TCP */
1153 1153
         else if (lower_transport == RTSP_LOWER_TRANSPORT_TCP) {
1154
-            /** For WMS streams, the application streams are only used for
1154
+            /* For WMS streams, the application streams are only used for
1155 1155
              * UDP. When trying to set it up for TCP streams, the server
1156 1156
              * will return an error. Therefore, we skip those streams. */
1157 1157
             if (rt->server_type == RTSP_SERVER_WMS &&
... ...
@@ -1482,14 +1475,14 @@ redirect:
1482 1482
         cmd[0] = 0;
1483 1483
         if (rt->server_type == RTSP_SERVER_REAL)
1484 1484
             av_strlcat(cmd,
1485
-                       /**
1485
+                       /*
1486 1486
                         * The following entries are required for proper
1487 1487
                         * streaming from a Realmedia server. They are
1488 1488
                         * interdependent in some way although we currently
1489 1489
                         * don't quite understand how. Values were copied
1490 1490
                         * from mplayer SVN r23589.
1491
-                        * @param CompanyID is a 16-byte ID in base64
1492
-                        * @param ClientChallenge is a 16-byte ID in hex
1491
+                        *   ClientChallenge is a 16-byte ID in hex
1492
+                        *   CompanyID is a 16-byte ID in base64
1493 1493
                         */
1494 1494
                        "ClientChallenge: 9e26d33f2984236010ef6253fb1887f7\r\n"
1495 1495
                        "PlayerStarttime: [28/03/2003:22:50:23 00:00]\r\n"
... ...
@@ -505,8 +505,9 @@ int ff_rtsp_setup_input_streams(AVFormatContext *s, RTSPMessageHeader *reply);
505 505
 int ff_rtsp_setup_output_streams(AVFormatContext *s, const char *addr);
506 506
 
507 507
 /**
508
- * Parse a SDP description of streams by populating an RTSPState struct
509
- * within the AVFormatContext.
508
+ * Parse an SDP description of streams by populating an RTSPState struct
509
+ * within the AVFormatContext; also allocate the RTP streams and the
510
+ * pollfd array used for UDP streams.
510 511
  */
511 512
 int ff_sdp_parse(AVFormatContext *s, const char *content);
512 513
 
... ...
@@ -525,6 +526,7 @@ int ff_rtsp_fetch_packet(AVFormatContext *s, AVPacket *pkt);
525 525
 /**
526 526
  * Do the SETUP requests for each stream for the chosen
527 527
  * lower transport mode.
528
+ * @return 0 on success, <0 on error, 1 if protocol is unavailable
528 529
  */
529 530
 int ff_rtsp_make_setup_request(AVFormatContext *s, const char *host, int port,
530 531
                                int lower_transport, const char *real_challenge);
... ...
@@ -21,6 +21,7 @@
21 21
 
22 22
 #include "libavutil/avstring.h"
23 23
 #include "libavutil/intreadwrite.h"
24
+#include "libavutil/mathematics.h"
24 25
 #include "libavutil/opt.h"
25 26
 #include "avformat.h"
26 27
 
... ...
@@ -21,6 +21,7 @@
21 21
  */
22 22
 
23 23
 #include "seek.h"
24
+#include "libavutil/mathematics.h"
24 25
 #include "libavutil/mem.h"
25 26
 #include "internal.h"
26 27
 
... ...
@@ -30,6 +30,7 @@
30 30
  */
31 31
 
32 32
 #include "libavutil/intreadwrite.h"
33
+#include "libavutil/intfloat_readwrite.h"
33 34
 #include "libavutil/dict.h"
34 35
 #include "avformat.h"
35 36
 #include "pcm.h"
... ...
@@ -30,6 +30,7 @@
30 30
  */
31 31
 
32 32
 #include "libavutil/intreadwrite.h"
33
+#include "libavutil/intfloat_readwrite.h"
33 34
 #include "libavutil/dict.h"
34 35
 #include "avformat.h"
35 36
 #include "avio_internal.h"
... ...
@@ -204,7 +204,6 @@ static int swf_read_packet(AVFormatContext *s, AVPacket *pkt)
204 204
     skip:
205 205
         avio_skip(pb, len);
206 206
     }
207
-    return 0;
208 207
 }
209 208
 
210 209
 AVInputFormat ff_swf_demuxer = {
... ...
@@ -20,6 +20,7 @@
20 20
  */
21 21
 
22 22
 #include "libavutil/intreadwrite.h"
23
+#include "libavutil/intfloat_readwrite.h"
23 24
 #include "avformat.h"
24 25
 
25 26
 typedef struct ThpDemuxContext {
... ...
@@ -32,6 +32,7 @@
32 32
 #include "metadata.h"
33 33
 #include "id3v2.h"
34 34
 #include "libavutil/avstring.h"
35
+#include "libavutil/mathematics.h"
35 36
 #include "riff.h"
36 37
 #include "audiointerleave.h"
37 38
 #include "url.h"
... ...
@@ -22,6 +22,7 @@
22 22
 #include "avformat.h"
23 23
 #include "libavutil/intreadwrite.h"
24 24
 #include "libavutil/dict.h"
25
+#include "libavutil/mathematics.h"
25 26
 
26 27
 typedef struct VqfContext {
27 28
     int frame_bit_len;
... ...
@@ -22,6 +22,8 @@
22 22
  * License along with FFmpeg; if not, write to the Free Software
23 23
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 24
  */
25
+
26
+#include "libavutil/mathematics.h"
25 27
 #include "avformat.h"
26 28
 #include "avio_internal.h"
27 29
 #include "pcm.h"
... ...
@@ -152,7 +152,6 @@ static int wc3_read_header(AVFormatContext *s,
152 152
                 (uint8_t)fourcc_tag, (uint8_t)(fourcc_tag >> 8), (uint8_t)(fourcc_tag >> 16), (uint8_t)(fourcc_tag >> 24),
153 153
                 (uint8_t)fourcc_tag, (uint8_t)(fourcc_tag >> 8), (uint8_t)(fourcc_tag >> 16), (uint8_t)(fourcc_tag >> 24));
154 154
             return AVERROR_INVALIDDATA;
155
-            break;
156 155
         }
157 156
 
158 157
         fourcc_tag = avio_rl32(pb);
... ...
@@ -716,7 +716,7 @@ enum {
716 716
  * Parse WTV chunks
717 717
  * @param mode SEEK_TO_DATA or SEEK_TO_PTS
718 718
  * @param seekts timestamp
719
- * @param[out] len Length of data chunk
719
+ * @param[out] len_ptr Length of data chunk
720 720
  * @return stream index of data chunk, or <0 on error
721 721
  */
722 722
 static int parse_chunks(AVFormatContext *s, int mode, int64_t seekts, int *len_ptr)
... ...
@@ -54,6 +54,12 @@ static uint32_t enc_multbl[4][256];
54 54
 static uint32_t dec_multbl[4][256];
55 55
 #endif
56 56
 
57
+#if HAVE_BIGENDIAN
58
+#   define ROT(x, s) ((x >> s) | (x << (32-s)))
59
+#else
60
+#   define ROT(x, s) ((x << s) | (x >> (32-s)))
61
+#endif
62
+
57 63
 static inline void addkey(av_aes_block *dst, const av_aes_block *src,
58 64
                           const av_aes_block *round_key)
59 65
 {
... ...
@@ -86,7 +92,6 @@ static void subshift(av_aes_block s0[2], int s, const uint8_t *box)
86 86
 
87 87
 static inline int mix_core(uint32_t multbl[][256], int a, int b, int c, int d){
88 88
 #if CONFIG_SMALL
89
-#define ROT(x,s) ((x<<s)|(x>>(32-s)))
90 89
     return multbl[0][a] ^ ROT(multbl[0][b], 8) ^ ROT(multbl[0][c], 16) ^ ROT(multbl[0][d], 24);
91 90
 #else
92 91
     return multbl[0][a] ^ multbl[1][b] ^ multbl[2][c] ^ multbl[3][d];
... ...
@@ -127,7 +132,7 @@ void av_aes_crypt(AVAES *a, uint8_t *dst_, const uint8_t *src_,
127 127
             crypt(a, 0, inv_sbox, dec_multbl);
128 128
             if (iv) {
129 129
                 addkey(&a->state[0], &a->state[0], iv);
130
-                memcpy(iv, src, 16);
130
+                *iv = *src;
131 131
             }
132 132
             addkey(dst, &a->state[0], &a->round_key[0]);
133 133
         } else {
... ...
@@ -136,29 +141,36 @@ void av_aes_crypt(AVAES *a, uint8_t *dst_, const uint8_t *src_,
136 136
             crypt(a, 2, sbox, enc_multbl);
137 137
             addkey(dst, &a->state[0], &a->round_key[0]);
138 138
             if (iv)
139
-                memcpy(iv, dst, 16);
139
+                *iv = *dst;
140 140
         }
141 141
         src++;
142 142
         dst++;
143 143
     }
144 144
 }
145 145
 
146
-static void init_multbl2(uint8_t tbl[1024], const int c[4],
146
+static void init_multbl2(uint32_t tbl[][256], const int c[4],
147 147
                          const uint8_t *log8, const uint8_t *alog8,
148 148
                          const uint8_t *sbox)
149 149
 {
150
-    int i, j;
151
-
152
-    for (i = 0; i < 1024; i++) {
153
-        int x = sbox[i >> 2];
154
-        if (x)
155
-            tbl[i] = alog8[log8[x] + log8[c[i & 3]]];
156
-    }
150
+    int i;
151
+
152
+    for (i = 0; i < 256; i++) {
153
+        int x = sbox[i];
154
+        if (x) {
155
+            int k, l, m, n;
156
+            x = log8[x];
157
+            k = alog8[x + log8[c[0]]];
158
+            l = alog8[x + log8[c[1]]];
159
+            m = alog8[x + log8[c[2]]];
160
+            n = alog8[x + log8[c[3]]];
161
+            tbl[0][i] = AV_NE(MKBETAG(k,l,m,n), MKTAG(k,l,m,n));
157 162
 #if !CONFIG_SMALL
158
-    for (j = 256; j < 1024; j++)
159
-        for (i = 0; i < 4; i++)
160
-            tbl[4*j + i] = tbl[4*j + ((i - 1) & 3) - 1024];
163
+            tbl[1][i] = ROT(tbl[0][i], 8);
164
+            tbl[2][i] = ROT(tbl[0][i], 16);
165
+            tbl[3][i] = ROT(tbl[0][i], 24);
161 166
 #endif
167
+        }
168
+    }
162 169
 }
163 170
 
164 171
 // this is based on the reference AES code by Paulo Barreto and Vincent Rijmen
... ...
@@ -187,9 +199,9 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
187 187
             inv_sbox[j] = i;
188 188
             sbox[i] = j;
189 189
         }
190
-        init_multbl2(dec_multbl[0], (const int[4]) { 0xe, 0x9, 0xd, 0xb },
190
+        init_multbl2(dec_multbl, (const int[4]) { 0xe, 0x9, 0xd, 0xb },
191 191
                      log8, alog8, inv_sbox);
192
-        init_multbl2(enc_multbl[0], (const int[4]) { 0x2, 0x1, 0x1, 0x3 },
192
+        init_multbl2(enc_multbl, (const int[4]) { 0x2, 0x1, 0x1, 0x3 },
193 193
                      log8, alog8, sbox);
194 194
     }
195 195
 
... ...
@@ -221,15 +233,14 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
221 221
     if (decrypt) {
222 222
         for (i = 1; i < rounds; i++) {
223 223
             av_aes_block tmp[3];
224
-            memcpy(&tmp[2], &a->round_key[i], 16);
224
+            tmp[2] = a->round_key[i];
225 225
             subshift(&tmp[1], 0, sbox);
226 226
             mix(tmp, dec_multbl, 1, 3);
227
-            memcpy(&a->round_key[i], &tmp[0], 16);
227
+            a->round_key[i] = tmp[0];
228 228
         }
229 229
     } else {
230 230
         for (i = 0; i < (rounds + 1) >> 1; i++) {
231
-            for (j = 0; j < 16; j++)
232
-                FFSWAP(int, a->round_key[i].u8[j], a->round_key[rounds-i].u8[j]);
231
+            FFSWAP(av_aes_block, a->round_key[i], a->round_key[rounds-i]);
233 232
         }
234 233
     }
235 234
 
... ...
@@ -126,9 +126,5 @@ char av_get_picture_type_char(enum AVPictureType pict_type);
126 126
 #include "common.h"
127 127
 #include "error.h"
128 128
 #include "mathematics.h"
129
-#include "rational.h"
130
-#include "intfloat_readwrite.h"
131
-#include "log.h"
132
-#include "pixfmt.h"
133 129
 
134 130
 #endif /* AVUTIL_AVUTIL_H */
... ...
@@ -39,6 +39,7 @@ static const uint8_t IP_shuffle[] = {
39 39
 };
40 40
 #undef T
41 41
 
42
+#if defined(CONFIG_SMALL) || defined(GENTABLES)
42 43
 #define T(a, b, c, d) 32-a,32-b,32-c,32-d
43 44
 static const uint8_t P_shuffle[] = {
44 45
     T(16,  7, 20, 21),
... ...
@@ -51,6 +52,7 @@ static const uint8_t P_shuffle[] = {
51 51
     T(22, 11,  4, 25)
52 52
 };
53 53
 #undef T
54
+#endif
54 55
 
55 56
 #define T(a, b, c, d, e, f, g) 64-a,64-b,64-c,64-d,64-e,64-f,64-g
56 57
 static const uint8_t PC1_shuffle[] = {
... ...
@@ -402,7 +404,7 @@ int main(void) {
402 402
         printf("Partial Monte-Carlo test failed\n");
403 403
         return 1;
404 404
     }
405
-    for (i = 0; i < 1000000; i++) {
405
+    for (i = 0; i < 1000; i++) {
406 406
         key[0] = rand64(); key[1] = rand64(); key[2] = rand64();
407 407
         data = rand64();
408 408
         av_des_init(&d, key, 192, 0);
... ...
@@ -28,6 +28,7 @@
28 28
 
29 29
 #include "avutil.h"
30 30
 #include "eval.h"
31
+#include "log.h"
31 32
 
32 33
 typedef struct Parser {
33 34
     const AVClass *class;
... ...
@@ -17,6 +17,7 @@
17 17
  */
18 18
 
19 19
 #include "file.h"
20
+#include "log.h"
20 21
 #include <fcntl.h>
21 22
 #include <sys/stat.h>
22 23
 #include <unistd.h>
... ...
@@ -23,6 +23,7 @@
23 23
 
24 24
 #include "imgutils.h"
25 25
 #include "internal.h"
26
+#include "log.h"
26 27
 #include "pixdesc.h"
27 28
 
28 29
 void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
... ...
@@ -69,7 +69,7 @@ int av_image_fill_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt
69 69
  *
70 70
  * @param data pointers array to be filled with the pointer for each image plane
71 71
  * @param ptr the pointer to a buffer which will contain the image
72
- * @param linesizes[4] the array containing the linesize for each
72
+ * @param linesizes the array containing the linesize for each
73 73
  * plane, should be filled by av_image_fill_linesizes()
74 74
  * @return the size in bytes required for the image buffer, a negative
75 75
  * error code in case of failure
... ...
@@ -106,8 +106,8 @@ void av_image_copy_plane(uint8_t       *dst, int dst_linesize,
106 106
 /**
107 107
  * Copy image in src_data to dst_data.
108 108
  *
109
- * @param dst_linesize linesizes for the image in dst_data
110
- * @param src_linesize linesizes for the image in src_data
109
+ * @param dst_linesizes linesizes for the image in dst_data
110
+ * @param src_linesizes linesizes for the image in src_data
111 111
  */
112 112
 void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4],
113 113
                    const uint8_t *src_data[4], const int src_linesizes[4],
... ...
@@ -55,7 +55,7 @@ static inline unsigned int av_mlfg_get(AVLFG *c){
55 55
  * Get the next two numbers generated by a Box-Muller Gaussian
56 56
  * generator using the random numbers issued by lfg.
57 57
  *
58
- * @param out[2] array where the two generated numbers are placed
58
+ * @param out array where the two generated numbers are placed
59 59
  */
60 60
 void av_bmg_get(AVLFG *lfg, double out[2]);
61 61
 
... ...
@@ -30,8 +30,9 @@
30 30
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
31 31
  */
32 32
 
33
-#include <string.h>
33
+#include <stdint.h>
34 34
 #include "bswap.h"
35
+#include "intreadwrite.h"
35 36
 #include "md5.h"
36 37
 
37 38
 typedef struct AVMD5{
... ...
@@ -40,7 +41,7 @@ typedef struct AVMD5{
40 40
     uint32_t ABCD[4];
41 41
 } AVMD5;
42 42
 
43
-const int av_md5_size= sizeof(AVMD5);
43
+const int av_md5_size = sizeof(AVMD5);
44 44
 
45 45
 static const uint8_t S[4][4] = {
46 46
     { 7, 12, 17, 22 },  /* round 1 */
... ...
@@ -71,42 +72,49 @@ static const uint32_t T[64] = { // T[i]= fabs(sin(i+1)<<32)
71 71
     0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
72 72
 };
73 73
 
74
-#define CORE(i, a, b, c, d) \
75
-        t = S[i>>4][i&3];\
76
-        a += T[i];\
77
-\
78
-        if(i<32){\
79
-            if(i<16) a += (d ^ (b&(c^d))) + X[      i &15 ];\
80
-            else     a += (c ^ (d&(c^b))) + X[ (1+5*i)&15 ];\
81
-        }else{\
82
-            if(i<48) a += (b^c^d)         + X[ (5+3*i)&15 ];\
83
-            else     a += (c^(b|~d))      + X[ (  7*i)&15 ];\
84
-        }\
85
-        a = b + (( a << t ) | ( a >> (32 - t) ));
86
-
87
-static void body(uint32_t ABCD[4], uint32_t X[16]){
88
-
74
+#define CORE(i, a, b, c, d) do {                                        \
75
+        t = S[i >> 4][i & 3];                                           \
76
+        a += T[i];                                                      \
77
+                                                                        \
78
+        if (i < 32) {                                                   \
79
+            if (i < 16) a += (d ^ (b & (c ^ d))) + X[       i  & 15];   \
80
+            else        a += (c ^ (d & (c ^ b))) + X[(1 + 5*i) & 15];   \
81
+        } else {                                                        \
82
+            if (i < 48) a += (b ^ c ^ d)         + X[(5 + 3*i) & 15];   \
83
+            else        a += (c ^ (b | ~d))      + X[(    7*i) & 15];   \
84
+        }                                                               \
85
+        a = b + (a << t | a >> (32 - t));                               \
86
+    } while (0)
87
+
88
+static void body(uint32_t ABCD[4], uint32_t X[16])
89
+{
89 90
     int t;
90 91
     int i av_unused;
91
-    unsigned int a= ABCD[3];
92
-    unsigned int b= ABCD[2];
93
-    unsigned int c= ABCD[1];
94
-    unsigned int d= ABCD[0];
92
+    unsigned int a = ABCD[3];
93
+    unsigned int b = ABCD[2];
94
+    unsigned int c = ABCD[1];
95
+    unsigned int d = ABCD[0];
95 96
 
96 97
 #if HAVE_BIGENDIAN
97
-    for(i=0; i<16; i++)
98
-        X[i]= av_bswap32(X[i]);
98
+    for (i = 0; i < 16; i++)
99
+        X[i] = av_bswap32(X[i]);
99 100
 #endif
100 101
 
101 102
 #if CONFIG_SMALL
102
-    for( i = 0; i < 64; i++ ){
103
-        CORE(i,a,b,c,d)
104
-        t=d; d=c; c=b; b=a; a=t;
103
+    for (i = 0; i < 64; i++) {
104
+        CORE(i, a, b, c, d);
105
+        t = d;
106
+        d = c;
107
+        c = b;
108
+        b = a;
109
+        a = t;
105 110
     }
106 111
 #else
107
-#define CORE2(i) CORE(i,a,b,c,d) CORE((i+1),d,a,b,c) CORE((i+2),c,d,a,b) CORE((i+3),b,c,d,a)
108
-#define CORE4(i) CORE2(i) CORE2((i+4)) CORE2((i+8)) CORE2((i+12))
109
-CORE4(0) CORE4(16) CORE4(32) CORE4(48)
112
+#define CORE2(i)                                                        \
113
+    CORE( i,   a,b,c,d); CORE((i+1),d,a,b,c);                           \
114
+    CORE((i+2),c,d,a,b); CORE((i+3),b,c,d,a)
115
+#define CORE4(i) CORE2(i); CORE2((i+4)); CORE2((i+8)); CORE2((i+12))
116
+    CORE4(0); CORE4(16); CORE4(32); CORE4(48);
110 117
 #endif
111 118
 
112 119
     ABCD[0] += d;
... ...
@@ -115,8 +123,9 @@ CORE4(0) CORE4(16) CORE4(32) CORE4(48)
115 115
     ABCD[3] += a;
116 116
 }
117 117
 
118
-void av_md5_init(AVMD5 *ctx){
119
-    ctx->len    = 0;
118
+void av_md5_init(AVMD5 *ctx)
119
+{
120
+    ctx->len     = 0;
120 121
 
121 122
     ctx->ABCD[0] = 0x10325476;
122 123
     ctx->ABCD[1] = 0x98badcfe;
... ...
@@ -124,59 +133,72 @@ void av_md5_init(AVMD5 *ctx){
124 124
     ctx->ABCD[3] = 0x67452301;
125 125
 }
126 126
 
127
-void av_md5_update(AVMD5 *ctx, const uint8_t *src, const int len){
127
+void av_md5_update(AVMD5 *ctx, const uint8_t *src, const int len)
128
+{
128 129
     int i, j;
129 130
 
130
-    j= ctx->len & 63;
131
+    j = ctx->len & 63;
131 132
     ctx->len += len;
132 133
 
133
-    for( i = 0; i < len; i++ ){
134
+    for (i = 0; i < len; i++) {
134 135
         ctx->block[j++] = src[i];
135
-        if( 64 == j ){
136
-            body(ctx->ABCD, (uint32_t*) ctx->block);
136
+        if (j == 64) {
137
+            body(ctx->ABCD, (uint32_t *) ctx->block);
137 138
             j = 0;
138 139
         }
139 140
     }
140 141
 }
141 142
 
142
-void av_md5_final(AVMD5 *ctx, uint8_t *dst){
143
+void av_md5_final(AVMD5 *ctx, uint8_t *dst)
144
+{
143 145
     int i;
144
-    uint64_t finalcount= av_le2ne64(ctx->len<<3);
146
+    uint64_t finalcount = av_le2ne64(ctx->len << 3);
145 147
 
146 148
     av_md5_update(ctx, "\200", 1);
147
-    while((ctx->len & 63)!=56)
149
+    while ((ctx->len & 63) != 56)
148 150
         av_md5_update(ctx, "", 1);
149 151
 
150
-    av_md5_update(ctx, (uint8_t*)&finalcount, 8);
152
+    av_md5_update(ctx, (uint8_t *)&finalcount, 8);
151 153
 
152
-    for(i=0; i<4; i++)
153
-        ((uint32_t*)dst)[i]= av_le2ne32(ctx->ABCD[3-i]);
154
+    for (i = 0; i < 4; i++)
155
+        AV_WL32(dst + 4*i, ctx->ABCD[3 - i]);
154 156
 }
155 157
 
156
-void av_md5_sum(uint8_t *dst, const uint8_t *src, const int len){
157
-    AVMD5 ctx[1];
158
+void av_md5_sum(uint8_t *dst, const uint8_t *src, const int len)
159
+{
160
+    AVMD5 ctx;
158 161
 
159
-    av_md5_init(ctx);
160
-    av_md5_update(ctx, src, len);
161
-    av_md5_final(ctx, dst);
162
+    av_md5_init(&ctx);
163
+    av_md5_update(&ctx, src, len);
164
+    av_md5_final(&ctx, dst);
162 165
 }
163 166
 
164 167
 #ifdef TEST
165
-#include <stdio.h>
166
-#include <inttypes.h>
167 168
 #undef printf
169
+#include <stdio.h>
170
+
171
+static void print_md5(uint8_t *md5)
172
+{
173
+    int i;
174
+    for (i = 0; i < 16; i++)
175
+        printf("%02x", md5[i]);
176
+    printf("\n");
177
+}
178
+
168 179
 int main(void){
169
-    uint64_t md5val;
180
+    uint8_t md5val[16];
170 181
     int i;
171 182
     uint8_t in[1000];
172 183
 
173
-    for(i=0; i<1000; i++) in[i]= i*i;
174
-    av_md5_sum( (uint8_t*)&md5val, in,  1000); printf("%"PRId64"\n", md5val);
175
-    av_md5_sum( (uint8_t*)&md5val, in,  63); printf("%"PRId64"\n", md5val);
176
-    av_md5_sum( (uint8_t*)&md5val, in,  64); printf("%"PRId64"\n", md5val);
177
-    av_md5_sum( (uint8_t*)&md5val, in,  65); printf("%"PRId64"\n", md5val);
178
-    for(i=0; i<1000; i++) in[i]= i % 127;
179
-    av_md5_sum( (uint8_t*)&md5val, in,  999); printf("%"PRId64"\n", md5val);
184
+    for (i = 0; i < 1000; i++)
185
+        in[i] = i * i;
186
+    av_md5_sum(md5val, in, 1000); print_md5(md5val);
187
+    av_md5_sum(md5val, in,   63); print_md5(md5val);
188
+    av_md5_sum(md5val, in,   64); print_md5(md5val);
189
+    av_md5_sum(md5val, in,   65); print_md5(md5val);
190
+    for (i = 0; i < 1000; i++)
191
+        in[i] = i % 127;
192
+    av_md5_sum(md5val, in,  999); print_md5(md5val);
180 193
 
181 194
     return 0;
182 195
 }
... ...
@@ -30,6 +30,7 @@
30 30
 #include "opt.h"
31 31
 #include "eval.h"
32 32
 #include "dict.h"
33
+#include "log.h"
33 34
 
34 35
 #if FF_API_FIND_OPT
35 36
 //FIXME order them and do a bin search
... ...
@@ -195,7 +196,6 @@ int av_set_string3(void *obj, const char *name, const char *val, int alloc, cons
195 195
                 return 0;
196 196
             notfirst=1;
197 197
         }
198
-        return AVERROR(EINVAL);
199 198
     }
200 199
 
201 200
     if (alloc) {
... ...
@@ -28,6 +28,7 @@
28 28
 #include "avstring.h"
29 29
 #include "avutil.h"
30 30
 #include "eval.h"
31
+#include "log.h"
31 32
 #include "random_seed.h"
32 33
 #include "parseutils.h"
33 34
 
... ...
@@ -462,7 +463,6 @@ const char *small_strptime(const char *p, const char *fmt,
462 462
             p++;
463 463
         }
464 464
     }
465
-    return p;
466 465
 }
467 466
 
468 467
 static time_t mktimegm(struct tm *tm)
... ...
@@ -28,6 +28,7 @@
28 28
 
29 29
 #include <string.h>
30 30
 #include "libavutil/avutil.h"
31
+#include "libavutil/log.h"
31 32
 #include "postprocess.h"
32 33
 
33 34
 #define V_DEBLOCK       0x01
... ...
@@ -28,6 +28,8 @@
28 28
  */
29 29
 
30 30
 #include "libavutil/avutil.h"
31
+#include "libavutil/log.h"
32
+#include "libavutil/pixfmt.h"
31 33
 
32 34
 #define LIBSWSCALE_VERSION_MAJOR 2
33 35
 #define LIBSWSCALE_VERSION_MINOR 0
... ...
@@ -247,7 +249,6 @@ int sws_scale_ordered(struct SwsContext *context, const uint8_t* const src[],
247 247
 
248 248
 /**
249 249
  * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x]
250
- * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
251 250
  * @return -1 if not supported
252 251
  */
253 252
 int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
... ...
@@ -28,6 +28,8 @@
28 28
 #endif
29 29
 
30 30
 #include "libavutil/avutil.h"
31
+#include "libavutil/log.h"
32
+#include "libavutil/pixfmt.h"
31 33
 
32 34
 #define STR(s)         AV_TOSTRING(s) //AV_STRINGIFY is too long
33 35
 
... ...
@@ -43,6 +43,7 @@ include $(SRC_PATH)/tests/fate/amrnb.mak
43 43
 include $(SRC_PATH)/tests/fate/amrwb.mak
44 44
 include $(SRC_PATH)/tests/fate/fft.mak
45 45
 include $(SRC_PATH)/tests/fate/h264.mak
46
+include $(SRC_PATH)/tests/fate/libavutil.mak
46 47
 include $(SRC_PATH)/tests/fate/mp3.mak
47 48
 include $(SRC_PATH)/tests/fate/vorbis.mak
48 49
 include $(SRC_PATH)/tests/fate/vp8.mak
49 50
new file mode 100644
... ...
@@ -0,0 +1,30 @@
0
+FATE_TESTS += fate-adler32
1
+fate-adler32: libavutil/adler32-test$(EXESUF)
2
+fate-adler32: CMD = run libavutil/adler32-test
3
+fate-adler32: REF = /dev/null
4
+
5
+FATE_TESTS += fate-aes
6
+fate-aes: libavutil/aes-test$(EXESUF)
7
+fate-aes: CMD = run libavutil/aes-test
8
+fate-aes: REF = /dev/null
9
+
10
+FATE_TESTS += fate-base64
11
+fate-base64: libavutil/base64-test$(EXESUF)
12
+fate-base64: CMD = run libavutil/base64-test
13
+
14
+FATE_TESTS += fate-crc
15
+fate-crc: libavutil/crc-test$(EXESUF)
16
+fate-crc: CMD = run libavutil/crc-test
17
+
18
+FATE_TESTS += fate-des
19
+fate-des: libavutil/des-test$(EXESUF)
20
+fate-des: CMD = run libavutil/des-test
21
+fate-des: REF = /dev/null
22
+
23
+FATE_TESTS += fate-md5
24
+fate-md5: libavutil/md5-test$(EXESUF)
25
+fate-md5: CMD = run libavutil/md5-test
26
+
27
+FATE_TESTS += fate-sha
28
+fate-sha: libavutil/sha-test$(EXESUF)
29
+fate-sha: CMD = run libavutil/sha-test
... ...
@@ -213,24 +213,6 @@ fate-mjpegb: CMD = framecrc -idct simple -flags +bitexact -i $(SAMPLES)/mjpegb/m
213 213
 FATE_TESTS += fate-rv30
214 214
 fate-rv30: CMD = framecrc -flags +bitexact -dct fastint -idct simple -i $(SAMPLES)/real/rv30.rm -an
215 215
 
216
-FATE_TESTS += fate-sha
217
-fate-sha: libavutil/sha-test$(EXESUF)
218
-fate-sha: CMD = run libavutil/sha-test
219
-
220
-FATE_TESTS += fate-adler32
221
-fate-adler32: libavutil/adler32-test$(EXESUF)
222
-fate-adler32: CMD = run libavutil/adler32-test
223
-fate-adler32: REF = /dev/null
224
-
225
-FATE_TESTS += fate-aes
226
-fate-aes: libavutil/aes-test$(EXESUF)
227
-fate-aes: CMD = run libavutil/aes-test
228
-fate-aes: REF = /dev/null
229
-
230
-FATE_TESTS += fate-base64
231
-fate-base64: libavutil/base64-test$(EXESUF)
232
-fate-base64: CMD = run libavutil/base64-test
233
-
234 216
 FATE_TESTS += fate-musepack7
235 217
 fate-musepack7: CMD = pcm -i $(SAMPLES)/musepack/inside-mp7.mpc
236 218
 fate-musepack7: CMP = oneoff
237 219
new file mode 100644
... ...
@@ -0,0 +1,4 @@
0
+crc EDB88320 =3D5CDD04
1
+crc 04C11DB7 =E0BAF5C0
2
+crc 00008005 =BB1F
3
+crc 00000007 =E3
0 4
new file mode 100644
... ...
@@ -0,0 +1,5 @@
0
+0bf1bcc8a1d72e2cf58d42182b637e56
1
+993a3eb298e52aca83ecfbb6a766b4d0
2
+07c01ca7c733475fad38c84c56f305c1
3
+9fc8404827cac26385f48f4f58fd32ce
4
+a22bfef14302c5ca46e0ae91092bc0e0
... ...
@@ -25,6 +25,7 @@
25 25
 #include <string.h>
26 26
 
27 27
 #include "libavutil/common.h"
28
+#include "libavutil/mathematics.h"
28 29
 #include "libavformat/avformat.h"
29 30
 
30 31
 #undef exit