xiph / rav1e

The fastest and safest AV1 encoder.
BSD 2-Clause "Simplified" License
3.63k stars 246 forks source link

Add Rust-native implementation matching available dav1d assembly #3077

Open barrbrain opened 1 year ago

barrbrain commented 1 year ago

For x86_64 builds of rav1e at the time of opening this issue, there are 212 functions included in assembly that are not referenced by rav1e Rust code. Most of these correspond to features that are not yet implemented. Some belong to existing features where integrating the assembly has met friction.

``` rav1e_blend_16bpc_avx2 rav1e_blend_16bpc_ssse3 rav1e_blend_8bpc_avx2 rav1e_blend_8bpc_avx512icl rav1e_blend_8bpc_ssse3 rav1e_blend_h_16bpc_avx2 rav1e_blend_h_16bpc_ssse3 rav1e_blend_h_8bpc_avx2 rav1e_blend_h_8bpc_avx512icl rav1e_blend_h_8bpc_ssse3 rav1e_blend_v_16bpc_avx2 rav1e_blend_v_16bpc_ssse3 rav1e_blend_v_8bpc_avx2 rav1e_blend_v_8bpc_avx512icl rav1e_blend_v_8bpc_ssse3 rav1e_cdef_dir_16bpc_sse4 rav1e_cdef_dir_16bpc_ssse3 rav1e_cdef_dir_8bpc_sse4 rav1e_cdef_dir_8bpc_ssse3 rav1e_cdef_filter_4x4_16bpc_avx2 rav1e_cdef_filter_4x4_16bpc_ssse3 rav1e_cdef_filter_4x4_8bpc_sse2 rav1e_cdef_filter_4x4_8bpc_sse4 rav1e_cdef_filter_4x4_8bpc_ssse3 rav1e_cdef_filter_4x8_16bpc_avx2 rav1e_cdef_filter_4x8_16bpc_ssse3 rav1e_cdef_filter_4x8_8bpc_sse2 rav1e_cdef_filter_4x8_8bpc_sse4 rav1e_cdef_filter_4x8_8bpc_ssse3 rav1e_cdef_filter_8x8_16bpc_avx2 rav1e_cdef_filter_8x8_16bpc_ssse3 rav1e_cdef_filter_8x8_8bpc_sse2 rav1e_cdef_filter_8x8_8bpc_sse4 rav1e_cdef_filter_8x8_8bpc_ssse3 rav1e_emu_edge_16bpc_avx2 rav1e_emu_edge_16bpc_ssse3 rav1e_emu_edge_8bpc_avx2 rav1e_emu_edge_8bpc_ssse3 rav1e_inv_txfm_add_wht_wht_4x4_16bpc_avx2 rav1e_inv_txfm_add_wht_wht_4x4_16bpc_sse2 rav1e_inv_txfm_add_wht_wht_4x4_8bpc_avx2 rav1e_inv_txfm_add_wht_wht_4x4_8bpc_sse2 rav1e_ipred_cfl_ac_420_16bpc_avx2 rav1e_ipred_cfl_ac_420_8bpc_avx2 rav1e_ipred_cfl_ac_420_8bpc_ssse3 rav1e_ipred_cfl_ac_422_16bpc_avx2 rav1e_ipred_cfl_ac_422_8bpc_avx2 rav1e_ipred_cfl_ac_422_8bpc_ssse3 rav1e_ipred_cfl_ac_444_16bpc_avx2 rav1e_ipred_cfl_ac_444_8bpc_avx2 rav1e_ipred_cfl_ac_444_8bpc_ssse3 rav1e_ipred_filter_16bpc_avx2 rav1e_ipred_filter_8bpc_avx2 rav1e_ipred_filter_8bpc_ssse3 rav1e_mask_16bpc_avx2 rav1e_mask_16bpc_ssse3 rav1e_mask_8bpc_avx2 rav1e_mask_8bpc_avx512icl rav1e_mask_8bpc_ssse3 rav1e_obmc_masks_avx2 rav1e_pal_pred_16bpc_avx2 rav1e_pal_pred_8bpc_avx2 rav1e_pal_pred_8bpc_ssse3 rav1e_prep_8tap_16bpc_avx2 rav1e_prep_8tap_16bpc_ssse3 rav1e_prep_8tap_8bpc_avx2 rav1e_prep_8tap_8bpc_avx512icl rav1e_prep_8tap_8bpc_sse2 
rav1e_prep_8tap_8bpc_ssse3 rav1e_prep_8tap_scaled_16bpc_avx2 rav1e_prep_8tap_scaled_16bpc_ssse3 rav1e_prep_8tap_scaled_8bpc_avx2 rav1e_prep_8tap_scaled_8bpc_ssse3 rav1e_prep_8tap_scaled_regular_16bpc_avx2 rav1e_prep_8tap_scaled_regular_16bpc_ssse3 rav1e_prep_8tap_scaled_regular_8bpc_avx2 rav1e_prep_8tap_scaled_regular_8bpc_ssse3 rav1e_prep_8tap_scaled_regular_sharp_16bpc_avx2 rav1e_prep_8tap_scaled_regular_sharp_16bpc_ssse3 rav1e_prep_8tap_scaled_regular_sharp_8bpc_avx2 rav1e_prep_8tap_scaled_regular_sharp_8bpc_ssse3 rav1e_prep_8tap_scaled_regular_smooth_16bpc_avx2 rav1e_prep_8tap_scaled_regular_smooth_16bpc_ssse3 rav1e_prep_8tap_scaled_regular_smooth_8bpc_avx2 rav1e_prep_8tap_scaled_regular_smooth_8bpc_ssse3 rav1e_prep_8tap_scaled_sharp_16bpc_avx2 rav1e_prep_8tap_scaled_sharp_16bpc_ssse3 rav1e_prep_8tap_scaled_sharp_8bpc_avx2 rav1e_prep_8tap_scaled_sharp_8bpc_ssse3 rav1e_prep_8tap_scaled_sharp_regular_16bpc_avx2 rav1e_prep_8tap_scaled_sharp_regular_16bpc_ssse3 rav1e_prep_8tap_scaled_sharp_regular_8bpc_avx2 rav1e_prep_8tap_scaled_sharp_regular_8bpc_ssse3 rav1e_prep_8tap_scaled_sharp_smooth_16bpc_avx2 rav1e_prep_8tap_scaled_sharp_smooth_16bpc_ssse3 rav1e_prep_8tap_scaled_sharp_smooth_8bpc_avx2 rav1e_prep_8tap_scaled_sharp_smooth_8bpc_ssse3 rav1e_prep_8tap_scaled_smooth_16bpc_avx2 rav1e_prep_8tap_scaled_smooth_16bpc_ssse3 rav1e_prep_8tap_scaled_smooth_8bpc_avx2 rav1e_prep_8tap_scaled_smooth_8bpc_ssse3 rav1e_prep_8tap_scaled_smooth_regular_16bpc_avx2 rav1e_prep_8tap_scaled_smooth_regular_16bpc_ssse3 rav1e_prep_8tap_scaled_smooth_regular_8bpc_avx2 rav1e_prep_8tap_scaled_smooth_regular_8bpc_ssse3 rav1e_prep_8tap_scaled_smooth_sharp_16bpc_avx2 rav1e_prep_8tap_scaled_smooth_sharp_16bpc_ssse3 rav1e_prep_8tap_scaled_smooth_sharp_8bpc_avx2 rav1e_prep_8tap_scaled_smooth_sharp_8bpc_ssse3 rav1e_prep_bilin_scaled_16bpc_avx2 rav1e_prep_bilin_scaled_16bpc_ssse3 rav1e_prep_bilin_scaled_8bpc_avx2 rav1e_prep_bilin_scaled_8bpc_ssse3 rav1e_put_8tap_16bpc_avx2 rav1e_put_8tap_16bpc_ssse3 
rav1e_put_8tap_8bpc_avx2 rav1e_put_8tap_8bpc_avx512icl rav1e_put_8tap_8bpc_ssse3 rav1e_put_8tap_scaled_16bpc_avx2 rav1e_put_8tap_scaled_16bpc_ssse3 rav1e_put_8tap_scaled_8bpc_avx2 rav1e_put_8tap_scaled_8bpc_ssse3 rav1e_put_8tap_scaled_regular_16bpc_avx2 rav1e_put_8tap_scaled_regular_16bpc_ssse3 rav1e_put_8tap_scaled_regular_8bpc_avx2 rav1e_put_8tap_scaled_regular_8bpc_ssse3 rav1e_put_8tap_scaled_regular_sharp_16bpc_avx2 rav1e_put_8tap_scaled_regular_sharp_16bpc_ssse3 rav1e_put_8tap_scaled_regular_sharp_8bpc_avx2 rav1e_put_8tap_scaled_regular_sharp_8bpc_ssse3 rav1e_put_8tap_scaled_regular_smooth_16bpc_avx2 rav1e_put_8tap_scaled_regular_smooth_16bpc_ssse3 rav1e_put_8tap_scaled_regular_smooth_8bpc_avx2 rav1e_put_8tap_scaled_regular_smooth_8bpc_ssse3 rav1e_put_8tap_scaled_sharp_16bpc_avx2 rav1e_put_8tap_scaled_sharp_16bpc_ssse3 rav1e_put_8tap_scaled_sharp_8bpc_avx2 rav1e_put_8tap_scaled_sharp_8bpc_ssse3 rav1e_put_8tap_scaled_sharp_regular_16bpc_avx2 rav1e_put_8tap_scaled_sharp_regular_16bpc_ssse3 rav1e_put_8tap_scaled_sharp_regular_8bpc_avx2 rav1e_put_8tap_scaled_sharp_regular_8bpc_ssse3 rav1e_put_8tap_scaled_sharp_smooth_16bpc_avx2 rav1e_put_8tap_scaled_sharp_smooth_16bpc_ssse3 rav1e_put_8tap_scaled_sharp_smooth_8bpc_avx2 rav1e_put_8tap_scaled_sharp_smooth_8bpc_ssse3 rav1e_put_8tap_scaled_smooth_16bpc_avx2 rav1e_put_8tap_scaled_smooth_16bpc_ssse3 rav1e_put_8tap_scaled_smooth_8bpc_avx2 rav1e_put_8tap_scaled_smooth_8bpc_ssse3 rav1e_put_8tap_scaled_smooth_regular_16bpc_avx2 rav1e_put_8tap_scaled_smooth_regular_16bpc_ssse3 rav1e_put_8tap_scaled_smooth_regular_8bpc_avx2 rav1e_put_8tap_scaled_smooth_regular_8bpc_ssse3 rav1e_put_8tap_scaled_smooth_sharp_16bpc_avx2 rav1e_put_8tap_scaled_smooth_sharp_16bpc_ssse3 rav1e_put_8tap_scaled_smooth_sharp_8bpc_avx2 rav1e_put_8tap_scaled_smooth_sharp_8bpc_ssse3 rav1e_put_bilin_scaled_16bpc_avx2 rav1e_put_bilin_scaled_16bpc_ssse3 rav1e_put_bilin_scaled_8bpc_avx2 rav1e_put_bilin_scaled_8bpc_ssse3 rav1e_resize_16bpc_avx2 
rav1e_resize_16bpc_ssse3 rav1e_resize_8bpc_avx2 rav1e_resize_8bpc_avx512icl rav1e_resize_8bpc_ssse3 rav1e_sgr_filter_3x3_16bpc_avx2 rav1e_sgr_filter_3x3_8bpc_avx2 rav1e_sgr_filter_5x5_16bpc_avx2 rav1e_sgr_filter_5x5_8bpc_avx2 rav1e_sgr_filter_mix_16bpc_avx2 rav1e_sgr_filter_mix_8bpc_avx2 rav1e_sgr_x_by_x_avx2 rav1e_warp_affine_8x8_16bpc_avx2 rav1e_warp_affine_8x8_16bpc_ssse3 rav1e_warp_affine_8x8_8bpc_avx2 rav1e_warp_affine_8x8_8bpc_avx512icl rav1e_warp_affine_8x8_8bpc_sse2 rav1e_warp_affine_8x8_8bpc_sse4 rav1e_warp_affine_8x8_8bpc_ssse3 rav1e_warp_affine_8x8t_16bpc_avx2 rav1e_warp_affine_8x8t_16bpc_ssse3 rav1e_warp_affine_8x8t_8bpc_avx2 rav1e_warp_affine_8x8t_8bpc_avx512icl rav1e_warp_affine_8x8t_8bpc_sse2 rav1e_warp_affine_8x8t_8bpc_sse4 rav1e_warp_affine_8x8t_8bpc_ssse3 rav1e_w_avg_16bpc_avx2 rav1e_w_avg_16bpc_ssse3 rav1e_w_avg_8bpc_avx2 rav1e_w_avg_8bpc_avx512icl rav1e_w_avg_8bpc_ssse3 rav1e_wiener_filter5_16bpc_avx2 rav1e_wiener_filter5_8bpc_avx2 rav1e_wiener_filter7_16bpc_avx2 rav1e_wiener_filter7_8bpc_avx2 rav1e_w_mask_420_16bpc_avx2 rav1e_w_mask_420_16bpc_ssse3 rav1e_w_mask_420_8bpc_avx2 rav1e_w_mask_420_8bpc_avx512icl rav1e_w_mask_420_8bpc_ssse3 rav1e_w_mask_422_16bpc_avx2 rav1e_w_mask_422_16bpc_ssse3 rav1e_w_mask_422_8bpc_avx2 rav1e_w_mask_422_8bpc_avx512icl rav1e_w_mask_422_8bpc_ssse3 rav1e_w_mask_444_16bpc_avx2 rav1e_w_mask_444_16bpc_ssse3 rav1e_w_mask_444_8bpc_avx2 rav1e_w_mask_444_8bpc_avx512icl rav1e_w_mask_444_8bpc_ssse3 ```
barrbrain commented 6 months ago

There is a new project related to this issue: rav1d (https://github.com/memorysafety/rav1d). When it reaches its initial release, we might attempt integration.