源码阅读 x264 - 运动补偿

发表于 2021-01-22 更新于 2021-11-07 分类于源码阅读

本文主要记录 x264 中的 x264_mc_init 函数。该函数主要对 x264_mc_functions_t 结构体中的函数指针进行赋值，完成像素内插、拷贝、求平均等运动补偿相关函数的初始化。

/*
 * Populate the motion-compensation function table `pf`.
 *
 * Every assignment visible here installs a default implementation into one
 * slot of x264_mc_functions_t.  The tail of the function is elided in this
 * excerpt (see the "......" marker); `cpu` and `cpu_independent` are not
 * referenced by the visible part -- presumably they drive the omitted
 * platform-specific overrides (TODO confirm against the full source).
 */
void x264_mc_init(uint32_t cpu, x264_mc_functions_t *pf, int cpu_independent)
{
    /* Luma motion compensation and reference-block fetch. */
    pf->mc_luma   = mc_luma;
    pf->get_ref   = get_ref;

    /* Chroma motion compensation. */
    pf->mc_chroma = mc_chroma;

    /* Pixel averaging, one entry per block size. */
    pf->avg[PIXEL_16x16]= pixel_avg_16x16;
    pf->avg[PIXEL_16x8] = pixel_avg_16x8;
    pf->avg[PIXEL_8x16] = pixel_avg_8x16;
    pf->avg[PIXEL_8x8]  = pixel_avg_8x8;
    pf->avg[PIXEL_8x4]  = pixel_avg_8x4;
    pf->avg[PIXEL_4x16] = pixel_avg_4x16;
    pf->avg[PIXEL_4x8]  = pixel_avg_4x8;
    pf->avg[PIXEL_4x4]  = pixel_avg_4x4;
    pf->avg[PIXEL_4x2]  = pixel_avg_4x2;
    pf->avg[PIXEL_2x8]  = pixel_avg_2x8;
    pf->avg[PIXEL_2x4]  = pixel_avg_2x4;
    pf->avg[PIXEL_2x2]  = pixel_avg_2x2;

    /* Weighted prediction: weight, offsetadd and offsetsub all share the
     * same mc_weight_wtab table in this default setup. */
    pf->weight    = mc_weight_wtab;
    pf->offsetadd = mc_weight_wtab;
    pf->offsetsub = mc_weight_wtab;
    pf->weight_cache = weight_cache;

    /* Block copies; the unaligned 16x16 slot reuses the same routine as
     * the regular 16x16 copy. */
    pf->copy_16x16_unaligned = mc_copy_w16;
    pf->copy[PIXEL_16x16] = mc_copy_w16;
    pf->copy[PIXEL_8x8]   = mc_copy_w8;
    pf->copy[PIXEL_4x4]   = mc_copy_w4;

    /* Interleaved-chroma store / load helpers. */
    pf->store_interleave_chroma       = store_interleave_chroma;
    pf->load_deinterleave_chroma_fenc = load_deinterleave_chroma_fenc;
    pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec;

    /* Whole-plane copy variants. */
    pf->plane_copy = x264_plane_copy_c;
    pf->plane_copy_swap = x264_plane_copy_swap_c;
    pf->plane_copy_interleave = x264_plane_copy_interleave_c;

    /* Plane de-interleaving; the yuyv slot shares the generic
     * x264_plane_copy_deinterleave_c routine. */
    pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
    pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_c;
    pf->plane_copy_deinterleave_rgb = plane_copy_deinterleave_rgb_c;
    pf->plane_copy_deinterleave_v210 = plane_copy_deinterleave_v210_c;

    /* Half-pel interpolation filter. */
    pf->hpel_filter = hpel_filter;

    /* Prefetch hooks default to the *_null routines (presumably no-ops);
     * the aligned memcpy slot is plain memcpy. */
    pf->prefetch_fenc_400 = prefetch_fenc_null;
    pf->prefetch_fenc_420 = prefetch_fenc_null;
    pf->prefetch_fenc_422 = prefetch_fenc_null;
    pf->prefetch_ref  = prefetch_ref_null;
    pf->memcpy_aligned = memcpy;
    pf->memzero_aligned = memzero_aligned;
    pf->frame_init_lowres_core = frame_init_lowres_core;

    /* Integral-image initialisation helpers. */
    pf->integral_init4h = integral_init4h;
    pf->integral_init8h = integral_init8h;
    pf->integral_init4v = integral_init4v;
    pf->integral_init8v = integral_init8v;

    /* Macroblock-tree helpers. */
    pf->mbtree_propagate_cost = mbtree_propagate_cost;
    pf->mbtree_propagate_list = mbtree_propagate_list;
    pf->mbtree_fix8_pack      = mbtree_fix8_pack;
    pf->mbtree_fix8_unpack    = mbtree_fix8_unpack;

    /* The platform-specific assembly function initialisation is omitted here. */
    ......
}

由于运动估计和运动补偿在 x264 中属于相对复杂的环节，其中许多函数的作用很难三言两语表述出来，因此只分析两个相对重要的例子：

半像素内插函数 hpel_filter()
获取亚像素数据的函数 get_ref()

半像素内插函数

hpel_filter() 用于进行半像素插值，函数定义如下所示：

/*
 * 6-tap half-pel interpolation kernel with coefficients (1, -5, 20, 20, -5, 1),
 * i.e. the numerator of
 *     b = (E - 5F + 20G + 20H - 5I + J) / 32
 * The macro does NOT divide by 32; the caller rounds and shifts.
 *
 *   pix : base pointer of the samples to filter
 *   d   : distance between neighbouring taps --
 *         1 gives the horizontal filter, the row stride gives the vertical one
 *
 * NOTE: the macro reads a variable named `x` from the expansion site as the
 * current sample index.  `d` is parenthesised so that expression arguments
 * (e.g. `a + b`) expand with the intended precedence.
 */
#define TAPFILTER(pix, d) ((pix)[x-2*(d)] + (pix)[x+3*(d)] - 5*((pix)[x-(d)] + (pix)[x+2*(d)]) + 20*((pix)[x] + (pix)[x+(d)]))

// Clamp x to the valid sample range [0, PIXEL_MAX]:
//   x < 0               -> 0
//   0 <= x <= PIXEL_MAX -> x
//   x > PIXEL_MAX       -> PIXEL_MAX
// Written with plain comparisons rather than the `(-x)>>31` bit trick, which
// right-shifts a negative signed value (implementation-defined) and negates
// INT_MIN (undefined behaviour).
static ALWAYS_INLINE pixel x264_clip_pixel(int x) {
    if (x < 0)
        return 0;
    if (x > PIXEL_MAX)
        return PIXEL_MAX;
    return x;
}

/*
 * Half-pel interpolation of one plane.
 *
 * dsth : output, horizontally filtered half-pel samples
 *        (aa, bb, b, s, gg, hh in the article's diagram)
 * dstv : output, vertically filtered half-pel samples
 *        (cc, dd, h, m, ee, ff in the article's diagram)
 * dstc : output, "horizontal + vertical" filtered half-pel samples that sit
 *        in the centre of four integer pixels (j in the article's diagram)
 * src  : input integer-pel samples
 * stride : distance between rows, applied to src and to all three outputs
 * width, height : size of the area to process
 * buf  : int16_t scratch row; indices 0 .. width+4 are written per row
 *
 * Access ranges implied by the loops below: src is read from two rows above
 * to three rows below the current row and from column -2 to width+2 (plus
 * the horizontal taps), and dstv is written for columns -2 .. width+2, so
 * the caller must provide that padding.
 */
static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
                         intptr_t stride, int width, int height, int16_t *buf ) {
    /* Bias added to the unnormalised vertical sums before they are stored in
     * the int16_t scratch row; only used when BIT_DEPTH > 9.  Presumably it
     * keeps the high-bit-depth sums inside the int16_t range -- the exact
     * range depends on PIXEL_MAX, which is defined elsewhere. */
    const int pad = (BIT_DEPTH> 9) ? (-10 * PIXEL_MAX) : 0;
    /*
     * Relative positions of the sample types
     *
     * X : integer pixel
     * H : horizontally filtered half-pel sample
     * V : vertically filtered half-pel sample
     * C : centre half-pel sample
     *
     * X   H   X       X       X
     *
     * V   C
     *
     * X       X       X       X
     *
     *
     *
     * X       X       X       X
     *
     */
    // Process the plane one row at a time.
    for (int y = 0; y < height; y++) {
        // Vertical pass.  It runs from column -2 to width+2 because the
        // centre pass below needs five extra vertical sums around each row.
        for (int x = -2; x < width + 3; x++) {
            int v = TAPFILTER(src,stride);
            // Round, divide by 32 and clamp to get the vertical half-pel sample.
            dstv[x] = x264_clip_pixel( (v + 16) >> 5 );
            /* transform v for storage in a 16-bit integer */
            buf[x+2] = v + pad;
        }
        // Centre pass: horizontal filter over the unnormalised vertical sums.
        // The tap weights add up to 32, so the bias contributes 32*pad and is
        // subtracted again; the two filter stages together scale by
        // 32*32 = 1024, hence the +512 rounding term and the >> 10.
        for (int x = 0; x < width; x++)
            dstc[x] = x264_clip_pixel( (TAPFILTER(buf+2,1) - 32*pad + 512) >> 10 );
        // Horizontal pass directly on the integer pixels.
        for (int x = 0; x < width; x++)
            dsth[x] = x264_clip_pixel( (TAPFILTER(src,1) + 16) >> 5 );
        // Advance all planes to the next row.
        dsth += stride;
        dstv += stride;
        dstc += stride;
        src  += stride;
    }
}

半像素插值示意图如下：

半像素点的计算关系如下：

m : 由 B、D、H、N、S、U 计算
h : 由 A、C、G、M、R、T 计算
s : 由 K、L、M、N、P、Q 计算
j : 由 cc、dd、h、m、ee、ff 计算。需要注意 j 点的运算量比较大，因为 cc、dd、ee、ff 都需要通过半像素内插方法进行计算。

获取亚像素数据

get_ref() 函数用于获取亚像素数据

/*
 * Plane-selection tables used by get_ref().  Both are indexed by
 * qpel_idx = ((mvy & 3) << 2) + (mvx & 3), and each entry is an index into
 * the src[4] array of precomputed planes.
 * x264_hpel_ref0 picks the first source plane; x264_hpel_ref1 picks the
 * second plane that is averaged with it when quarter-pel interpolation is
 * needed.
 * Per the article's description, the planes are 0 = full-pel,
 * 1 = horizontal half-pel, 2 = vertical half-pel, 3 = centre half-pel
 * (NOTE(review): the plane order is set by the caller -- confirm).
 */
const uint8_t x264_hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
const uint8_t x264_hpel_ref1[16] = {0,0,1,0,2,2,3,2,2,2,3,2,2,2,3,2};

/*
 * Fetch the reference block addressed by a quarter-pel motion vector.
 *
 * dst          : caller-provided buffer, used only when a block must be built
 * i_dst_stride : in: stride of dst; out: overwritten with i_src_stride when
 *                a pointer into the source planes is returned directly
 * src          : four precomputed planes, selected through x264_hpel_ref0/1
 *                (per the article: full-pel, horizontal, vertical and centre
 *                half-pel)
 * i_src_stride : stride shared by the source planes
 * mvx, mvy     : motion vector in quarter-pel units
 * i_width, i_height : block size
 * weight       : weighted-prediction parameters; applied when
 *                weight->weightfn is non-NULL
 *
 * Returns dst when the block was averaged and/or weighted into it, otherwise
 * a pointer straight into the selected source plane.
 *
 * Mapping from the fractional part of (mvy, mvx) to qpel_idx, which is
 * ((mvy & 3) << 2) + (mvx & 3):
 *
 *   mvy \ mvx |  0p  | 1/4p | 1/2p | 3/4p |
 *   ----------+------+------+------+------+
 *        0p   |   0  |   1  |   2  |   3  |
 *       1/4p  |   4  |   5  |   6  |   7  |
 *       1/2p  |   8  |   9  |  10  |  11  |
 *       3/4p  |  12  |  13  |  14  |  15  |
 */
static pixel *get_ref( pixel *dst,   intptr_t *i_dst_stride,
                       pixel *src[4], intptr_t i_src_stride,
                       int mvx, int mvy,
                       int i_width, int i_height, const x264_weight_t *weight ) {
    /* Fractional (quarter-pel) parts of the motion vector. */
    const int frac_x = mvx & 3;
    const int frac_y = mvy & 3;
    const int qpel_idx = (frac_y << 2) + frac_x;

    /* Integer-pel displacement of the matching block inside a plane. */
    int offset = (mvy>>2)*i_src_stride + (mvx>>2);

    /* First source: one of the four planes; moved one row down when the
     * vertical fraction is 3/4. */
    pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + (frac_y == 3) * i_src_stride;

    /* Mask 5 (binary 0101) tests the low bit of each fraction: it is set
     * whenever x or y sits on a 1/4 or 3/4 position.  When neither is set
     * the position is integer or half-pel and src1 already holds the data. */
    if (!(qpel_idx & 5)) {
        if (!weight->weightfn) {
            /* Nothing to compute: hand back the plane itself. */
            *i_dst_stride = i_src_stride;
            return src1;
        }
        mc_weight(dst, *i_dst_stride, src1, i_src_stride, weight, i_width, i_height);
        return dst;
    }

    /* Quarter-pel position: average src1 with a second plane, which is
     * moved one column right when the horizontal fraction is 3/4. */
    pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + (frac_x == 3);
    pixel_avg( dst, *i_dst_stride, src1, i_src_stride,
               src2, i_src_stride, i_width, i_height );

    /* Optional weighted prediction, applied in place on the averaged block. */
    if (weight->weightfn)
        mc_weight(dst, *i_dst_stride, dst, *i_dst_stride, weight, i_width, i_height);
    return dst;
}