分享

ffmpeg音频重采样分析

 mediatv 2016-02-27

  • 主机环境:Windows XP

    开发环境:CodeBlocks13.12

    ffmpeg版本:ffmpeg2.4

    在学习ffmpeg教程(链接地址)的过程中,由于教程所用的ffmpeg版本较低,与ffmpeg 2.4的API有些许出入,解码后的音频不能直接播放了;而ffplay.c中对解码后的音频都进行了重采样操作,于是去了解了一下重采样的相关知识。学习例程是ffmpeg 2.4源代码目录下的doc/examples/resampling_audio.c文件,为便于学习,修改后的代码如下:

    001/*
    002 * Copyright (c) 2012 Stefano Sabatini
    003 *
    004 * Permission is hereby granted, free of charge, to any person obtaining a copy
    005 * of this software and associated documentation files (the "Software"), to deal
    006 * in the Software without restriction, including without limitation the rights
    007 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    008 * copies of the Software, and to permit persons to whom the Software is
    009 * furnished to do so, subject to the following conditions:
    010 *
    011 * The above copyright notice and this permission notice shall be included in
    012 * all copies or substantial portions of the Software.
    013 *
    014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    019 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    020 * THE SOFTWARE.
    021 */
    022 
    023/**
    024 * @example resampling_audio.c
    025 * libswresample API use example.
    026 */
    027 
    028#include <libavutil/opt.h>
    029#include <libavutil/channel_layout.h>
    030#include <libavutil/samplefmt.h>
    031#include <libswresample/swresample.h>
    032 
    033/*
    034 * Look up the format-name string for sample_fmt in a local table and store it in *fmt (NULL when no entry matches); returns 0 on a match, otherwise prints an error to stderr and returns AVERROR(EINVAL).
    035 */
    036static int get_format_from_sample_fmt(const char **fmt,
    037                                      enum AVSampleFormat sample_fmt)
    038{
    039    int i;
    040    struct sample_fmt_entry {
    041        enum AVSampleFormat sample_fmt; const char *fmt_be, *fmt_le; // two candidate names per format
    042    } sample_fmt_entries[] = {
    043        { AV_SAMPLE_FMT_U8,  "u8",    "u8"    },
    044        { AV_SAMPLE_FMT_S16, "s16be", "s16le" },
    045        { AV_SAMPLE_FMT_S32, "s32be", "s32le" },
    046        { AV_SAMPLE_FMT_FLT, "f32be", "f32le" },
    047        { AV_SAMPLE_FMT_DBL, "f64be", "f64le" },
    048    };
    049    *fmt = NULL; // stays NULL on the error path below
    050 
    051    for (i = 0; i < FF_ARRAY_ELEMS(sample_fmt_entries); i++) {
    052        struct sample_fmt_entry *entry = &sample_fmt_entries[i];
    053        if (sample_fmt == entry->sample_fmt) {
    054            *fmt = AV_NE(entry->fmt_be, entry->fmt_le); // NOTE(review): AV_NE presumably selects by host byte order - confirm against libavutil
    055            return 0;
    056        }
    057    }
    058 
    059    fprintf(stderr,
    060            "Sample format %s not supported as output format\n",
    061            av_get_sample_fmt_name(sample_fmt));
    062    return AVERROR(EINVAL);
    063}
    064 
    065/**
    066 * Fill dst buffer with nb_samples, generated starting from t.
    067 * Samples a 440 Hz sine at sample_rate and writes nb_samples interleaved frames to dst; every channel of a frame holds the same value.
    068 * *t is advanced by 1/sample_rate per generated sample, so a following call continues the same wave.
    069 */
    070static void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t)
    071{
    072    int i, j;
    073    double tincr = 1.0 / sample_rate, *dstp = dst;// tincr: time step between two samples
    074    const double c = 2 * M_PI * 440.0;// angular frequency of a 440 Hz tone
    075    /* generate sin tone with 440Hz frequency and duplicated channels */
    076    for (i = 0; i < nb_samples; i++) {
    077        *dstp = sin(c * *t);// value of the sine at time *t, stored for the first channel
    078        for (j = 1; j < nb_channels; j++)
    079            dstp[j] = dstp[0];// every other channel gets the first channel's value
    080        dstp += nb_channels;// step to the next frame (one sample for each channel)
    081        *t += tincr;// move the time forward by one sample period
    082    }
    083}
    084 
    085int main(int argc, char **argv)
    086{
    087    int64_t src_ch_layout = AV_CH_LAYOUT_STEREO, dst_ch_layout = AV_CH_LAYOUT_SURROUND;
    088    //源文件布局为立体声,目的文件布局为立体环绕声
    089    int src_rate = 48000, dst_rate = 44100;//源文件及目的文件的码率
    090    uint8_t **src_data = NULL, **dst_data = NULL;//源文件及目的文件数据初始化为空
    091    int src_nb_channels = 0, dst_nb_channels = 0;//源文件及目的文件通道数初始化为0
    092    int src_linesize, dst_linesize;//源文件及目的文件通道数据大小
    093    int src_nb_samples = 1024, dst_nb_samples, max_dst_nb_samples;//源文件及目的文件样品数
    094    enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL, dst_sample_fmt = AV_SAMPLE_FMT_S16;
    095    //设置源文件及目的文件的样品格式,通过采样格式可知一个样品所占的字节数
    096    const char *src_filename = NULL,*dst_filename = NULL;//目的文件名字
    097    FILE *src_file,*dst_file;//目的文件指针
    098    int dst_bufsize;//目的文件缓存大小
    099    const char *fmt;
    100    struct SwrContext *swr_ctx;//对其成员变量不能直接操作,需使用avoption api操作
    101    double t;
    102    int ret;
    103 
    104    /*
    105    重采样音频帧以特定的格式并输出到目的文件中
    106    */
    107    if (argc != 3) {
    108        fprintf(stderr, "Usage: %s input_file output_file\n"
    109                "API example program to show how to resample an audio stream with libswresample.\n"
    110                "This program generates a series of audio frames, resamples them to a specified "
    111                "output format and rate and saves them to a input file named input_file and an output file named output_file.\n",
    112            argv[0]);
    113        exit(1);
    114    }
    115    src_filename = argv[1];
    116    dst_filename = argv[2];//赋值目的文件名字
    117 
    118    src_file = fopen(src_filename, "wb");//以二进制写方式打开目的文件
    119    if (!src_file) {
    120        fprintf(stderr, "Could not open src file %s\n", src_filename);
    121        exit(1);//打开失败退出
    122    }
    123    dst_file = fopen(dst_filename, "wb");//以二进制写方式打开目的文件
    124    if (!dst_file) {
    125        fprintf(stderr, "Could not open destination file %s\n", dst_filename);
    126        exit(1);//打开失败退出
    127    }
    128 
    129    /* create resampler context 创建重采样上下文*/
    130    swr_ctx = swr_alloc();//为重采样上下文申请空间
    131    if (!swr_ctx) {
    132        fprintf(stderr, "Could not allocate resampler context\n");
    133        ret = AVERROR(ENOMEM);//创建重采样上下文失败返回
    134        goto end;
    135    }
    136 
    137    /* set options 设置重采样上下文以avoption api方式(间接)*/
    138    av_opt_set_int(swr_ctx, "in_channel_layout",    src_ch_layout, 0);//设置输入源的通道布局
    139    av_opt_set_int(swr_ctx, "in_sample_rate",       src_rate, 0);//设置输入源的采样率
    140    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);//设置输入源的采样格式
    141 
    142    av_opt_set_int(swr_ctx, "out_channel_layout",    dst_ch_layout, 0);//设置输出源的通道布局
    143    av_opt_set_int(swr_ctx, "out_sample_rate",       dst_rate, 0);//设置输出源的采样率
    144    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);//设置输出源的采样格式
    145 
    146    /* initialize the resampling context 调用swr_init生效*/
    147    if ((ret = swr_init(swr_ctx)) < 0) {
    148        fprintf(stderr, "Failed to initialize the resampling context\n");
    149        goto end;//初始化失败退出
    150    }
    151 
    152    /* allocate source and destination samples buffers 申请输入源、输出源样品缓存*/
    153 
    154    src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);//得到输入源的通道数,数值为2
    155    ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
    156                                             src_nb_samples, src_sample_fmt, 0);
    157    printf("src_linesize:%d\n",src_linesize);//16384:src_linesize=src_nb_samples*src_sample_fmt(size)*src_nb_channels=1024*8*2
    158    //为输入源申请采样空间
    159    if (ret < 0) {
    160        fprintf(stderr, "Could not allocate source samples\n");
    161        goto end;
    162    }
    163 
    164    /* compute the number of converted samples: buffering is avoided
    165     * ensuring that the output buffer will contain at least all the
    166     * converted input samples 计算输出源的样品数,要避免溢出*/
    167    max_dst_nb_samples = dst_nb_samples =
    168        av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);//输出源与输入源什么量是一定的关系式:时间一定
    169    printf("max_dst_nb_samples:%d\n",max_dst_nb_samples);
    170    /* buffer is going to be directly written to a rawaudio file, no alignment */
    171    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);//得到输出源的通道数:3
    172    ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,
    173                                             dst_nb_samples, dst_sample_fmt, 0);
    174    //为输出源申请空间dst_linesize=dst_nb_samples*2*3
    175    printf("dst_linesize:%d\n",dst_linesize);
    176    if (ret < 0) {
    177        fprintf(stderr, "Could not allocate destination samples\n");
    178        goto end;
    179    }
    180 
    181    t = 0;
    182    do {
    183        /* generate synthetic audio 生成合成音频作为输入源*/
    184        fill_samples((double *)src_data[0], src_nb_samples, src_nb_channels, src_rate, &t);
    185 
    186        /* compute destination number of samples 计算输出源的采样数*/
    187        dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) +
    188                                        src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
    189        //printf("dst_nb_samples:%d\n",dst_nb_samples);
    190        if (dst_nb_samples > max_dst_nb_samples) {
    191            av_freep(&dst_data[0]);//如果计算所得的空间小于之前所申请的空间 ?
    192            ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
    193                                   dst_nb_samples, dst_sample_fmt, 1);//重新申请输出源空间
    194            if (ret < 0)
    195                break;
    196            max_dst_nb_samples = dst_nb_samples;//更新max_dst_nb_samples
    197        }
    198 
    199        /* convert to destination format 转换成目标格式*/
    200        ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples);
    201        if (ret < 0) {
    202            fprintf(stderr, "Error while converting\n");
    203            goto end;//转换失败,退出
    204        }
    205        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
    206                                                 ret, dst_sample_fmt, 1);
    207        //得到输出源实际所需要的空间大小
    208        if (dst_bufsize < 0) {
    209            fprintf(stderr, "Could not get sample buffer size\n");
    210            goto end;
    211        }
    212        printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);
    213        fwrite((double *)src_data[0], 1, src_linesize, src_file);
    214        fwrite(dst_data[0], 1, dst_bufsize, dst_file);//写入文件大小为dst_bufsize
    215    } while (t < 1);
    216 
    217    if ((ret = get_format_from_sample_fmt(&fmt, dst_sample_fmt)) < 0)
    218        goto end;
    219    fprintf(stderr, "Resampling succeeded. Play the output file with the command:\n"
    220            "ffplay -f %s -channel_layout %"PRId64" -channels %d -ar %d %s\n",
    221            fmt, dst_ch_layout, dst_nb_channels, dst_rate, dst_filename);
    222 
    223end:
    224    if (src_file)
    225        fclose(src_file);
    226    if (dst_file)
    227        fclose(dst_file);//关闭文件
    228 
    229    if (src_data)
    230        av_freep(&src_data[0]);//释放资源
    231    av_freep(&src_data);//释放资源
    232 
    233    if (dst_data)
    234        av_freep(&dst_data[0]);//释放资源
    235    av_freep(&dst_data);//释放资源
    236 
    237    swr_free(&swr_ctx);//释放重采样上下文资源
    238    return ret < 0;
    239}

    这里面一个很重要的结构体是SwrContext其说明如下

    1/**
    2 * The libswresample context. Unlike libavcodec and libavformat, this structure
    3 * is opaque. This means that if you would like to set options, you must use
    4 * the @ref avoptions API and cannot directly set values to members of the
    5 * structure.
    6 */
    7typedef struct SwrContext SwrContext;

    说明中提到不能对其成员直接进行操作,需借助avoptions API来实现对其成员变量的赋值。

    在主函数中声明了源数据以及目的数据的一些相关信息:源数据通道布局为STEREO,目的数据通道布局为SURROUND,比源数据多一个通道;源数据采样率为48000,目的数据为44100,等等。其中指明了源数据每次处理的样品数(每通道)为1024,且源数据样品格式为DBL类型,目的数据格式为S16。初始化SwrContext结构体之前分别打开了源文件以及目的文件。SwrContext有两种初始化方法:

    01* @code
    02* SwrContext *swr = swr_alloc();
    03* av_opt_set_channel_layout(swr, "in_channel_layout",  AV_CH_LAYOUT_5POINT1, 0);
    04* av_opt_set_channel_layout(swr, "out_channel_layout", AV_CH_LAYOUT_STEREO,  0);
    05* av_opt_set_int(swr, "in_sample_rate",     48000,                0);
    06* av_opt_set_int(swr, "out_sample_rate",    44100,                0);
    07* av_opt_set_sample_fmt(swr, "in_sample_fmt",  AV_SAMPLE_FMT_FLTP, 0);
    08* av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16,  0);
    09* @endcode
    10*
    11* The same job can be done using swr_alloc_set_opts() as well:
    12* @code
    13* SwrContext *swr = swr_alloc_set_opts(NULL,  // we're allocating a new context
    14*                       AV_CH_LAYOUT_STEREO,  // out_ch_layout
    15*                       AV_SAMPLE_FMT_S16,    // out_sample_fmt
    16*                       44100,                // out_sample_rate
    17*                       AV_CH_LAYOUT_5POINT1, // in_ch_layout
    18*                       AV_SAMPLE_FMT_FLTP,   // in_sample_fmt
    19*                       48000,                // in_sample_rate
    20*                       0,                    // log_offset
    21*                       NULL);                // log_ctx
    22* @endcode
    23*

    例程中是采用的前者,更直观一些,初始化完毕后,分别对源数据以及目的数据进行了参数的设置(通道布局、采样率、采样格式),通过调用swr_init函数来使其生效。

    接着为源数据申请空间

    1src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);// number of channels in the source layout: 2
    2ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
    3                                         src_nb_samples, src_sample_fmt, 0);
    4printf("src_linesize:%d\n",src_linesize);// 16384 = src_nb_samples * bytes per sample * src_nb_channels = 1024*8*2

    首先通过av_get_channel_layout_nb_channels函数得到源数据的通道数,接着通过av_samples_alloc_array_and_samples函数为源数据申请缓存,并由它算出缓存的大小(src_linesize)。然后根据源数据的样品数,通过av_rescale_rnd函数来计算所需要的目的样品数:

    1/**
    2 * Rescale a 64-bit integer with specified rounding.
    3 * A simple a*b/c isn't possible as it can overflow.
    4 *
    5 * @return rescaled value a, or if AV_ROUND_PASS_MINMAX is set and a is
    6 *         INT64_MIN or INT64_MAX then a is passed through unchanged.
    7 */
    8int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding) av_const;

    可以看到其返回的值是a*b/c(按指定的方式取整,例程中用AV_ROUND_UP向上取整)。因为源数据与目的数据对应的时间是相同的,即src_nb_samples/src_rate=dst_nb_samples/dst_rate,因此dst_nb_samples=src_nb_samples*dst_rate/src_rate。同时也为目的数据申请所需要的空间,该空间是理论上计算所得。接下来进入了do-while循环,循环中做了两件事:一个是通过fill_samples来填充源数据,一个是通过swr_convert对源数据进行重采样,转成目的数据。

    01/**
    02 * Fill dst buffer with nb_samples, generated starting from t.
    03 * Samples a 440 Hz sine at sample_rate and writes nb_samples interleaved frames to dst; every channel of a frame holds the same value.
    04 * *t is advanced by 1/sample_rate per generated sample, so a following call continues the same wave.
    05 */
    06static void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t)
    07{
    08    int i, j;
    09    double tincr = 1.0 / sample_rate, *dstp = dst;// tincr: time step between two samples
    10    const double c = 2 * M_PI * 440.0;// angular frequency of a 440 Hz tone
    11    /* generate sin tone with 440Hz frequency and duplicated channels */
    12    for (i = 0; i < nb_samples; i++) {
    13        *dstp = sin(c * *t);// value of the sine at time *t, stored for the first channel
    14        for (j = 1; j < nb_channels; j++)
    15            dstp[j] = dstp[0];// every other channel gets the first channel's value
    16        dstp += nb_channels;// step to the next frame (one sample for each channel)
    17        *t += tincr;// move the time forward by one sample period
    18    }
    19}

    fill_samples是以正弦波形来填充源数据的。正弦函数的一般形式为y=Asin(ωx+φ)+h,其中A为振幅,h为纵向偏移量,φ为初相位(决定波形的水平平移),ω决定周期,其最小正周期T=2π/|ω|。代码中ω=2π×440,A=1,φ=0,h=0,因此fill_samples实际上产生了一个频率为440Hz的正弦波形。函数中以sample_rate的采样率在该波形上提取了nb_samples个样本存储在源数据中,且源数据中两个通道的数据是相同的。当源数据的nb_samples个样品采样完毕后,又一次计算了目的数据的样品数,这次在源样品数上加上了swr_get_delay返回的延迟(以src_rate为基准折算成的样品数)。如果该次计算的样品数大于之前计算所得的样品数,则对目的数据重新进行空间申请,防止目的数据溢出,同时更新max_dst_nb_samples数值。swr_convert则完成对源数据的转换:

    01/** Convert audio.
    02 *
    03 * in and in_count can be set to 0 to flush the last few samples out at the
    04 * end.
    05 *
    06 * If more input is provided than output space then the input will be buffered.
    07 * You can avoid this buffering by providing more output space than input.
    08 * Conversion will run directly without copying whenever possible.
    09 *
    10 * @param s         allocated Swr context, with parameters set
    11 * @param out       output buffers, only the first one need be set in case of packed audio
    12 * @param out_count amount of space available for output in samples per channel
    13 * @param in        input buffers, only the first one need to be set in case of packed audio
    14 * @param in_count  number of input samples available in one channel
    15 *
    16 * @return number of samples output per channel, negative value on error
    17 */
    18int swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
    19                                const uint8_t **in , int in_count);

    转换后的目的数据存储于out中。需注意的是out_count要足够大:按上面的说明,如果输入多于输出空间,多出的输入会被缓存在重采样器内部。该函数返回每个通道实际输出的样品数,出错时返回负值。例程运行结果如下:

    由图中可以看出一开始计算的目的样本数以及空间是可以满足需要的,不过还是小心谨慎为好。

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多