ffmpeg音频重采样分析

mediatv 2016-02-27

展开全文

主机环境：Windows XP

开发环境：CodeBlocks13.12

ffmpeg版本：ffmpeg2.4

在学习ffmpeg教程的过程中，由于教程所用的ffmpeg版本较低，与ffmpeg2.4的API有些许出入，按教程解码后的音频不能直接播放。而ffplay.c中对解码后的音频都进行了重采样操作，于是去了解了一下重采样的相关知识。学习例程是ffmpeg2.4源代码目录下的doc/examples/resampling_audio.c文件，为便于学习，修改后的代码如下：

001 /*
002 * Copyright (c) 2012 Stefano Sabatini
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a copy
005 * of this software and associated documentation files (the "Software"), to deal
006 * in the Software without restriction, including without limitation the rights
007 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
008 * copies of the Software, and to permit persons to whom the Software is
009 * furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
019 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
020 * THE SOFTWARE.
021 */
022
023 /**
024 * @example resampling_audio.c
025 * libswresample API use example.
026 */
027
028 #include <libavutil/opt.h>
029 #include <libavutil/channel_layout.h>
030 #include <libavutil/samplefmt.h>
031 #include <libswresample/swresample.h>
032
033 /*
034 Map sample_fmt to a raw-audio format name: stores the big- or little-endian name chosen by AV_NE in *fmt and returns 0; for a format not in the table, leaves *fmt NULL, prints an error to stderr and returns AVERROR(EINVAL).
035 */
036 static int get_format_from_sample_fmt(const char **fmt,
037                                       enum AVSampleFormat sample_fmt)
038 {
039     int i;
040     struct sample_fmt_entry {
041         enum AVSampleFormat sample_fmt; const char *fmt_be, *fmt_le;
042     } sample_fmt_entries[] = {
043         { AV_SAMPLE_FMT_U8, "u8",    "u8"    },
044         { AV_SAMPLE_FMT_S16, "s16be", "s16le" },
045         { AV_SAMPLE_FMT_S32, "s32be", "s32le" },
046         { AV_SAMPLE_FMT_FLT, "f32be", "f32le" },
047         { AV_SAMPLE_FMT_DBL, "f64be", "f64le" },
048     };
049     *fmt = NULL; /* stays NULL unless a table entry matches */
050
051     for (i = 0; i < FF_ARRAY_ELEMS(sample_fmt_entries); i++) {
052         struct sample_fmt_entry *entry = &sample_fmt_entries[i];
053         if (sample_fmt == entry->sample_fmt) {
054             *fmt = AV_NE(entry->fmt_be, entry->fmt_le);
055             return 0;
056         }
057     }
058
059     fprintf(stderr,
060             "Sample format %s not supported as output format\n",
061             av_get_sample_fmt_name(sample_fmt));
062     return AVERROR(EINVAL);
063 }
064
065 /**
066 * Fill dst buffer with nb_samples, generated starting from t.
067 * Writes nb_samples interleaved frames of nb_channels doubles into dst, starting at time *t.
068 * Samples a 440 Hz sine at sample_rate; every channel gets the same value, and *t is advanced by nb_samples / sample_rate.
069 */
070 static void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t)
071 {
072     int i, j;
073     double tincr = 1.0 / sample_rate, *dstp = dst;// tincr is the time step between two samples
074     const double c = 2 * M_PI * 440.0;// angular frequency of a 440 Hz tone
075     /* generate sin tone with 440Hz frequency and duplicated channels */
076     for (i = 0; i < nb_samples; i++) {
077         *dstp = sin(c * *t);// sample the sine curve at time *t
078         for (j = 1; j < nb_channels; j++)
079             dstp[j] = dstp[0];// copy the first channel's value into every other channel
080         dstp += nb_channels;// all channels written: move on to the next frame
081         *t += tincr;// advance the time by one sample period
082     }
083 }
084
085 int main(int argc, char **argv)
086 {
087     int64_t src_ch_layout = AV_CH_LAYOUT_STEREO, dst_ch_layout = AV_CH_LAYOUT_SURROUND;
088     // source layout is stereo, destination layout is surround
089     int src_rate = 48000, dst_rate = 44100;// source and destination sample rates in Hz
090     uint8_t **src_data = NULL, **dst_data = NULL;// source and destination sample buffers, not yet allocated
091     int src_nb_channels = 0, dst_nb_channels = 0;// channel counts, filled in later from the layouts
092     int src_linesize, dst_linesize;// buffer line sizes reported by the av_samples_* calls
093     int src_nb_samples = 1024, dst_nb_samples, max_dst_nb_samples;// samples per channel: source, destination, allocated destination capacity
094     enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL, dst_sample_fmt = AV_SAMPLE_FMT_S16;
095     // source and destination sample formats; the format determines the bytes per sample
096     const char *src_filename = NULL,*dst_filename = NULL;// source and destination file names
097     FILE *src_file,*dst_file;// source and destination file handles
098     int dst_bufsize;// byte size of the converted data written in one iteration
099     const char *fmt;
100     struct SwrContext *swr_ctx;// opaque context: set its fields through the AVOption API, not directly
101     double t;
102     int ret;
103
104     /*
105     Generate audio frames, resample them to the configured format and rate, and write both the generated source data and the converted data to files.
106     */
107     if (argc != 3) {
108         fprintf(stderr, "Usage: %s input_file output_file\n"
109                 "API example program to show how to resample an audio stream with libswresample.\n"
110                 "This program generates a series of audio frames, resamples them to a specified "
111                 "output format and rate and saves them to a input file named input_file and an output file named output_file.\n",
112             argv[0]);
113         exit(1);
114     }
115     src_filename = argv[1];
116     dst_filename = argv[2];// take the destination file name from the command line
117
118     src_file = fopen(src_filename, "wb");// open the source dump file for binary writing
119     if (!src_file) {
120         fprintf(stderr, "Could not open src file %s\n", src_filename);
121         exit(1);// exit if it cannot be opened
122     }
123     dst_file = fopen(dst_filename, "wb");// open the destination file for binary writing
124     if (!dst_file) {
125         fprintf(stderr, "Could not open destination file %s\n", dst_filename);
126         exit(1);// exit if it cannot be opened
127     }
128
129     /* create resampler context */
130     swr_ctx = swr_alloc();// allocate the resampler context
131     if (!swr_ctx) {
132         fprintf(stderr, "Could not allocate resampler context\n");
133         ret = AVERROR(ENOMEM);// allocation failed: report out-of-memory
134         goto end;
135     }
136
137     /* set options on the context indirectly, via the AVOption API */
138     av_opt_set_int(swr_ctx, "in_channel_layout",    src_ch_layout, 0);// input channel layout
139     av_opt_set_int(swr_ctx, "in_sample_rate",       src_rate, 0);// input sample rate
140     av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);// input sample format
141
142     av_opt_set_int(swr_ctx, "out_channel_layout",    dst_ch_layout, 0);// output channel layout
143     av_opt_set_int(swr_ctx, "out_sample_rate",       dst_rate, 0);// output sample rate
144     av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);// output sample format
145
146     /* initialize the resampling context; swr_init makes the options set above take effect */
147     if ((ret = swr_init(swr_ctx)) < 0) {
148         fprintf(stderr, "Failed to initialize the resampling context\n");
149         goto end;// initialization failed: clean up and exit
150     }
151
152     /* allocate source and destination samples buffers */
153
154     src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);// number of source channels: 2 for stereo
155     ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
156                                              src_nb_samples, src_sample_fmt, 0);
157     printf("src_linesize:%d\n",src_linesize);// 16384: src_linesize = src_nb_samples * bytes per sample * src_nb_channels = 1024*8*2
158     // the call above allocates the source sample buffer
159     if (ret < 0) {
160         fprintf(stderr, "Could not allocate source samples\n");
161         goto end;
162     }
163
164     /* compute the number of converted samples: buffering is avoided
165      * ensuring that the output buffer will contain at least all the
166      * converted input samples (i.e. size the destination so that it cannot overflow) */
167     max_dst_nb_samples = dst_nb_samples =
168         av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);// both sides cover the same duration, so the count scales by dst_rate/src_rate
169     printf("max_dst_nb_samples:%d\n",max_dst_nb_samples);
170     /* buffer is going to be directly written to a rawaudio file, no alignment */
171     dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);// number of destination channels: 3 for surround
172     ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,
173                                              dst_nb_samples, dst_sample_fmt, 0);
174     // allocates the destination buffer; author's note: dst_linesize = dst_nb_samples*2*3 (NOTE(review): align=0 may pad this value - confirm)
175     printf("dst_linesize:%d\n",dst_linesize);
176     if (ret < 0) {
177         fprintf(stderr, "Could not allocate destination samples\n");
178         goto end;
179     }
180
181     t = 0;
182     do {
183         /* generate synthetic audio to use as the input */
184         fill_samples((double *)src_data[0], src_nb_samples, src_nb_channels, src_rate, &t);
185
186         /* compute destination number of samples, including what swr_get_delay reports */
187         dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) +
188                                         src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
189         //printf("dst_nb_samples:%d\n",dst_nb_samples);
190         if (dst_nb_samples > max_dst_nb_samples) {
191             av_freep(&dst_data[0]);// required count exceeds the allocated capacity: free the old buffer
192             ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
193                                    dst_nb_samples, dst_sample_fmt, 1);// reallocate the destination buffer at the larger size
194             if (ret < 0)
195                 break;// NOTE(review): ret is reassigned at listing line 217 after the loop, so this failure is not reflected in the exit status
196             max_dst_nb_samples = dst_nb_samples;// remember the new capacity
197         }
198
199         /* convert to destination format */
200         ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples);
201         if (ret < 0) {
202             fprintf(stderr, "Error while converting\n");
203             goto end;// conversion failed: clean up and exit
204         }
205         dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
206                                                  ret, dst_sample_fmt, 1);
207         // byte size of the ret samples per channel that were actually produced
208         if (dst_bufsize < 0) {
209             fprintf(stderr, "Could not get sample buffer size\n");
210             goto end;
211         }
212         printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);
213         fwrite((double *)src_data[0], 1, src_linesize, src_file);
214         fwrite(dst_data[0], 1, dst_bufsize, dst_file);// write dst_bufsize bytes of converted audio
215     } while (t < 1);// keep generating until t reaches 1
216
217     if ((ret = get_format_from_sample_fmt(&fmt, dst_sample_fmt)) < 0)
218         goto end;
219     fprintf(stderr, "Resampling succeeded. Play the output file with the command:\n"
220             "ffplay -f %s -channel_layout %"PRId64" -channels %d -ar %d %s\n",
221             fmt, dst_ch_layout, dst_nb_channels, dst_rate, dst_filename);
222
223 end:
224     if (src_file)
225         fclose(src_file);
226     if (dst_file)
227         fclose(dst_file);// close the files
228
229     if (src_data)
230         av_freep(&src_data[0]);// free the source sample data
231     av_freep(&src_data);// free the source pointer array
232
233     if (dst_data)
234         av_freep(&dst_data[0]);// free the destination sample data
235     av_freep(&dst_data);// free the destination pointer array
236
237     swr_free(&swr_ctx);// free the resampler context
238     return ret < 0;// exit status 1 when ret is negative, 0 otherwise
239 }

这里面一个很重要的结构体是SwrContext其说明如下

1 /**
2 * The libswresample context. Unlike libavcodec and libavformat, this structure
3 * is opaque. This means that if you would like to set options, you must use
4 * the @ref avoptions API and cannot directly set values to members of the
5 * structure.
6 */
7 typedef struct SwrContext SwrContext;

说明中提到不能对其成员直接进行操作，需借助avoptions API来实现对其成员变量的赋值。
在主函数中声明了一些源数据以及目的数据的一些相关信息，源数据通道布局为 STEREO，目的数据通道布局为SURROUND，增加了一个通道，源数据采样率为48000，目的数据为44100等等，其中指明了源数据的样品大小为1024，且源数据样品格式为DBL类型，目的数据格式为S16，初始化SwrContext结构体之前分别打开了源文件以及目的文件，SwrContext有两种初始化方法

01 * @code
02 * SwrContext *swr = swr_alloc();
03 * av_opt_set_channel_layout(swr, "in_channel_layout", AV_CH_LAYOUT_5POINT1, 0);
04 * av_opt_set_channel_layout(swr, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
05 * av_opt_set_int(swr, "in_sample_rate",     48000,                0);
06 * av_opt_set_int(swr, "out_sample_rate",    44100,                0);
07 * av_opt_set_sample_fmt(swr, "in_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
08 * av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
09 * @endcode
10 *
11 * The same job can be done using swr_alloc_set_opts() as well:
12 * @code
13 * SwrContext *swr = swr_alloc_set_opts(NULL, // we're allocating a new context
14 *                       AV_CH_LAYOUT_STEREO, // out_ch_layout
15 *                       AV_SAMPLE_FMT_S16,    // out_sample_fmt
16 *                       44100,                // out_sample_rate
17 *                       AV_CH_LAYOUT_5POINT1, // in_ch_layout
18 *                       AV_SAMPLE_FMT_FLTP,   // in_sample_fmt
19 *                       48000,                // in_sample_rate
20 *                       0,                    // log_offset
21 *                       NULL);                // log_ctx
22 * @endcode
23 *

例程中是采用的前者，更直观一些，初始化完毕后，分别对源数据以及目的数据进行了参数的设置(通道布局、采样率、采样格式)，通过调用swr_init函数来使其生效。
接着为源数据申请空间

1 src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);// number of source channels: 2 for stereo
2 ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
3                                          src_nb_samples, src_sample_fmt, 0);
4 printf("src_linesize:%d\n",src_linesize);// 16384: src_linesize = src_nb_samples * bytes per sample * src_nb_channels = 1024*8*2

首先通过av_get_channel_layout_nb_channels函数得到源数据的通道数，接着通过av_samples_alloc_array_and_samples函数为源数据申请缓存空间（同时得到缓存的行大小src_linesize），然后根据源数据的样本数，通过av_rescale_rnd函数计算所需要的目的样本数：

1 /**
2 * Rescale a 64-bit integer with specified rounding.
3 * A simple a*b/c isn't possible as it can overflow.
4 *
5 * @return rescaled value a, or if AV_ROUND_PASS_MINMAX is set and a is
6 *         INT64_MIN or INT64_MAX then a is passed through unchanged.
7 */
8 int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding) av_const;

可以看到其返回的值是a*b/c（按指定的方式取整）。因为源数据与目的数据对应的时间长度是相同的，即 src_nb_samples/src_rate=dst_nb_samples/dst_rate，因此 dst_nb_samples=src_nb_samples*dst_rate/src_rate。同时也为目的数据申请所需要的空间，该空间是理论上计算所得。接下来进入了do-while循环，循环中做了两件事：一是通过fill_samples来填充源数据，二是通过swr_convert对源数据进行重采样，转成目的数据。

01 /**
02 * Fill dst buffer with nb_samples, generated starting from t.
03 * Writes nb_samples interleaved frames of nb_channels doubles into dst, starting at time *t.
04 * Samples a 440 Hz sine at sample_rate; every channel gets the same value, and *t is advanced by nb_samples / sample_rate.
05 */
06 static void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t)
07 {
08     int i, j;
09     double tincr = 1.0 / sample_rate, *dstp = dst;// tincr is the time step between two samples
10     const double c = 2 * M_PI * 440.0;// angular frequency of a 440 Hz tone
11     /* generate sin tone with 440Hz frequency and duplicated channels */
12     for (i = 0; i < nb_samples; i++) {
13         *dstp = sin(c * *t);// sample the sine curve at time *t
14         for (j = 1; j < nb_channels; j++)
15             dstp[j] = dstp[0];// copy the first channel's value into every other channel
16         dstp += nb_channels;// all channels written: move on to the next frame
17         *t += tincr;// advance the time by one sample period
18     }
19 }

fill_samples是以正弦波形来填充源数据的。正弦波形函数为 y=Asin(ωx+φ)+h，其中A为振幅，h为竖直方向的偏移量，φ为初相位（决定水平方向的偏移），ω决定周期，其最小正周期T=2π/|ω|。代码中ω=2π×440，因此fill_samples实际上产生了一个频率为440Hz的正弦波形，函数中以sample_rate的采样率在该波形上提取了nb_samples个样本存储在源数据中，且源数据中两个通道的数据是相同的。当源数据的nb_samples个样本填充完毕后，又一次计算了目的数据的样本数，这次加上了重采样器内部的延迟(swr_get_delay)。如果该次计算的样本数大于之前计算所得的样本数，则对目的数据重新进行空间申请，防止目的数据溢出，同时更新max_dst_nb_samples数值。swr_convert是对源数据的转换，

01 /** Convert audio.
02 *
03 * in and in_count can be set to 0 to flush the last few samples out at the
04 * end.
05 *
06 * If more input is provided than output space then the input will be buffered.
07 * You can avoid this buffering by providing more output space than input.
08 * Conversion will run directly without copying whenever possible.
09 *
10 * @param s         allocated Swr context, with parameters set
11 * @param out       output buffers, only the first one need be set in case of packed audio
12 * @param out_count amount of space available for output in samples per channel
13 * @param in        input buffers, only the first one need to be set in case of packed audio
14 * @param in_count number of input samples available in one channel
15 *
16 * @return number of samples output per channel, negative value on error
17 */
18 int swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
19                                 const uint8_t **in , int in_count);

转换后的目的数据存储于out中，需注意的是out_count要足够大，避免转换后的数据放不下（放不下的输入会被缓存在重采样器内部）。该函数返回每个通道实际输出的样本数，出错时返回负值。例程运行结果如下：

由图中可以看出一开始计算的目的样本数以及空间是可以满足需要的，不过还是小心谨慎为好。