ffmpeg框架详解之容器格式分析 | 云里飘博客

梦中家园 2013-11-08

展开全文

ffmpeg框架详解之容器格式分析

2013年08月30日 ⁄ 编程 ⁄ 共 6384字 ⁄ 暂无评论 ⁄ 被围观 179+

ffmpeg是一个开源的音视频编解码框架，结构清晰，可扩展性强。本人研究过ffmpeg一段时间，在此详细说明一下ffmpeg的框架；由于内容较多，会分为多篇日志。

我们常说的mpg、rmvb、ts、mkv、mp4等其实就是容器格式，用于封装已经编码过的音频数据和视频数据。播放一个视频文件的第一步是确定它的容器格式，并把音频数据和视频数据从容器中分离出来，这个过程称为解复用。ffmpeg在avformat.h文件中定义了很多用于复用和解复用的数据结构，其中常用的有AVInputFormat(用于解复用)，AVOutputFormat(用于复用)，AVStream(用于描述解复用后的数据流)，AVFormatContext(复用/解复用的上下文)。

AVFormatContext的结构如下

/**
 * Format I/O context.
 *
 * Holds everything known about one opened container: the demuxer
 * (iformat) or muxer (oformat) in use, the byte I/O handle, the list of
 * streams, and file-level information such as start time, duration,
 * file size and bit rate.
 *
 * NOTE(review): field order and the #if-guarded regions determine the
 * binary layout of the structure; do not reorder members.
 */
typedef struct AVFormatContext {
const AVClass *av_class; /**< Set by avformat_alloc_context. */
/* Can only be iformat or oformat, not both at the same time. */
struct AVInputFormat *iformat;
struct AVOutputFormat *oformat;
void *priv_data; /**< presumably private state of the (de)muxer -- TODO confirm */
ByteIOContext *pb; /**< byte I/O context the container data goes through */
unsigned int nb_streams; /**< presumably the number of used entries in streams[] -- TODO confirm */
AVStream *streams[MAX_STREAMS];
char filename[1024]; /**< input or output filename */
/* stream info */
int64_t timestamp;
/* The tag fields below exist only while LIBAVFORMAT_VERSION_INT < (53<<16). */
#if LIBAVFORMAT_VERSION_INT < (53<<16)
char title[512];
char author[512];
char copyright[512];
char comment[512];
char album[512];
int year; /**< ID3 year, 0 if none */
int track; /**< track number, 0 if none */
char genre[32]; /**< ID3 genre */
#endif

int ctx_flags; /**< Format-specific flags, see AVFMTCTX_xx */
/* private data for pts handling (do not modify directly). */
/** This buffer is only needed when packets were already buffered but
not decoded, for example to get the codec parameters in MPEG
streams. */
struct AVPacketList *packet_buffer;

/** Decoding: position of the first frame of the component, in
AV_TIME_BASE fractional seconds. NEVER set this value directly:
It is deduced from the AVStream values. */
int64_t start_time;
/** Decoding: duration of the stream, in AV_TIME_BASE fractional
seconds. Only set this value if you know none of the individual stream
durations and also do not set any of them. This is deduced from the
AVStream values if not set. */
int64_t duration;
/** decoding: total file size, 0 if unknown */
int64_t file_size;
/** Decoding: total stream bitrate in bit/s, 0 if not
available. Never set it directly if the file_size and the
duration are known as FFmpeg can compute it automatically. */
int bit_rate;

/* av_read_frame() support */
AVStream *cur_st;
/* The *_deprecated members exist only while LIBAVFORMAT_VERSION_INT < (53<<16). */
#if LIBAVFORMAT_VERSION_INT < (53<<16)
const uint8_t *cur_ptr_deprecated;
int cur_len_deprecated;
AVPacket cur_pkt_deprecated;
#endif

/* av_seek_frame() support */
int64_t data_offset; /**< offset of the first packet */
int index_built;

/* NOTE(review): the four fields below look like muxing parameters; their
   units are not visible here -- confirm before relying on them. */
int mux_rate;
unsigned int packet_size;
int preload;
int max_delay;

#define AVFMT_NOOUTPUTLOOP -1
#define AVFMT_INFINITEOUTPUTLOOP 0
/** number of times to loop output in formats that support it */
int loop_output;

/** Bit mask built from the AVFMT_FLAG_* values defined below. */
int flags;
#define AVFMT_FLAG_GENPTS 0x0001 ///< Generate missing pts even if it requires parsing future frames.
#define AVFMT_FLAG_IGNIDX 0x0002 ///< Ignore index.
#define AVFMT_FLAG_NONBLOCK 0x0004 ///< Do not block when reading packets from input.
#define AVFMT_FLAG_IGNDTS 0x0008 ///< Ignore DTS on frames that contain both DTS & PTS
#define AVFMT_FLAG_NOFILLIN 0x0010 ///< Do not infer any values from other values, just return what is stored in the container
#define AVFMT_FLAG_NOPARSE 0x0020 ///< Do not use AVParsers, you also must set AVFMT_FLAG_NOFILLIN as the fillin code works on frames and no parsing -> no frames. Also seeking to frames can not work if parsing to find frame boundaries has been disabled
#define AVFMT_FLAG_RTP_HINT 0x0040 ///< Add RTP hinting to the output file

int loop_input; /**< presumably the input-side counterpart of loop_output -- TODO confirm */
/** decoding: size of data to probe; encoding: unused. */
unsigned int probesize;

/**
* Maximum time (in AV_TIME_BASE units) during which the input should
* be analyzed in av_find_stream_info().
*/
int max_analyze_duration;

/* NOTE(review): key/keylen look like a byte key and its length; what the
   key is used for is not visible here -- confirm. */
const uint8_t *key;
int keylen;

/* presumably nb_programs is the number of entries in programs -- TODO confirm */
unsigned int nb_programs;
AVProgram **programs;

/**
* Forced video codec_id.
* Demuxing: Set by user.
*/
enum CodecID video_codec_id;
/**
* Forced audio codec_id.
* Demuxing: Set by user.
*/
enum CodecID audio_codec_id;
/**
* Forced subtitle codec_id.
* Demuxing: Set by user.
*/
enum CodecID subtitle_codec_id;

/**
* Maximum amount of memory in bytes to use for the index of each stream.
* If the index exceeds this size, entries will be discarded as
* needed to maintain a smaller size. This can lead to slower or less
* accurate seeking (depends on demuxer).
* Demuxers for which a full in-memory index is mandatory will ignore
* this.
* muxing : unused
* demuxing: set by user
*/
unsigned int max_index_size;

/**
* Maximum amount of memory in bytes to use for buffering frames
* obtained from realtime capture devices.
*/
unsigned int max_picture_buffer;

/* presumably nb_chapters is the number of entries in chapters -- TODO confirm */
unsigned int nb_chapters;
AVChapter **chapters;

/**
* Flags to enable debugging.
*/
int debug;
#define FF_FDEBUG_TS 0x0001

/**
* Raw packets from the demuxer, prior to parsing and decoding.
* This buffer is used for buffering packets until the codec can
* be identified, as parsing cannot be done without knowing the
* codec.
*/
struct AVPacketList *raw_packet_buffer;
struct AVPacketList *raw_packet_buffer_end;

/* presumably the tail of the packet_buffer list declared earlier -- TODO confirm */
struct AVPacketList *packet_buffer_end;

AVMetadata *metadata;

/**
* Remaining size available for raw_packet_buffer, in bytes.
* NOT PART OF PUBLIC API
*/
#define RAW_PACKET_BUFFER_SIZE 2500000
int raw_packet_buffer_remaining_size;

/**
* Start time of the stream in real world time, in microseconds
* since the unix epoch (00:00 1st January 1970). That is, pts=0
* in the stream was captured at this real world time.
* - encoding: Set by user.
* - decoding: Unused.
*/
int64_t start_time_realtime;
} AVFormatContext;

在该结构中我们可以获得所有容器格式相关的信息，包括流的数量和相应的流描述AVStream结构，开始时间，文件大小，总时长等，其中ByteIOContext结构是ffmpeg数据IO的接口，这个在后面会详细说明。

使用ffmpeg时，首先使用av_register_all()注册所有的容器格式，编解码格式，然后会用av_open_input_file打开文件。av_open_input_file的原型是

/**
 * Open the input named by filename and set up *ic_ptr for it.
 *
 * @param ic_ptr   receives the format context; set to NULL on failure
 * @param filename name of the input to open
 * @param fmt      input format to use, or NULL to have it probed
 * @param buf_size I/O buffer size to request; ignored unless > 0
 * @param ap       extra parameters, checked for NULL before use
 * @return 0 on success, otherwise the error code of the step that failed
 */
int av_open_input_file(AVFormatContext **ic_ptr, const char *filename,
AVInputFormat *fmt,
int buf_size,
AVFormatParameters *ap)

参数ic_ptr是指向AVFormatContext指针的二级指针，是输出参数

参数filename是文件名，是输入参数

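参数fmt是指向AVInputFormat结构的指针，是输入参数；传入NULL时由函数自行探测容器格式

参数buf_size是输入参数，大于0时会调用url_setbufsize设置IO缓冲区的大小

参数ap是指向AVFormatParameters结构的指针，是输入参数；函数内部对其做了空指针判断，并原样传给av_open_input_stream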

该函数会打开指定的文件，探测文件的容器格式，打开数据流，代码如下

/**
 * Open an input, work out its container format and pass everything on to
 * av_open_input_stream().
 *
 * Steps, in order:
 *   1. If no format was forced, probe with the data available before any
 *      I/O (file name only, empty buffer).
 *   2. Unless the chosen format carries AVFMT_NOFILE, open the file,
 *      optionally resize its buffer, and - if the format is still unknown -
 *      probe the opened stream.
 *   3. Fail if no format was found, or if the format needs a numbered file
 *      name and the name does not pass av_filename_number_test().
 *   4. Call av_open_input_stream() with the opened I/O context.
 *
 * @param ic_ptr   receives the format context; set to NULL on failure
 * @param filename name of the input to open
 * @param fmt      input format to use, or NULL to have it probed
 * @param buf_size I/O buffer size to request; ignored unless > 0
 * @param ap       extra parameters, checked for NULL before use
 * @return 0 on success, otherwise the error code of the step that failed
 */
int av_open_input_file(AVFormatContext **ic_ptr, const char *filename,
                       AVInputFormat *fmt,
                       int buf_size,
                       AVFormatParameters *ap)
{
    int ret;
    AVProbeData probe;
    ByteIOContext *io = NULL;
    /* A caller-preallocated context doubles as the logging context. */
    void *log_ctx = (ap && ap->prealloced_context) ? *ic_ptr : NULL;

    probe.filename = filename ? filename : "";
    probe.buf      = NULL;
    probe.buf_size = 0;

    /* No forced format: try to pick one before touching the file. */
    if (!fmt) {
        fmt = av_probe_input_format(&probe, 0);
    }

    /* Skip opening only when a format is known and it is flagged as
       needing no file. */
    if (!(fmt && (fmt->flags & AVFMT_NOFILE))) {
        ret = url_fopen(&io, filename, URL_RDONLY);
        if (ret < 0) {
            goto fail;
        }

        if (buf_size > 0) {
            url_setbufsize(io, buf_size);
        }

        /* Format still unknown: probe the opened stream. */
        if (!fmt) {
            ret = ff_probe_input_buffer(&io, &fmt, filename, log_ctx, 0,
                                        log_ctx ? (*ic_ptr)->probesize : 0);
            if (ret < 0) {
                goto fail;
            }
        }
    }

    /* Nothing recognised the input. */
    if (!fmt) {
        ret = AVERROR_INVALIDDATA;
        goto fail;
    }

    /* Formats flagged AVFMT_NEEDNUMBER require a numbered file name. */
    if ((fmt->flags & AVFMT_NEEDNUMBER) && !av_filename_number_test(filename)) {
        ret = AVERROR_NUMEXPECTED;
        goto fail;
    }

    ret = av_open_input_stream(ic_ptr, io, filename, fmt, ap);
    if (!ret) {
        return 0;
    }

fail:
    av_freep(&probe.buf);
    if (io) {
        url_fclose(io);
    }
    /* Only a caller-preallocated context is freed here. */
    if (ap && ap->prealloced_context) {
        av_free(*ic_ptr);
    }
    *ic_ptr = NULL;
    return ret;
}